selvaonline committed on
Commit
02ae7c1
·
verified ·
1 Parent(s): c5b17bb

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +282 -15
app.py CHANGED
@@ -19,11 +19,160 @@ def extract_text_from_html(html):
19
  text = text.replace('&nbsp;', ' ').replace('&amp;', '&').replace('&lt;', '<').replace('&gt;', '>')
20
  return text.strip()
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  # Function to fetch deals from DealsFinders.com (from shopping_assistant.py)
23
- def fetch_deals_data(url="https://www.dealsfinders.com/wp-json/wp/v2/posts", num_pages=2, per_page=100):
24
  """
25
- Fetch deals data exclusively from the DealsFinders API
26
  """
 
 
 
 
 
27
  all_deals = []
28
 
29
  # Fetch from the DealsFinders API
@@ -46,10 +195,17 @@ def fetch_deals_data(url="https://www.dealsfinders.com/wp-json/wp/v2/posts", num
46
  break
47
  else:
48
  print(f"Failed to fetch page {page} from DealsFinders API: {response.status_code}")
49
- break
 
50
  except Exception as e:
51
  print(f"Error fetching page {page} from DealsFinders API: {str(e)}")
52
- break
 
 
 
 
 
 
53
 
54
  return all_deals
55
 
@@ -144,9 +300,99 @@ except Exception as e:
144
  # Not using recommended models
145
  using_recommended_models = False
146
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  # Global variable to store deals data
148
  deals_cache = None
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  def classify_text(text, fetch_deals=True):
151
  """
152
  Classify the text using the model and fetch relevant deals
@@ -202,7 +448,8 @@ def classify_text(text, fetch_deals=True):
202
  try:
203
  # Fetch deals data if not already cached
204
  if deals_cache is None:
205
- deals_data = fetch_deals_data(num_pages=2) # Limit to 2 pages for faster response
 
206
  deals_cache = process_deals_data(deals_data)
207
 
208
  # Using MPNet for semantic search if available
@@ -227,19 +474,30 @@ def classify_text(text, fetch_deals=True):
227
  # Extract the relevant deals
228
  relevant_deals = [deals_cache[idx] for idx in top_indices]
229
  else:
230
- # Fallback to keyword-based search
231
  query_terms = text.lower().split()
232
  expanded_terms = list(query_terms)
233
 
234
- # Add related terms based on the query
235
- if any(term in text.lower() for term in ['headphone', 'headphones']):
236
- expanded_terms.extend(['earbuds', 'earphones', 'earpods', 'airpods', 'audio', 'bluetooth', 'wireless'])
237
- elif any(term in text.lower() for term in ['laptop', 'computer']):
238
- expanded_terms.extend(['notebook', 'macbook', 'chromebook', 'pc'])
239
- elif any(term in text.lower() for term in ['tv', 'television']):
240
- expanded_terms.extend(['smart tv', 'roku', 'streaming'])
241
- elif any(term in text.lower() for term in ['kitchen', 'appliance']):
242
- expanded_terms.extend(['mixer', 'blender', 'toaster', 'microwave', 'oven'])
 
 
 
 
 
 
 
 
 
 
 
243
 
244
  # Score deals based on relevance to the query
245
  scored_deals = []
@@ -269,6 +527,15 @@ def classify_text(text, fetch_deals=True):
269
  if term in excerpt:
270
  score += 1
271
 
 
 
 
 
 
 
 
 
 
272
  # Add to scored deals if it has any relevance
273
  if score > 0:
274
  scored_deals.append((deal, score))
 
19
  text = text.replace('&nbsp;', ' ').replace('&amp;', '&').replace('&lt;', '<').replace('&gt;', '>')
20
  return text.strip()
21
 
22
+ # Sample deals data to use as fallback
23
+ SAMPLE_DEALS = [
24
+ {
25
+ "id": 1,
26
+ "title": {
27
+ "rendered": "Apple AirPods Pro (2nd Generation) - 20% Off"
28
+ },
29
+ "link": "https://www.example.com/deals/airpods-pro",
30
+ "date": "2025-02-25T10:00:00",
31
+ "content": {
32
+ "rendered": "<p>Get the latest Apple AirPods Pro (2nd Generation) for 20% off the regular price. These wireless earbuds feature active noise cancellation, transparency mode, and spatial audio with dynamic head tracking.</p><p>Regular price: $249.99</p><p>Deal price: $199.99</p><p>You save: $50.00</p>"
33
+ },
34
+ "excerpt": {
35
+ "rendered": "<p>Apple AirPods Pro (2nd Generation) with active noise cancellation and transparency mode. Now 20% off - only $199.99!</p>"
36
+ }
37
+ },
38
+ {
39
+ "id": 2,
40
+ "title": {
41
+ "rendered": "Samsung 65\" QLED 4K Smart TV - $300 Off"
42
+ },
43
+ "link": "https://www.example.com/deals/samsung-qled-tv",
44
+ "date": "2025-02-26T09:30:00",
45
+ "content": {
46
+ "rendered": "<p>Upgrade your home entertainment with this Samsung 65\" QLED 4K Smart TV. Features Quantum HDR, Motion Xcelerator Turbo+, and Object Tracking Sound for an immersive viewing experience.</p><p>Regular price: $1,299.99</p><p>Deal price: $999.99</p><p>You save: $300.00</p>"
47
+ },
48
+ "excerpt": {
49
+ "rendered": "<p>Samsung 65\" QLED 4K Smart TV with Quantum HDR and Object Tracking Sound. Save $300 - now only $999.99!</p>"
50
+ }
51
+ },
52
+ {
53
+ "id": 3,
54
+ "title": {
55
+ "rendered": "Sony WH-1000XM5 Wireless Headphones - 25% Off"
56
+ },
57
+ "link": "https://www.example.com/deals/sony-wh1000xm5",
58
+ "date": "2025-02-26T14:15:00",
59
+ "content": {
60
+ "rendered": "<p>Experience industry-leading noise cancellation with the Sony WH-1000XM5 wireless headphones. Features 30-hour battery life, quick charging, and exceptional sound quality with the new Integrated Processor V1.</p><p>Regular price: $399.99</p><p>Deal price: $299.99</p><p>You save: $100.00</p>"
61
+ },
62
+ "excerpt": {
63
+ "rendered": "<p>Sony WH-1000XM5 wireless headphones with industry-leading noise cancellation and 30-hour battery life. Now 25% off at $299.99!</p>"
64
+ }
65
+ },
66
+ {
67
+ "id": 4,
68
+ "title": {
69
+ "rendered": "Bose QuietComfort Ultra Headphones - 20% Off"
70
+ },
71
+ "link": "https://www.example.com/deals/bose-quietcomfort-ultra",
72
+ "date": "2025-02-25T15:30:00",
73
+ "content": {
74
+ "rendered": "<p>Experience the ultimate in noise cancellation with Bose QuietComfort Ultra headphones. Features spatial audio, custom EQ, and up to 24 hours of battery life.</p><p>Regular price: $429.99</p><p>Deal price: $343.99</p><p>You save: $86.00</p>"
75
+ },
76
+ "excerpt": {
77
+ "rendered": "<p>Bose QuietComfort Ultra headphones with advanced noise cancellation and spatial audio. Now 20% off at $343.99!</p>"
78
+ }
79
+ },
80
+ {
81
+ "id": 5,
82
+ "title": {
83
+ "rendered": "Beats Studio Pro Wireless Headphones - 40% Off"
84
+ },
85
+ "link": "https://www.example.com/deals/beats-studio-pro",
86
+ "date": "2025-02-26T16:30:00",
87
+ "content": {
88
+ "rendered": "<p>The Beats Studio Pro wireless headphones deliver premium sound with active noise cancellation, transparency mode, and up to 40 hours of battery life.</p><p>Regular price: $349.99</p><p>Deal price: $209.99</p><p>You save: $140.00</p>"
89
+ },
90
+ "excerpt": {
91
+ "rendered": "<p>Beats Studio Pro wireless headphones with active noise cancellation and 40-hour battery life. Now 40% off at $209.99!</p>"
92
+ }
93
+ },
94
+ {
95
+ "id": 6,
96
+ "title": {
97
+ "rendered": "Dyson V12 Detect Slim Cordless Vacuum - $150 Off"
98
+ },
99
+ "link": "https://www.example.com/deals/dyson-v12",
100
+ "date": "2025-02-27T08:45:00",
101
+ "content": {
102
+ "rendered": "<p>The Dyson V12 Detect Slim cordless vacuum features a laser that reveals microscopic dust, an LCD screen that displays particle counts, and powerful suction for deep cleaning.</p><p>Regular price: $649.99</p><p>Deal price: $499.99</p><p>You save: $150.00</p>"
103
+ },
104
+ "excerpt": {
105
+ "rendered": "<p>Dyson V12 Detect Slim cordless vacuum with laser dust detection and powerful suction. Save $150 - now only $499.99!</p>"
106
+ }
107
+ },
108
+ {
109
+ "id": 7,
110
+ "title": {
111
+ "rendered": "Nintendo Switch OLED Model - Bundle Deal"
112
+ },
113
+ "link": "https://www.example.com/deals/nintendo-switch-oled",
114
+ "date": "2025-02-27T11:20:00",
115
+ "content": {
116
+ "rendered": "<p>Get the Nintendo Switch OLED Model with a vibrant 7-inch OLED screen, plus two games and a carrying case. The perfect gaming package for home or on-the-go play.</p><p>Regular price: $439.99</p><p>Deal price: $379.99</p><p>You save: $60.00</p>"
117
+ },
118
+ "excerpt": {
119
+ "rendered": "<p>Nintendo Switch OLED Model bundle with two games and carrying case. Special bundle price of $379.99!</p>"
120
+ }
121
+ },
122
+ {
123
+ "id": 8,
124
+ "title": {
125
+ "rendered": "MacBook Air M3 - $200 Off"
126
+ },
127
+ "link": "https://www.example.com/deals/macbook-air-m3",
128
+ "date": "2025-02-26T10:45:00",
129
+ "content": {
130
+ "rendered": "<p>The latest MacBook Air with M3 chip offers incredible performance and battery life in an ultra-thin design. Features a 13.6-inch Liquid Retina display, 8GB RAM, and 256GB SSD storage.</p><p>Regular price: $1,099.99</p><p>Deal price: $899.99</p><p>You save: $200.00</p>"
131
+ },
132
+ "excerpt": {
133
+ "rendered": "<p>MacBook Air with M3 chip, 13.6-inch Liquid Retina display, and all-day battery life. Save $200 - now only $899.99!</p>"
134
+ }
135
+ },
136
+ {
137
+ "id": 9,
138
+ "title": {
139
+ "rendered": "Kindle Paperwhite Signature Edition - 30% Off"
140
+ },
141
+ "link": "https://www.example.com/deals/kindle-paperwhite",
142
+ "date": "2025-02-27T09:15:00",
143
+ "content": {
144
+ "rendered": "<p>The Kindle Paperwhite Signature Edition features a 6.8-inch display, wireless charging, auto-adjusting front light, and 32GB storage. Perfect for reading anywhere, anytime.</p><p>Regular price: $189.99</p><p>Deal price: $132.99</p><p>You save: $57.00</p>"
145
+ },
146
+ "excerpt": {
147
+ "rendered": "<p>Kindle Paperwhite Signature Edition with 6.8-inch display, wireless charging, and 32GB storage. Now 30% off at $132.99!</p>"
148
+ }
149
+ },
150
+ {
151
+ "id": 10,
152
+ "title": {
153
+ "rendered": "LG C3 65\" OLED 4K Smart TV - $500 Off"
154
+ },
155
+ "link": "https://www.example.com/deals/lg-c3-oled",
156
+ "date": "2025-02-25T13:00:00",
157
+ "content": {
158
+ "rendered": "<p>Experience stunning picture quality with the LG C3 65\" OLED 4K Smart TV. Features self-lit OLED pixels, Dolby Vision, Dolby Atmos, and NVIDIA G-SYNC for gaming.</p><p>Regular price: $1,799.99</p><p>Deal price: $1,299.99</p><p>You save: $500.00</p>"
159
+ },
160
+ "excerpt": {
161
+ "rendered": "<p>LG C3 65\" OLED 4K Smart TV with self-lit pixels and Dolby Vision. Save $500 - now only $1,299.99!</p>"
162
+ }
163
+ }
164
+ ]
165
+
166
  # Function to fetch deals from DealsFinders.com (from shopping_assistant.py)
167
+ def fetch_deals_data(url="https://www.dealsfinders.com/wp-json/wp/v2/posts", num_pages=2, per_page=100, use_sample_data=False):
168
  """
169
+ Fetch deals data exclusively from the DealsFinders API or use sample data
170
  """
171
+ # If use_sample_data is True, return the sample deals
172
+ if use_sample_data:
173
+ print("Using sample deals data")
174
+ return SAMPLE_DEALS
175
+
176
  all_deals = []
177
 
178
  # Fetch from the DealsFinders API
 
195
  break
196
  else:
197
  print(f"Failed to fetch page {page} from DealsFinders API: {response.status_code}")
198
+ print("Falling back to sample deals data")
199
+ return SAMPLE_DEALS
200
  except Exception as e:
201
  print(f"Error fetching page {page} from DealsFinders API: {str(e)}")
202
+ print("Falling back to sample deals data")
203
+ return SAMPLE_DEALS
204
+
205
+ # If no deals were fetched, use sample data
206
+ if not all_deals:
207
+ print("No deals fetched from API. Using sample deals data")
208
+ return SAMPLE_DEALS
209
 
210
  return all_deals
211
 
 
300
  # Not using recommended models
301
  using_recommended_models = False
302
 
303
+ # File path for storing deals data locally
304
+ DEALS_DATA_PATH = "deals_data.json"
305
+
306
+ # Function to fetch and save a large number of deals
307
def fetch_and_save_deals(max_deals=10000, per_page=100):
    """
    Fetch up to ``max_deals`` deals from the DealsFinders API and save them locally.

    Pages are requested one at a time (``per_page`` posts each, at most 100
    pages) until the API returns a short page, ``max_deals`` is reached, or a
    request fails. Whatever was collected so far is trimmed to ``max_deals``,
    passed through ``process_deals_data`` and written to ``DEALS_DATA_PATH``
    as JSON.

    Args:
        max_deals: Upper bound on the number of raw deals to keep.
        per_page: Number of posts requested per API page.

    Returns:
        The value returned by ``process_deals_data`` for the fetched deals,
        whether or not writing the file succeeded.
    """
    print(f"Fetching up to {max_deals} deals...")

    api_url = "https://www.dealsfinders.com/wp-json/wp/v2/posts"
    # Add a user agent to avoid being blocked (built once, reused for every page)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
    }
    # requests has no default timeout; without one a stalled connection would
    # block the caller (this runs at application start-up) forever.
    request_timeout = 30

    all_deals = []
    # Ceiling division, limited to 100 pages max
    num_pages = min(-(-max_deals // per_page), 100)

    # Fetch from the DealsFinders API
    for page in range(1, num_pages + 1):
        try:
            response = requests.get(
                api_url,
                params={"page": page, "per_page": per_page},
                headers=headers,
                timeout=request_timeout,
            )
            if response.status_code != 200:
                print(f"Failed to fetch page {page} from DealsFinders API: {response.status_code}")
                break
            deals = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON
            print(f"Error fetching page {page} from DealsFinders API: {str(e)}")
            break

        if not isinstance(deals, list):
            # An error object instead of a list of posts; extending with it
            # would silently add its keys as "deals".
            print(f"Failed to fetch page {page} from DealsFinders API: unexpected response body")
            break

        all_deals.extend(deals)
        print(f"Fetched page {page} with {len(deals)} deals from DealsFinders API")

        # If we get fewer deals than requested, we've reached the end
        if len(deals) < per_page:
            print(f"Reached the end of available deals at page {page}")
            break

        # If we've reached the maximum number of deals, stop
        if len(all_deals) >= max_deals:
            print(f"Reached the maximum number of deals ({max_deals})")
            break

    # Trim unconditionally: a short final page can still push the total past
    # max_deals, and that exit path never reached the trim before.
    all_deals = all_deals[:max_deals]

    # Process the deals
    processed_deals = process_deals_data(all_deals)

    # Save the deals to a local file (best effort: a failure is only reported)
    try:
        with open(DEALS_DATA_PATH, "w", encoding="utf-8") as f:
            json.dump(processed_deals, f)
        print(f"Saved {len(processed_deals)} deals to {DEALS_DATA_PATH}")
    except (OSError, TypeError, ValueError) as e:
        print(f"Error saving deals to file: {str(e)}")

    return processed_deals
359
+
360
+ # Function to load deals from the local file
361
def load_deals_from_file():
    """
    Load previously saved deals from ``DEALS_DATA_PATH``.

    Returns:
        The list decoded from the JSON file, or ``None`` when the file does
        not exist, cannot be read, is not valid JSON, or does not contain a
        JSON list.
    """
    try:
        # Open directly instead of checking os.path.exists first: the file can
        # disappear between the check and the open.
        with open(DEALS_DATA_PATH, "r", encoding="utf-8") as f:
            deals = json.load(f)
    except FileNotFoundError:
        print(f"Deals file {DEALS_DATA_PATH} does not exist")
        return None
    except (OSError, ValueError) as e:
        # ValueError covers both malformed JSON and undecodable bytes
        print(f"Error loading deals from file: {str(e)}")
        return None

    if not isinstance(deals, list):
        # Callers take len() of the result and index it by position, so any
        # other JSON value is treated as an unusable cache.
        print(f"Error loading deals from file: expected a list, got {type(deals).__name__}")
        return None

    print(f"Loaded {len(deals)} deals from {DEALS_DATA_PATH}")
    return deals
377
+
378
  # Global variable to store deals data
379
  deals_cache = None
380
 
381
# Populate the deals cache once at import time: prefer the local file and
# only hit the API when nothing usable is stored on disk.
try:
    deals_cache = load_deals_from_file()

    # A missing file (None) and an empty file are handled the same way
    if deals_cache is None or len(deals_cache) == 0:
        print("No deals found in local file. Fetching deals...")
        deals_cache = fetch_and_save_deals()

    print(f"Initialized with {len(deals_cache) if deals_cache else 0} deals")
except Exception as e:
    # Start-up must not fail because of the cache; leave it unset instead
    print(f"Error initializing deals cache: {str(e)}")
    deals_cache = None
395
+
396
  def classify_text(text, fetch_deals=True):
397
  """
398
  Classify the text using the model and fetch relevant deals
 
448
  try:
449
  # Fetch deals data if not already cached
450
  if deals_cache is None:
451
+ # Use sample data by default in Hugging Face space environment
452
+ deals_data = fetch_deals_data(num_pages=2, use_sample_data=True) # Use sample data for reliability
453
  deals_cache = process_deals_data(deals_data)
454
 
455
  # Using MPNet for semantic search if available
 
474
  # Extract the relevant deals
475
  relevant_deals = [deals_cache[idx] for idx in top_indices]
476
  else:
477
+ # Improved keyword-based search with category awareness
478
  query_terms = text.lower().split()
479
  expanded_terms = list(query_terms)
480
 
481
+ # Get the top category from the classification results
482
+ top_category = top_categories[0][0] if top_categories else None
483
+
484
+ # Add category-specific terms
485
+ if top_category == "electronics":
486
+ expanded_terms.extend(['electronic', 'device', 'gadget', 'tech', 'technology'])
487
+ if any(term in text.lower() for term in ['headphone', 'headphones']):
488
+ expanded_terms.extend(['earbuds', 'earphones', 'earpods', 'airpods', 'audio', 'bluetooth', 'wireless'])
489
+ elif any(term in text.lower() for term in ['laptop', 'computer']):
490
+ expanded_terms.extend(['notebook', 'macbook', 'chromebook', 'pc'])
491
+ elif any(term in text.lower() for term in ['tv', 'television']):
492
+ expanded_terms.extend(['smart tv', 'roku', 'streaming'])
493
+ elif top_category == "kitchen":
494
+ expanded_terms.extend(['appliance', 'cookware', 'utensil', 'blender', 'mixer', 'toaster', 'microwave', 'oven'])
495
+ elif top_category == "home":
496
+ expanded_terms.extend(['furniture', 'decor', 'decoration', 'bedding', 'household'])
497
+ elif top_category == "clothing":
498
+ expanded_terms.extend(['clothes', 'shirt', 'pants', 'dress', 'fashion', 'wear', 'apparel'])
499
+ elif top_category == "toys":
500
+ expanded_terms.extend(['game', 'play', 'children', 'kid', 'kids', 'fun'])
501
 
502
  # Score deals based on relevance to the query
503
  scored_deals = []
 
527
  if term in excerpt:
528
  score += 1
529
 
530
+ # Boost score for deals matching the top category
531
+ if top_category:
532
+ if top_category.lower() in title.lower():
533
+ score += 15
534
+ if top_category.lower() in content.lower():
535
+ score += 5
536
+ if top_category.lower() in excerpt.lower():
537
+ score += 5
538
+
539
  # Add to scored deals if it has any relevance
540
  if score > 0:
541
  scored_deals.append((deal, score))