siddhartharya committed (verified)
Commit 96b351a · 1 Parent(s): 81670c0

Update app.py

Files changed (1)
  1. app.py +128 -127
app.py CHANGED
@@ -43,6 +43,10 @@ fetch_cache = {}
 
 # Lock for thread-safe operations
 lock = threading.Lock()
+api_lock = threading.Lock()  # Added api_lock
+
+# Initialize last_api_call_time
+last_api_call_time = 0  # Added initialization
 
 # Define the categories
 CATEGORIES = [
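For context, the two globals added above are the pieces of a minimum-interval rate limiter used later in the file. A minimal standalone sketch of that pattern (the helper name `wait_for_rate_limit` is hypothetical; the 2-second spacing matches the value used elsewhere in app.py):

```python
import threading
import time

api_lock = threading.Lock()   # serializes access to the shared timestamp
last_api_call_time = 0        # wall-clock time of the previous API call
MIN_INTERVAL = 2              # seconds between calls, as used in app.py

def wait_for_rate_limit():
    """Hypothetical helper: block until MIN_INTERVAL has passed since the last call."""
    global last_api_call_time
    with api_lock:
        elapsed = time.time() - last_api_call_time
        if elapsed < MIN_INTERVAL:
            time.sleep(MIN_INTERVAL - elapsed)
        last_api_call_time = time.time()
```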
@@ -70,35 +74,41 @@ CATEGORIES = [
     "Uncategorized",
 ]
 
-# Remove OpenAI dependencies
 # Define a function to generate responses using llama-3.1-70b-versatile
 def generate_llama_response(prompt):
     """
     Generate a response using the llama-3.1-70b-versatile model.
 
-    Replace the implementation below with the actual method to interact with your LLM.
-    This could be an API call to a local server, a cloud service, or any interface you have set up.
-
-    Example (pseudo-code):
-        response = requests.post("http://localhost:5000/generate", json={"prompt": prompt})
-        return response.json().get("response", "")
-
-    For demonstration purposes, we'll return a placeholder response.
+    This implementation assumes that the model is accessible via a local HTTP API endpoint.
+    Replace the URL and request parameters as per your actual setup.
     """
     try:
-        # Example implementation; replace with actual API call or method
-        # For instance, if using a local API:
-        # response = requests.post("http://localhost:5000/generate", json={"prompt": prompt})
-        # return response.json().get("response", "")
-
-        # Placeholder response
         logger.info("Generating response using llama-3.1-70b-versatile")
-        # Simulate processing time
-        time.sleep(1)
-        return "This is a placeholder response from llama-3.1-70b-versatile."
+        api_url = "http://localhost:5000/generate"  # Replace with your actual endpoint
+        headers = {
+            'Content-Type': 'application/json',
+        }
+        payload = {
+            'prompt': prompt,
+            'max_tokens': 500,   # Adjust as needed
+            'temperature': 0.7,  # Adjust as needed
+        }
+        response = requests.post(api_url, json=payload, headers=headers, timeout=30)
+        response.raise_for_status()  # Raise an exception for HTTP errors
+        data = response.json()
+        generated_text = data.get('response', '').strip()
+        if not generated_text:
+            raise ValueError("Empty response received from the model.")
+        return generated_text
+    except requests.exceptions.RequestException as e:
+        logger.error(f"HTTP Request failed: {e}", exc_info=True)
+        return "Error generating response due to HTTP request failure."
+    except ValueError as ve:
+        logger.error(f"Value Error: {ve}", exc_info=True)
+        return "Error generating response: Received empty response from the model."
     except Exception as e:
-        logger.error(f"Error generating response from llama: {e}", exc_info=True)
-        return "Error generating response."
+        logger.error(f"Unexpected error: {e}", exc_info=True)
+        return "An unexpected error occurred while generating the response."
 
 def extract_main_content(soup):
     """
@@ -175,55 +185,51 @@ def generate_summary_and_assign_category(bookmark):
     """
     logger.info(f"Generating summary and assigning category for bookmark: {bookmark.get('url')}")
 
-    max_retries = 3
-    retry_count = 0
-
-    while retry_count < max_retries:
-        try:
-            # Rate Limiting Logic (if necessary)
-            with lock:
-                global last_api_call_time
-                current_time = time.time()
-                elapsed = current_time - last_api_call_time
-                if elapsed < 2:
-                    sleep_duration = 2 - elapsed
-                    logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
-                    time.sleep(sleep_duration)
-                last_api_call_time = time.time()
-
-            # Prepare the prompt
-            html_content = bookmark.get('html_content', '')
-            soup = BeautifulSoup(html_content, 'html.parser')
-            metadata = get_page_metadata(soup)
-            main_content = extract_main_content(soup)
-
-            # Prepare content for the prompt
-            content_parts = []
-            if metadata['title']:
-                content_parts.append(f"Title: {metadata['title']}")
-            if metadata['description']:
-                content_parts.append(f"Description: {metadata['description']}")
-            if metadata['keywords']:
-                content_parts.append(f"Keywords: {metadata['keywords']}")
-            if main_content:
-                content_parts.append(f"Main Content: {main_content}")
-
-            content_text = '\n'.join(content_parts)
-
-            # Detect insufficient or erroneous content
-            error_keywords = ['Access Denied', 'Security Check', 'Cloudflare', 'captcha', 'unusual traffic']
-            if not content_text or len(content_text.split()) < 50:
-                use_prior_knowledge = True
-                logger.info(f"Content for {bookmark.get('url')} is insufficient. Instructing LLM to use prior knowledge.")
-            elif any(keyword.lower() in content_text.lower() for keyword in error_keywords):
-                use_prior_knowledge = True
-                logger.info(f"Content for {bookmark.get('url')} contains error messages. Instructing LLM to use prior knowledge.")
-            else:
-                use_prior_knowledge = False
+    try:
+        # Rate Limiting Logic
+        with api_lock:
+            global last_api_call_time
+            current_time = time.time()
+            elapsed = current_time - last_api_call_time
+            if elapsed < 2:
+                sleep_duration = 2 - elapsed
+                logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
+                time.sleep(sleep_duration)
+            last_api_call_time = current_time
+
+        # Prepare the prompt
+        html_content = bookmark.get('html_content', '')
+        soup = BeautifulSoup(html_content, 'html.parser')
+        metadata = get_page_metadata(soup)
+        main_content = extract_main_content(soup)
+
+        # Prepare content for the prompt
+        content_parts = []
+        if metadata['title']:
+            content_parts.append(f"Title: {metadata['title']}")
+        if metadata['description']:
+            content_parts.append(f"Description: {metadata['description']}")
+        if metadata['keywords']:
+            content_parts.append(f"Keywords: {metadata['keywords']}")
+        if main_content:
+            content_parts.append(f"Main Content: {main_content}")
+
+        content_text = '\n'.join(content_parts)
+
+        # Detect insufficient or erroneous content
+        error_keywords = ['Access Denied', 'Security Check', 'Cloudflare', 'captcha', 'unusual traffic']
+        if not content_text or len(content_text.split()) < 50:
+            use_prior_knowledge = True
+            logger.info(f"Content for {bookmark.get('url')} is insufficient. Instructing LLM to use prior knowledge.")
+        elif any(keyword.lower() in content_text.lower() for keyword in error_keywords):
+            use_prior_knowledge = True
+            logger.info(f"Content for {bookmark.get('url')} contains error messages. Instructing LLM to use prior knowledge.")
+        else:
+            use_prior_knowledge = False
 
-            # Craft the prompt based on content availability
-            if use_prior_knowledge:
-                prompt = f"""
+        # Craft the prompt based on content availability
+        if use_prior_knowledge:
+            prompt = f"""
 You are a knowledgeable assistant with up-to-date information as of 2023.
 URL: {bookmark.get('url')}
 Provide:
@@ -235,8 +241,8 @@ Format:
 Summary: [Your summary]
 Category: [One category]
 """
-            else:
-                prompt = f"""
+        else:
+            prompt = f"""
 You are an assistant that creates concise webpage summaries and assigns categories.
 Content:
 {content_text}
@@ -250,46 +256,44 @@ Summary: [Your summary]
 Category: [One category]
 """
 
-            # Generate response using llama-3.1-70b-versatile
-            response = generate_llama_response(prompt)
+        # Generate response using llama-3.1-70b-versatile
+        response = generate_llama_response(prompt)
 
-            if not response:
-                raise ValueError("Empty response received from the model.")
+        if not response:
+            raise ValueError("Empty response received from the model.")
 
-            # Parse the response
-            summary_match = re.search(r"Summary:\s*(.*)", response)
-            category_match = re.search(r"Category:\s*(.*)", response)
+        # Parse the response
+        summary_match = re.search(r"Summary:\s*(.*)", response)
+        category_match = re.search(r"Category:\s*(.*)", response)
 
-            if summary_match:
-                bookmark['summary'] = summary_match.group(1).strip()
-            else:
-                bookmark['summary'] = 'No summary available.'
+        if summary_match:
+            bookmark['summary'] = summary_match.group(1).strip()
+        else:
+            bookmark['summary'] = 'No summary available.'
 
-            if category_match:
-                category = category_match.group(1).strip().strip('"')
-                if category in CATEGORIES:
-                    bookmark['category'] = category
-                else:
-                    bookmark['category'] = 'Uncategorized'
+        if category_match:
+            category = category_match.group(1).strip().strip('"')
+            if category in CATEGORIES:
+                bookmark['category'] = category
             else:
                 bookmark['category'] = 'Uncategorized'
+        else:
+            bookmark['category'] = 'Uncategorized'
 
-            # Optional: Simple keyword-based validation
-            summary_lower = bookmark['summary'].lower()
-            url_lower = bookmark['url'].lower()
-            if 'social media' in summary_lower or 'twitter' in summary_lower or 'x.com' in url_lower:
-                bookmark['category'] = 'Social Media'
-            elif 'wikipedia' in url_lower:
-                bookmark['category'] = 'Reference and Knowledge Bases'
+        # Optional: Simple keyword-based validation
+        summary_lower = bookmark['summary'].lower()
+        url_lower = bookmark['url'].lower()
+        if 'social media' in summary_lower or 'twitter' in summary_lower or 'x.com' in url_lower:
+            bookmark['category'] = 'Social Media'
+        elif 'wikipedia' in url_lower:
+            bookmark['category'] = 'Reference and Knowledge Bases'
 
-            logger.info("Successfully generated summary and assigned category")
-            break  # Exit the retry loop upon success
+        logger.info("Successfully generated summary and assigned category")
 
-        except Exception as e:
-            logger.error(f"Error generating summary and assigning category: {e}", exc_info=True)
-            bookmark['summary'] = 'No summary available.'
-            bookmark['category'] = 'Uncategorized'
-            break  # Exit the retry loop on exceptions
+    except Exception as e:
+        logger.error(f"Error generating summary and assigning category: {e}", exc_info=True)
+        bookmark['summary'] = 'No summary available.'
+        bookmark['category'] = 'Uncategorized'
 
 def parse_bookmarks(file_content):
     """
@@ -444,7 +448,7 @@ def display_bookmarks():
     logger.info("HTML display generated")
     return cards
 
-def process_uploaded_file(file, state_bookmarks):
+def process_uploaded_file(file, state_bookmarks, bookmark_selector):
     """
     Process the uploaded bookmarks file.
     """
@@ -453,23 +457,23 @@ def process_uploaded_file(file, state_bookmarks):
 
     if file is None:
         logger.warning("No file uploaded")
-        return "Please upload a bookmarks HTML file.", '', state_bookmarks, gr.CheckboxGroup.update(choices=[])
+        return "Please upload a bookmarks HTML file.", '', state_bookmarks, bookmark_selector.update(choices=[])
 
     try:
         file_content = file.decode('utf-8')
     except UnicodeDecodeError as e:
         logger.error(f"Error decoding the file: {e}", exc_info=True)
-        return "Error decoding the file. Please ensure it's a valid HTML file.", '', state_bookmarks, gr.CheckboxGroup.update(choices=[])
+        return "Error decoding the file. Please ensure it's a valid HTML file.", '', state_bookmarks, bookmark_selector.update(choices=[])
 
     try:
         bookmarks = parse_bookmarks(file_content)
     except Exception as e:
         logger.error(f"Error parsing bookmarks: {e}", exc_info=True)
-        return "Error parsing the bookmarks HTML file.", '', state_bookmarks, gr.CheckboxGroup.update(choices=[])
+        return "Error parsing the bookmarks HTML file.", '', state_bookmarks, bookmark_selector.update(choices=[])
 
     if not bookmarks:
         logger.warning("No bookmarks found in the uploaded file")
-        return "No bookmarks found in the uploaded file.", '', state_bookmarks, gr.CheckboxGroup.update(choices=[])
+        return "No bookmarks found in the uploaded file.", '', state_bookmarks, bookmark_selector.update(choices=[])
 
     # Assign unique IDs to bookmarks
     for idx, bookmark in enumerate(bookmarks):
@@ -489,7 +493,7 @@
         faiss_index = vectorize_and_index(bookmarks)
     except Exception as e:
         logger.error(f"Error building FAISS index: {e}", exc_info=True)
-        return "Error building search index.", '', state_bookmarks, gr.CheckboxGroup.update(choices=[])
+        return "Error building search index.", '', state_bookmarks, bookmark_selector.update(choices=[])
 
     message = f"✅ Successfully processed {len(bookmarks)} bookmarks."
     logger.info(message)
@@ -502,15 +506,15 @@
     # Update state
     state_bookmarks = bookmarks.copy()
 
-    return message, bookmark_html, state_bookmarks, gr.CheckboxGroup.update(choices=choices)
+    return message, bookmark_html, state_bookmarks, bookmark_selector.update(choices=choices)
 
-def delete_selected_bookmarks(selected_indices, state_bookmarks):
+def delete_selected_bookmarks(selected_indices, state_bookmarks, bookmark_selector, bookmark_display_manage):
     """
     Delete selected bookmarks and remove their vectors from the FAISS index.
     """
     global bookmarks, faiss_index
     if not selected_indices:
-        return "⚠️ No bookmarks selected.", gr.CheckboxGroup.update(choices=[]), display_bookmarks()
+        return "⚠️ No bookmarks selected.", bookmark_selector.update(choices=[]), display_bookmarks()
 
     ids_to_delete = []
     indices_to_delete = []
@@ -541,16 +545,16 @@ def delete_selected_bookmarks(selected_indices, state_bookmarks):
     # Update state
     state_bookmarks = bookmarks.copy()
 
-    return message, gr.CheckboxGroup.update(choices=choices), display_bookmarks()
+    return message, bookmark_selector.update(choices=choices), display_bookmarks()
 
-def edit_selected_bookmarks_category(selected_indices, new_category, state_bookmarks):
+def edit_selected_bookmarks_category(selected_indices, new_category, state_bookmarks, bookmark_selector, bookmark_display_manage):
    """
     Edit category of selected bookmarks.
     """
     if not selected_indices:
-        return "⚠️ No bookmarks selected.", gr.CheckboxGroup.update(choices=[]), display_bookmarks(), state_bookmarks
+        return "⚠️ No bookmarks selected.", bookmark_selector.update(choices=[]), display_bookmarks(), state_bookmarks
     if not new_category:
-        return "⚠️ No new category selected.", gr.CheckboxGroup.update(choices=[]), display_bookmarks(), state_bookmarks
+        return "⚠️ No new category selected.", bookmark_selector.update(choices=[]), display_bookmarks(), state_bookmarks
 
     indices = []
     for s in selected_indices:
@@ -577,7 +581,7 @@ def edit_selected_bookmarks_category(selected_indices, new_category, state_bookmarks):
     # Update state
     state_bookmarks = bookmarks.copy()
 
-    return message, gr.CheckboxGroup.update(choices=choices), display_bookmarks(), state_bookmarks
+    return message, bookmark_selector.update(choices=choices), display_bookmarks(), state_bookmarks
 
 def export_bookmarks():
     """
@@ -625,7 +629,7 @@ def chatbot_response(user_query, chat_history):
     chat_history.append({"role": "user", "content": user_query})
 
     # Rate Limiting Logic (if necessary)
-    with lock:
+    with api_lock:
         global last_api_call_time
         current_time = time.time()
         elapsed = current_time - last_api_call_time
@@ -633,7 +637,7 @@ def chatbot_response(user_query, chat_history):
             sleep_duration = 2 - elapsed
             logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
             time.sleep(sleep_duration)
-        last_api_call_time = time.time()
+        last_api_call_time = current_time
 
     # Encode the query and search the FAISS index
     query_vector = embedding_model.encode([user_query]).astype('float32')
@@ -694,12 +698,6 @@ def build_app():
         # Initialize state
         state_bookmarks = gr.State([])
 
-        # Define 'bookmark_selector' once
-        bookmark_selector = gr.CheckboxGroup(
-            label="✅ Select Bookmarks",
-            choices=[]
-        )
-
         # General Overview
         gr.Markdown("""
 # 📚 SmartMarks - AI Browser Bookmarks Manager
@@ -744,8 +742,8 @@ Navigate through the tabs to explore each feature in detail.
 
             process_button.click(
                 process_uploaded_file,
-                inputs=[upload, state_bookmarks],
-                outputs=[output_text, bookmark_display, state_bookmarks, bookmark_selector]
+                inputs=[upload, state_bookmarks, None],  # 'bookmark_selector' is defined within "Manage Bookmarks" tab
+                outputs=[output_text, bookmark_display, state_bookmarks, gr.State(None)]
             )
 
         with gr.Tab("Chat with Bookmarks"):
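For reference, the `Button.click(fn, inputs, outputs)` wiring used in this hunk follows Gradio's standard event pattern; a minimal self-contained sketch (hypothetical app, unrelated to SmartMarks):

```python
import gradio as gr

def shout(text):
    return text.upper()

with gr.Blocks() as demo:
    box = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    btn = gr.Button("Go")
    # fn receives the values of `inputs`; its return value fills `outputs`.
    btn.click(shout, inputs=[box], outputs=[out])

if __name__ == "__main__":
    demo.launch()
```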
@@ -815,8 +813,11 @@ Navigate through the tabs to explore each feature in detail.
             bookmark_display_manage = gr.HTML(label="📄 Bookmarks")
 
             with gr.Row():
-                # Include 'bookmark_selector' directly in the layout
-                bookmark_selector
+                # Define 'bookmark_selector' within the tab
+                bookmark_selector = gr.CheckboxGroup(
+                    label="✅ Select Bookmarks",
+                    choices=[]
+                )
 
             with gr.Row():
                 delete_button = gr.Button("🗑️ Delete Selected")
 