Guiyom committed on
Commit
487344e
·
verified ·
1 Parent(s): 57ab787

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -193
app.py CHANGED
@@ -1,16 +1,19 @@
 
 
1
  import gradio as gr
2
  from openai import OpenAI
3
  import requests
4
  import json
5
  import os
6
  import logging
7
- from typing import Dict, List, Tuple, Optional
8
- from newspaper import Article
9
- import markdown
10
  from datetime import datetime
11
  from bs4 import BeautifulSoup
12
  from googlesearch import search
13
  from newsapi import NewsApiClient
 
 
 
14
 
15
  # Set up logging
16
  logging.basicConfig(
@@ -23,10 +26,9 @@ class RaindropSearchBot:
23
  def __init__(self):
24
  self.openai_api_key = os.getenv('openaikey')
25
  self.raindrop_api_token = os.getenv('raindroptoken')
26
- self.serpapi_key = os.getenv('serpapikey')
27
  self.newsapi_key = os.getenv('newsapikey')
28
 
29
- if not all([self.openai_api_key, self.raindrop_api_token, self.serpapi_key, self.newsapi_key]):
30
  raise EnvironmentError(
31
  "Missing required environment variables. Please ensure all API keys are set."
32
  )
@@ -34,6 +36,38 @@ class RaindropSearchBot:
34
  self.client = OpenAI(api_key=self.openai_api_key)
35
  self.newsapi = NewsApiClient(api_key=self.newsapi_key)
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  def extract_content_from_url(self, url: str) -> Optional[str]:
38
  """Extract main content from a URL using BeautifulSoup."""
39
  try:
@@ -84,18 +118,18 @@ class RaindropSearchBot:
84
  url = item.get('link') or item.get('url')
85
  if not url:
86
  return item
87
-
88
  # For Raindrop items, use existing excerpt if available
89
  if source_type == 'raindrop' and item.get('excerpt'):
90
  content = item['excerpt']
91
  else:
92
  content = self.extract_content_from_url(url)
93
-
94
  if not content:
95
  logger.warning(f"No content extracted from {url}")
96
  item['detailed_summary'] = "Content extraction failed."
97
  return item
98
-
99
  # Generate summary focused on the query topic
100
  try:
101
  prompt = f"""
@@ -110,7 +144,7 @@ class RaindropSearchBot:
110
  4. Keep the summary to 2-3 paragraphs
111
  5. Highlight any unique insights from this source
112
  """
113
-
114
  response = self.client.chat.completions.create(
115
  model="gpt-4o-mini",
116
  messages=[{"role": "user", "content": prompt}],
@@ -124,77 +158,12 @@ class RaindropSearchBot:
124
  except Exception as e:
125
  logger.error(f"Error generating summary: {e}")
126
  item['detailed_summary'] = "Summary generation failed."
127
-
128
  return item
129
 
130
  except Exception as e:
131
  logger.error(f"Error processing item: {e}")
132
  return item
133
-
134
- def get_google_results(self, query: str, num_results: int = 5) -> List[Dict]:
135
- """Get Google search results using googlesearch-python."""
136
- try:
137
- search_results = []
138
- for result in search(query, num_results=num_results, advanced=True):
139
- search_results.append({
140
- 'title': result.title,
141
- 'link': result.url,
142
- 'snippet': result.description
143
- })
144
- return search_results
145
-
146
- except Exception as e:
147
- logger.error(f"Google search error: {e}")
148
- return []
149
-
150
- def get_news_results(self, query: str, num_results: int = 5) -> List[Dict]:
151
- """Get news articles using NewsAPI."""
152
- try:
153
- news_results = self.newsapi.get_everything(
154
- q=query,
155
- language='en',
156
- sort_by='relevancy',
157
- page_size=num_results
158
- )
159
-
160
- return news_results.get('articles', [])
161
-
162
- except Exception as e:
163
- logger.error(f"News API error: {e}")
164
- return []
165
-
166
- def generate_search_query(self, user_request: str) -> str:
167
- """Convert user request to optimized search terms."""
168
- logger.info(f"Generating search query for: {user_request}")
169
-
170
- prompt = f"""
171
- You are a search expert. Create a search query to find relevant documents about:
172
- {user_request}
173
-
174
- Guidelines:
175
- - Focus on key concepts and synonyms
176
- - Use simple keywords that would appear in titles or descriptions
177
- - Avoid complex operators or special characters
178
- - Return only the search terms, no explanation
179
- - Include alternative phrasings
180
- - Keep it concise (max 3-4 key terms/phrases)
181
-
182
- Return only the search query terms.
183
- """
184
-
185
- try:
186
- response = self.client.chat.completions.create(
187
- model="gpt-4o-mini",
188
- messages=[{"role": "user", "content": prompt}],
189
- temperature=0.3,
190
- max_tokens=50
191
- )
192
- search_query = response.choices[0].message.content.strip()
193
- logger.info(f"Generated search query: {search_query}")
194
- return search_query
195
- except Exception as e:
196
- logger.error(f"Error generating search query: {e}")
197
- return user_request
198
 
199
  def search_raindrop(self, search_query: str) -> List[Dict]:
200
  """Search Raindrop.io with enhanced error handling and logging."""
@@ -244,122 +213,6 @@ class RaindropSearchBot:
244
  logger.error(f"Search error: {e}")
245
  return []
246
 
247
- def analyze_results(self, results: List[Dict], google_results: List[Dict],
248
- news_results: List[Dict], user_query: str) -> str:
249
- """Generate an analysis of all search results."""
250
- if not any([results, google_results, news_results]):
251
- return "No relevant results found. Try modifying your search terms."
252
-
253
- # Create context for analysis
254
- context = f"Based on the search query: '{user_query}'\n\n"
255
- context += "Analyze these sources:\n\n"
256
-
257
- # Add Raindrop results
258
- if results:
259
- context += "Bookmarked Sources:\n"
260
- for item in results:
261
- context += f"Title: {item.get('title', 'No title')}\n"
262
- if item.get('excerpt'):
263
- context += f"Content: {item['excerpt'][:500]}...\n"
264
- context += f"Created: {item.get('created', 'No date')}\n\n"
265
-
266
- # Add Google results
267
- if google_results:
268
- context += "Web Sources:\n"
269
- for item in google_results:
270
- context += f"Title: {item.get('title', 'No title')}\n"
271
- if item.get('snippet'):
272
- context += f"Content: {item['snippet']}\n\n"
273
-
274
- # Add News results
275
- if news_results:
276
- context += "News Sources:\n"
277
- for item in news_results:
278
- context += f"Title: {item.get('title', 'No title')}\n"
279
- if item.get('description'):
280
- context += f"Content: {item['description']}\n"
281
- context += f"Published: {item.get('publishedAt', 'No date')}\n\n"
282
-
283
- try:
284
- prompt = f"""
285
- Based on all available sources, provide a comprehensive analysis of {user_query}.
286
-
287
- Requirements:
288
- 1. Focus on the most relevant and recent information
289
- 2. Organize the response by key themes
290
- 3. Include only factual information from the sources
291
- 4. Highlight any significant developments or changes
292
- 5. Note any conflicts or differences between sources
293
- 6. Prioritize official sources and recent developments
294
-
295
- Context:
296
- {context}
297
- """
298
-
299
- response = self.client.chat.completions.create(
300
- model="gpt-4o-mini",
301
- messages=[{"role": "user", "content": prompt}],
302
- temperature=0.5,
303
- max_tokens=1000
304
- )
305
-
306
- analysis = response.choices[0].message.content
307
- return analysis
308
- except Exception as e:
309
- logger.error(f"Analysis generation error: {e}")
310
- return "Error generating analysis."
311
-
312
- def format_results(self, results: Tuple[List[Dict], List[Dict], List[Dict]],
313
- essay: str) -> str:
314
- """Format the essay and results with detailed summaries."""
315
- raindrop_results, google_results, news_results = results
316
-
317
- output = f"{essay}\n\n"
318
- output += "---\n\n"
319
- output += "# References and Detailed Summaries\n\n"
320
-
321
- ref_counter = 1
322
-
323
- # Format Raindrop results
324
- if raindrop_results:
325
- output += "## 🔍 Bookmarked Sources\n\n"
326
- for item in raindrop_results:
327
- output += f"### [{ref_counter}] {item.get('title', 'No Title')}\n"
328
- output += f"**Link**: {item.get('link')}\n"
329
- if item.get('tags'):
330
- output += f"**Tags**: {', '.join(item['tags'])}\n"
331
- if item.get('created'):
332
- output += f"**Created**: {item['created'][:10]}\n"
333
- output += "\n**Summary**:\n"
334
- output += f"{item.get('detailed_summary', 'No summary available.')}\n\n"
335
- ref_counter += 1
336
-
337
- # Format Google results
338
- if google_results:
339
- output += "## 🌐 Web Sources\n\n"
340
- for item in google_results:
341
- output += f"### [{ref_counter}] {item.get('title', 'No Title')}\n"
342
- output += f"**Link**: {item.get('link')}\n"
343
- output += "\n**Summary**:\n"
344
- output += f"{item.get('detailed_summary', 'No summary available.')}\n\n"
345
- ref_counter += 1
346
-
347
- # Format News results
348
- if news_results:
349
- output += "## 📰 Recent News\n\n"
350
- for item in news_results:
351
- output += f"### [{ref_counter}] {item.get('title', 'No Title')}\n"
352
- output += f"**Link**: {item.get('url')}\n"
353
- if item.get('source', {}).get('name'):
354
- output += f"**Source**: {item['source']['name']}\n"
355
- if item.get('publishedAt'):
356
- output += f"**Published**: {item['publishedAt'][:10]}\n"
357
- output += "\n**Summary**:\n"
358
- output += f"{item.get('detailed_summary', 'No summary available.')}\n\n"
359
- ref_counter += 1
360
-
361
- return output
362
-
363
  def process_all_results(self, raindrop_results: List[Dict],
364
  google_results: List[Dict],
365
  news_results: List[Dict]) -> Tuple[List[Dict], List[Dict], List[Dict]]:
@@ -439,7 +292,58 @@ class RaindropSearchBot:
439
  except Exception as e:
440
  logger.error(f"Error generating essay: {e}")
441
  return "Error generating analysis."
442
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
  def process_request(self, user_request: str) -> str:
444
  """Process the user request with enhanced content collection and analysis."""
445
  try:
@@ -469,6 +373,39 @@ class RaindropSearchBot:
469
  logger.error(f"Error processing request: {e}", exc_info=True)
470
  return f"An error occurred while processing your request. Please try again."
471
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
  # Initialize bot
473
  bot = RaindropSearchBot()
474
 
@@ -511,4 +448,4 @@ with gr.Blocks(title="Enhanced Search Assistant", theme=gr.themes.Soft()) as dem
511
 
512
  # Launch the interface
513
  if __name__ == "__main__":
514
- demo.launch(share=True)
 
1
+ # python
2
+
3
  import gradio as gr
4
  from openai import OpenAI
5
  import requests
6
  import json
7
  import os
8
  import logging
9
+ from typing import Dict, List, Optional, Tuple
 
 
10
  from datetime import datetime
11
  from bs4 import BeautifulSoup
12
  from googlesearch import search
13
  from newsapi import NewsApiClient
14
+ import markdown
15
+ import re
16
+ import time
17
 
18
  # Set up logging
19
  logging.basicConfig(
 
26
  def __init__(self):
27
  self.openai_api_key = os.getenv('openaikey')
28
  self.raindrop_api_token = os.getenv('raindroptoken')
 
29
  self.newsapi_key = os.getenv('newsapikey')
30
 
31
+ if not all([self.openai_api_key, self.raindrop_api_token, self.newsapi_key]):
32
  raise EnvironmentError(
33
  "Missing required environment variables. Please ensure all API keys are set."
34
  )
 
36
  self.client = OpenAI(api_key=self.openai_api_key)
37
  self.newsapi = NewsApiClient(api_key=self.newsapi_key)
38
 
39
+ def get_google_results(self, query: str, num_results: int = 5) -> List[Dict]:
40
+ """Get Google search results using googlesearch-python."""
41
+ try:
42
+ search_results = []
43
+ for result in search(query, num_results=num_results, advanced=True):
44
+ search_results.append({
45
+ 'title': result.title,
46
+ 'link': result.url,
47
+ 'snippet': result.description
48
+ })
49
+ return search_results
50
+
51
+ except Exception as e:
52
+ logger.error(f"Google search error: {e}")
53
+ return []
54
+
55
+ def get_news_results(self, query: str, num_results: int = 5) -> List[Dict]:
56
+ """Get news articles using NewsAPI."""
57
+ try:
58
+ news_results = self.newsapi.get_everything(
59
+ q=query,
60
+ language='en',
61
+ sort_by='relevancy',
62
+ page_size=num_results
63
+ )
64
+
65
+ return news_results.get('articles', [])
66
+
67
+ except Exception as e:
68
+ logger.error(f"News API error: {e}")
69
+ return []
70
+
71
  def extract_content_from_url(self, url: str) -> Optional[str]:
72
  """Extract main content from a URL using BeautifulSoup."""
73
  try:
 
118
  url = item.get('link') or item.get('url')
119
  if not url:
120
  return item
121
+
122
  # For Raindrop items, use existing excerpt if available
123
  if source_type == 'raindrop' and item.get('excerpt'):
124
  content = item['excerpt']
125
  else:
126
  content = self.extract_content_from_url(url)
127
+
128
  if not content:
129
  logger.warning(f"No content extracted from {url}")
130
  item['detailed_summary'] = "Content extraction failed."
131
  return item
132
+
133
  # Generate summary focused on the query topic
134
  try:
135
  prompt = f"""
 
144
  4. Keep the summary to 2-3 paragraphs
145
  5. Highlight any unique insights from this source
146
  """
147
+
148
  response = self.client.chat.completions.create(
149
  model="gpt-4o-mini",
150
  messages=[{"role": "user", "content": prompt}],
 
158
  except Exception as e:
159
  logger.error(f"Error generating summary: {e}")
160
  item['detailed_summary'] = "Summary generation failed."
161
+
162
  return item
163
 
164
  except Exception as e:
165
  logger.error(f"Error processing item: {e}")
166
  return item
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
 
168
  def search_raindrop(self, search_query: str) -> List[Dict]:
169
  """Search Raindrop.io with enhanced error handling and logging."""
 
213
  logger.error(f"Search error: {e}")
214
  return []
215
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  def process_all_results(self, raindrop_results: List[Dict],
217
  google_results: List[Dict],
218
  news_results: List[Dict]) -> Tuple[List[Dict], List[Dict], List[Dict]]:
 
292
  except Exception as e:
293
  logger.error(f"Error generating essay: {e}")
294
  return "Error generating analysis."
295
+
296
+ def format_results(self, results: Tuple[List[Dict], List[Dict], List[Dict]],
297
+ essay: str) -> str:
298
+ """Format the essay and results with detailed summaries."""
299
+ raindrop_results, google_results, news_results = results
300
+
301
+ output = f"{essay}\n\n"
302
+ output += "---\n\n"
303
+ output += "# References and Detailed Summaries\n\n"
304
+
305
+ ref_counter = 1
306
+
307
+ # Format Raindrop results
308
+ if raindrop_results:
309
+ output += "## 🔍 Bookmarked Sources\n\n"
310
+ for item in raindrop_results:
311
+ output += f"### [{ref_counter}] {item.get('title', 'No Title')}\n"
312
+ output += f"**Link**: {item.get('link')}\n"
313
+ if item.get('tags'):
314
+ output += f"**Tags**: {', '.join(item['tags'])}\n"
315
+ if item.get('created'):
316
+ output += f"**Created**: {item['created'][:10]}\n"
317
+ output += "\n**Summary**:\n"
318
+ output += f"{item.get('detailed_summary', 'No summary available.')}\n\n"
319
+ ref_counter += 1
320
+
321
+ # Format Google results
322
+ if google_results:
323
+ output += "## 🌐 Web Sources\n\n"
324
+ for item in google_results:
325
+ output += f"### [{ref_counter}] {item.get('title', 'No Title')}\n"
326
+ output += f"**Link**: {item.get('link')}\n"
327
+ output += "\n**Summary**:\n"
328
+ output += f"{item.get('detailed_summary', 'No summary available.')}\n\n"
329
+ ref_counter += 1
330
+
331
+ # Format News results
332
+ if news_results:
333
+ output += "## 📰 Recent News\n\n"
334
+ for item in news_results:
335
+ output += f"### [{ref_counter}] {item.get('title', 'No Title')}\n"
336
+ output += f"**Link**: {item.get('url')}\n"
337
+ if item.get('source', {}).get('name'):
338
+ output += f"**Source**: {item['source']['name']}\n"
339
+ if item.get('publishedAt'):
340
+ output += f"**Published**: {item['publishedAt'][:10]}\n"
341
+ output += "\n**Summary**:\n"
342
+ output += f"{item.get('detailed_summary', 'No summary available.')}\n\n"
343
+ ref_counter += 1
344
+
345
+ return output
346
+
347
  def process_request(self, user_request: str) -> str:
348
  """Process the user request with enhanced content collection and analysis."""
349
  try:
 
373
  logger.error(f"Error processing request: {e}", exc_info=True)
374
  return f"An error occurred while processing your request. Please try again."
375
 
376
+ def generate_search_query(self, user_request: str) -> str:
377
+ """Convert user request to optimized search terms."""
378
+ logger.info(f"Generating search query for: {user_request}")
379
+
380
+ prompt = f"""
381
+ You are a search expert. Create a search query to find relevant documents about:
382
+ {user_request}
383
+
384
+ Guidelines:
385
+ - Focus on key concepts and synonyms
386
+ - Use simple keywords that would appear in titles or descriptions
387
+ - Avoid complex operators or special characters
388
+ - Return only the search terms, no explanation
389
+ - Include alternative phrasings
390
+ - Keep it concise (max 3-4 key terms/phrases)
391
+
392
+ Return only the search query terms.
393
+ """
394
+
395
+ try:
396
+ response = self.client.chat.completions.create(
397
+ model="gpt-4o-mini",
398
+ messages=[{"role": "user", "content": prompt}],
399
+ temperature=0.3,
400
+ max_tokens=50
401
+ )
402
+ search_query = response.choices[0].message.content.strip()
403
+ logger.info(f"Generated search query: {search_query}")
404
+ return search_query
405
+ except Exception as e:
406
+ logger.error(f"Error generating search query: {e}")
407
+ return user_request
408
+
409
  # Initialize bot
410
  bot = RaindropSearchBot()
411
 
 
448
 
449
  # Launch the interface
450
  if __name__ == "__main__":
451
+ demo.launch(share=True)