Guiyom committed on
Commit
a3d7f9f
·
verified ·
1 Parent(s): 20c30a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +192 -47
app.py CHANGED
@@ -4,7 +4,9 @@ import requests
4
  import json
5
  import os
6
  import logging
7
- from typing import Dict, List
 
 
8
  from datetime import datetime
9
  from bs4 import BeautifulSoup
10
  from googlesearch import search
@@ -32,6 +34,68 @@ class RaindropSearchBot:
32
  self.client = OpenAI(api_key=self.openai_api_key)
33
  self.newsapi = NewsApiClient(api_key=self.newsapi_key)
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  def get_google_results(self, query: str, num_results: int = 5) -> List[Dict]:
36
  """Get Google search results using googlesearch-python."""
37
  try:
@@ -210,59 +274,139 @@ class RaindropSearchBot:
210
  logger.error(f"Analysis generation error: {e}")
211
  return "Error generating analysis."
212
 
213
def format_results(self, results: List[Dict], google_results: List[Dict],
                   news_results: List[Dict], analysis: str) -> str:
    """Format the analysis followed by the three result lists as plain text.

    Args:
        results: Raindrop bookmark dicts. The keys read are ``title``,
            ``link``, ``tags``, ``excerpt`` and ``created``. Items with
            neither a title nor a link are left out.
        google_results: Web result dicts (``title``, ``link``, ``snippet``).
        news_results: News article dicts (``title``, ``url``,
            ``description``, ``publishedAt``, ``source['name']``).
        analysis: Text placed at the top of the output.

    Returns:
        The analysis, a separator line, then one numbered section per
        non-empty result list.
    """
    # Collect fragments and join once instead of growing a string with +=.
    parts = [f"{analysis}\n\n", "-------\n\n"]

    def add_entry(number, title, fields):
        # `or` (not a .get default) so an empty/None title also falls back.
        parts.append(f"{number}. {title or 'No Title'}\n")
        parts.extend(f" {label}: {value}\n" for label, value in fields if value)
        parts.append("\n")

    # Format Raindrop results
    if results:
        parts.append("🔍 Bookmarked Sources:\n\n")
        # Filter first so the numbering has no gaps for skipped items.
        displayable = [
            item for item in results
            if item.get('title') or item.get('link')
        ]
        for number, item in enumerate(displayable, 1):
            excerpt = item.get('excerpt') or ''
            if len(excerpt) > 200:
                # Only mark the excerpt as truncated when it really was.
                excerpt = f"{excerpt[:200]}..."
            add_entry(number, item.get('title'), [
                ("Link", item.get('link')),
                ("Tags", ", ".join(item.get('tags') or [])),
                ("Description", excerpt),
                ("Created", (item.get('created') or '')[:10]),
            ])

    # Format Google results
    if google_results:
        parts.append("🌐 Web Sources:\n\n")
        for number, item in enumerate(google_results, 1):
            add_entry(number, item.get('title'), [
                ("Link", item.get('link')),
                ("Description", item.get('snippet')),
            ])

    # Format News results
    if news_results:
        parts.append("📰 Recent News:\n\n")
        for number, item in enumerate(news_results, 1):
            add_entry(number, item.get('title'), [
                ("Link", item.get('url')),
                ("Description", item.get('description')),
                ("Published", (item.get('publishedAt') or '')[:10]),
                # `or {}` guards against an explicit None source.
                ("Source", (item.get('source') or {}).get('name')),
            ])

    return "".join(parts)
263
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  def process_request(self, user_request: str) -> str:
265
- """Process the user request with enhanced error handling."""
266
  try:
267
  logger.info(f"Processing request: {user_request}")
268
 
@@ -275,15 +419,16 @@ class RaindropSearchBot:
275
  google_results = self.get_google_results(search_query)
276
  news_results = self.get_news_results(search_query)
277
 
278
- logger.info(f"Found {len(raindrop_results)} Raindrop results")
279
- logger.info(f"Found {len(google_results)} Google results")
280
- logger.info(f"Found {len(news_results)} News results")
 
281
 
282
- # Generate analysis
283
- analysis = self.analyze_results(raindrop_results, google_results, news_results, user_request)
284
 
285
  # Format and return results
286
- return self.format_results(raindrop_results, google_results, news_results, analysis)
287
 
288
  except Exception as e:
289
  logger.error(f"Error processing request: {e}", exc_info=True)
 
4
  import json
5
  import os
6
  import logging
7
+ from typing import Dict, List, Tuple, Optional
8
+ from newspaper import Article
9
+ import markdown
10
  from datetime import datetime
11
  from bs4 import BeautifulSoup
12
  from googlesearch import search
 
34
  self.client = OpenAI(api_key=self.openai_api_key)
35
  self.newsapi = NewsApiClient(api_key=self.newsapi_key)
36
 
37
def extract_content_from_url(self, url: str) -> Optional[str]:
    """Download a page with newspaper's ``Article`` and return its title and text.

    Args:
        url: Address of the page to fetch.

    Returns:
        ``"<title>\\n\\n<text>"`` when that string is not blank, otherwise
        ``None``. Any exception raised while downloading or parsing is
        logged and also results in ``None``.
    """
    try:
        page = Article(url)
        page.download()
        time.sleep(1)  # Polite delay between requests
        page.parse()

        # Title first, then the body text, separated by a blank line.
        combined = f"{page.title}\n\n{page.text}"
    except Exception as e:
        logger.error(f"Error extracting content from {url}: {e}")
        return None

    if not combined.strip():
        return None
    return combined
52
+
53
def get_content_and_summary(self, item: Dict, source_type: str) -> Dict:
    """Attach a model-generated ``detailed_summary`` to a single result.

    The item is modified in place and also returned.

    Args:
        item: Result dict. Its URL is read from ``'link'`` or, failing
            that, ``'url'``.
        source_type: Origin of the item. Only ``'raindrop'`` is treated
            specially: an existing ``'excerpt'`` is used as the content
            instead of fetching the page.

    Returns:
        The same ``item``. When content was available it gains a
        ``'detailed_summary'`` key holding either the model's reply or
        ``"Summary generation failed."`` if the model call raised. The item
        is returned unchanged when it has no URL, no content could be
        obtained, or an unexpected error occurred.
    """
    # Cap on the characters sent to the model, to respect token limits.
    max_content_chars = 4000

    try:
        # Get URL based on source type
        url = item.get('link') or item.get('url')
        if not url:
            return item

        # For Raindrop items, use existing excerpt if available
        if source_type == 'raindrop' and item.get('excerpt'):
            content = item['excerpt']
        else:
            content = self.extract_content_from_url(url)

        if not content:
            return item

        try:
            # The prompt is assembled from plain lines so that nothing but
            # the instructions and the content reaches the model (the old
            # version leaked a "# Limit content length ..." code comment
            # into the prompt text).
            prompt = (
                "Analyze this content and provide a detailed summary "
                "focusing on key points relevant\n"
                "to our topic. Include specific details, data, and quotes "
                "if relevant.\n"
                "\n"
                f"Content: {content[:max_content_chars]}\n"
                "\n"
                "Provide a concise but detailed summary in 2-3 paragraphs.\n"
            )

            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                max_tokens=300
            )

            item['detailed_summary'] = response.choices[0].message.content
        except Exception as e:
            logger.error(f"Error generating summary: {e}")
            item['detailed_summary'] = "Summary generation failed."

        return item

    except Exception as e:
        logger.error(f"Error processing item: {e}")
        return item
98
+
99
  def get_google_results(self, query: str, num_results: int = 5) -> List[Dict]:
100
  """Get Google search results using googlesearch-python."""
101
  try:
 
274
  logger.error(f"Analysis generation error: {e}")
275
  return "Error generating analysis."
276
 
277
def format_results(self, results: Tuple[List[Dict], List[Dict], List[Dict]],
                   essay: str) -> str:
    """Render the essay followed by a numbered markdown reference list.

    Reference numbers run continuously across the three sections, in the
    order Raindrop, Google, News, with one number per item.

    Args:
        results: ``(raindrop_results, google_results, news_results)``. Each
            item may carry ``title``, ``link`` or ``url``, and
            ``detailed_summary``. Raindrop items may also carry ``tags`` and
            ``created``; news items ``source['name']`` and ``publishedAt``.
        essay: Markdown text placed at the top of the output.

    Returns:
        A markdown document: the essay, a horizontal rule, then one section
        per non-empty result list.
    """
    raindrop_results, google_results, news_results = results

    # Collect fragments and join once instead of growing a string with +=.
    parts = [
        f"{essay}\n\n",
        "---\n\n",
        "# References and Detailed Summaries\n\n",
    ]

    def raindrop_meta(item):
        # Extra metadata lines shown only for bookmarks.
        meta = []
        if item.get('tags'):
            meta.append(f"**Tags**: {', '.join(item['tags'])}\n")
        if item.get('created'):
            meta.append(f"**Created**: {item['created'][:10]}\n")
        return meta

    def news_meta(item):
        # Extra metadata lines shown only for news articles.
        meta = []
        # `or {}` guards against an explicit None source.
        source_name = (item.get('source') or {}).get('name')
        if source_name:
            meta.append(f"**Source**: {source_name}\n")
        if item.get('publishedAt'):
            meta.append(f"**Published**: {item['publishedAt'][:10]}\n")
        return meta

    sections = (
        ("🔍 Bookmarked Sources", raindrop_results, raindrop_meta),
        ("🌐 Web Sources", google_results, lambda item: []),
        ("📰 Recent News", news_results, news_meta),
    )

    ref_counter = 1
    for heading, items, meta_for in sections:
        if not items:
            continue
        parts.append(f"## {heading}\n\n")
        for item in items:
            # `or` (not a .get default) so an empty/None title also falls back.
            parts.append(f"### [{ref_counter}] {item.get('title') or 'No Title'}\n")
            # Bookmarks and web results use 'link', news articles use 'url'.
            link = item.get('link') or item.get('url')
            if link:
                # Omit the line rather than print "**Link**: None".
                parts.append(f"**Link**: {link}\n")
            parts.extend(meta_for(item))
            parts.append("\n**Summary**:\n")
            parts.append(f"{item.get('detailed_summary', 'No summary available.')}\n\n")
            ref_counter += 1

    return "".join(parts)
327
 
328
def process_all_results(self, raindrop_results: List[Dict],
                        google_results: List[Dict],
                        news_results: List[Dict]) -> Tuple[List[Dict], List[Dict], List[Dict]]:
    """Enrich every result with a summary and drop those that got none.

    Each item is passed to ``get_content_and_summary`` together with its
    source tag (``'raindrop'``, ``'google'`` or ``'news'``). Only items
    that come back with a truthy ``'detailed_summary'`` are kept.

    Args:
        raindrop_results: Bookmark result dicts.
        google_results: Web search result dicts.
        news_results: News article dicts.

    Returns:
        ``(processed_raindrop, processed_google, processed_news)``, each a
        new list that keeps the input order.
    """

    def enrich(items: List[Dict], source_type: str) -> List[Dict]:
        # One shared pass replaces three copy-pasted loops; the generator
        # keeps the calls strictly in input order.
        processed = (
            self.get_content_and_summary(item, source_type) for item in items
        )
        return [item for item in processed if item.get('detailed_summary')]

    # Tuple elements are evaluated left to right, so sources are processed
    # in the order raindrop, google, news.
    return (
        enrich(raindrop_results, 'raindrop'),
        enrich(google_results, 'google'),
        enrich(news_results, 'news'),
    )
352
+
353
def generate_essay_response(self, results: Tuple[List[Dict], List[Dict], List[Dict]],
                            user_query: str) -> str:
    """Generate a structured essay-style response with numbered references.

    Every summarised source is handed to the model prefixed with its
    reference number, so the ``[n]`` citations it writes can point at real
    sources. Numbers are assigned one per item in the order Raindrop,
    Google, News.

    Args:
        results: ``(raindrop_results, google_results, news_results)``. Items
            are read for ``detailed_summary``, ``title`` and ``link`` or
            ``url``.
        user_query: The topic the essay should address.

    Returns:
        The essay text from the model, or ``"Error generating analysis."``
        if the model call fails.
    """
    raindrop_results, google_results, news_results = results

    # Collect all content for analysis
    labelled_sources = []
    reference_map = {}
    ref_counter = 1

    for source_list in (raindrop_results, google_results, news_results):
        for item in source_list:
            summary = item.get('detailed_summary')
            if summary:
                title = item.get('title') or 'No Title'
                labelled_sources.append(f"[{ref_counter}] {title}\n{summary}\n")
                # News items store their address under 'url', not 'link';
                # indexing item['link'] directly raised KeyError for them.
                url = item.get('link') or item.get('url')
                if url:
                    reference_map[url] = ref_counter
            # One number per item, summarised or not.
            ref_counter += 1

    all_content = "\n".join(labelled_sources)

    try:
        prompt = f"""
Create a comprehensive essay-style analysis about: {user_query}

Use this content as your source material. Each source starts with its
reference number in square brackets:

{all_content}

Requirements:
1. Structure the response in clear sections with markdown headers
2. Include an introduction and conclusion
3. Use reference numbers [n] to cite sources, matching the numbers above
4. Make connections between different sources
5. Highlight key findings and trends
6. Address any contradictions or gaps
7. Use markdown formatting for better readability

Format the response as a proper academic essay with sections.
"""

        response = self.client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.5,
            max_tokens=1500
        )

        essay = response.choices[0].message.content

        # Replace any [URL:...] placeholders with actual reference numbers
        for url, ref_num in reference_map.items():
            essay = essay.replace(f'[URL:{url}]', f'[{ref_num}]')

        return essay

    except Exception as e:
        logger.error(f"Error generating essay: {e}")
        return "Error generating analysis."
407
+
408
  def process_request(self, user_request: str) -> str:
409
+ """Process the user request with enhanced content collection and analysis."""
410
  try:
411
  logger.info(f"Processing request: {user_request}")
412
 
 
419
  google_results = self.get_google_results(search_query)
420
  news_results = self.get_news_results(search_query)
421
 
422
+ # Process all results to get content and summaries
423
+ processed_results = self.process_all_results(
424
+ raindrop_results, google_results, news_results
425
+ )
426
 
427
+ # Generate essay-style analysis
428
+ essay = self.generate_essay_response(processed_results, user_request)
429
 
430
  # Format and return results
431
+ return self.format_results(processed_results, essay)
432
 
433
  except Exception as e:
434
  logger.error(f"Error processing request: {e}", exc_info=True)