Shreyas094 committed on
Commit
773f976
·
verified ·
1 Parent(s): a89fe32

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -5
app.py CHANGED
@@ -276,13 +276,19 @@ def summarize_news_content(content, model):
276
 
277
  return summary, cleaned_summary
278
 
279
- def process_google_news_rss(query, temperature, top_p, repetition_penalty):
280
  model = get_model(temperature, top_p, repetition_penalty)
281
  embed = get_embeddings()
282
 
283
- articles = fetch_google_news_rss(query)
 
 
 
 
 
 
284
  if not articles:
285
- return "No news articles found for the given query."
286
 
287
  processed_articles = []
288
 
@@ -309,7 +315,7 @@ def process_google_news_rss(query, temperature, top_p, repetition_penalty):
309
  print(f"Error processing article: {str(e)}")
310
 
311
  if not processed_articles:
312
- return "Failed to process any news articles. Please try a different query or check the summarization process."
313
 
314
  # Add processed articles to the database
315
  docs = [Document(page_content=article["cleaned_summary"], metadata={
@@ -331,10 +337,55 @@ def process_google_news_rss(query, temperature, top_p, repetition_penalty):
331
  global news_database
332
  news_database.extend(processed_articles)
333
 
334
- return f"Processed and added {len(processed_articles)} news articles to the database."
335
  except Exception as e:
336
  return f"Error adding articles to the database: {str(e)}"
337
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  def export_news_to_excel():
339
  global news_database
340
  df = pd.DataFrame(news_database)
@@ -531,6 +582,26 @@ with gr.Blocks() as demo:
531
  fetch_news_button = gr.Button("Fetch News")
532
 
533
  news_fetch_output = gr.Textbox(label="News Fetch Status")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
534
 
535
  def chat(question, history, temperature, top_p, repetition_penalty, web_search, google_news_rss):
536
  answer = ask_question(question, temperature, top_p, repetition_penalty, web_search, google_news_rss)
 
276
 
277
  return summary, cleaned_summary
278
 
279
+ def process_news(query, temperature, top_p, repetition_penalty, news_source):
280
  model = get_model(temperature, top_p, repetition_penalty)
281
  embed = get_embeddings()
282
 
283
+ if news_source == "Google News RSS":
284
+ articles = fetch_google_news_rss(query)
285
+ elif news_source == "Golomt Bank":
286
+ articles = fetch_golomt_bank_news()
287
+ else:
288
+ return "Invalid news source selected."
289
+
290
  if not articles:
291
+ return f"No news articles found for the given {news_source}."
292
 
293
  processed_articles = []
294
 
 
315
  print(f"Error processing article: {str(e)}")
316
 
317
  if not processed_articles:
318
+ return f"Failed to process any news articles from {news_source}. Please try again or check the summarization process."
319
 
320
  # Add processed articles to the database
321
  docs = [Document(page_content=article["cleaned_summary"], metadata={
 
337
  global news_database
338
  news_database.extend(processed_articles)
339
 
340
+ return f"Processed and added {len(processed_articles)} news articles from {news_source} to the database."
341
  except Exception as e:
342
  return f"Error adding articles to the database: {str(e)}"
343
 
344
def fetch_golomt_bank_news(num_results=10, timeout=10):
    """Scrape recent news items from the Golomt Bank investor-relations page.

    Downloads the listing page, takes the first ``num_results``
    ``<article class="gt-post">`` elements and, for each one, downloads the
    linked article page to extract the text of its ``div.entry-post`` element.

    Args:
        num_results: Maximum number of listing entries to process.
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled server cannot block the caller indefinitely.

    Returns:
        A list of dicts with the keys ``"published_date"``, ``"title"``,
        ``"url"`` and ``"content"``. Missing pieces are filled with the
        placeholder strings ``"No Date"``, ``"No Title"``, ``"No Link"`` and
        ``"No content found"``. An empty list is returned when the listing
        page itself cannot be fetched or parsed.
    """
    # Local import keeps this block self-contained; urljoin is stdlib.
    from urllib.parse import urljoin

    base_url = "https://golomtbank.com/en/investor-relations"

    try:
        response = requests.get(base_url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        news_items = []
        for article in soup.find_all('article', class_='gt-post')[:num_results]:
            title_tag = article.find('h5', class_='gt-carousel-title')
            title = title_tag.get_text(strip=True) if title_tag else "No Title"

            date_tag = article.find('div', class_='entry-date gt-meta')
            date = date_tag.get_text(strip=True) if date_tag else "No Date"

            # .get() tolerates an <a> without an href attribute; indexing
            # would raise KeyError and discard every item collected so far.
            link_tag = article.find('a')
            href = link_tag.get('href') if link_tag else None

            if href:
                # urljoin resolves site-relative, page-relative and
                # protocol-relative hrefs and leaves absolute URLs untouched.
                link = urljoin(base_url, href)
                try:
                    article_response = requests.get(link, timeout=timeout)
                    article_response.raise_for_status()
                    article_soup = BeautifulSoup(article_response.content, 'html.parser')
                    body_tag = article_soup.find('div', class_='entry-post')
                    content = body_tag.get_text(strip=True) if body_tag else "No content found"
                except Exception as e:
                    # Deliberate best-effort: one bad article must not abort
                    # the whole listing, so the failure is recorded in-band.
                    content = f"Error fetching article content: {str(e)}"
            else:
                # Nothing to fetch; keep the placeholder instead of building
                # and requesting a bogus URL from it.
                link = "No Link"
                content = "No content found"

            news_items.append({
                "published_date": date,
                "title": title,
                "url": link,
                "content": content,
            })

        return news_items
    except Exception as e:
        # Boundary handler: callers treat an empty list as "no articles".
        print(f"Error fetching Golomt Bank news: {str(e)}")
        return []
388
+
389
  def export_news_to_excel():
390
  global news_database
391
  df = pd.DataFrame(news_database)
 
582
  fetch_news_button = gr.Button("Fetch News")
583
 
584
  news_fetch_output = gr.Textbox(label="News Fetch Status")
585
+
586
+ with gr.Row():
587
+ news_source_dropdown = gr.Dropdown(
588
+ choices=["Google News RSS", "Golomt Bank"],
589
+ label="Select News Source",
590
+ value="Google News RSS"
591
+ )
592
+ news_query_input = gr.Textbox(label="Enter news query (for Google News RSS)")
593
+ fetch_news_button = gr.Button("Fetch News")
594
+
595
+ news_fetch_output = gr.Textbox(label="News Fetch Status")
596
+
597
+ def fetch_news(query, temperature, top_p, repetition_penalty, news_source):
598
+ return process_news(query, temperature, top_p, repetition_penalty, news_source)
599
+
600
+ fetch_news_button.click(
601
+ fetch_news,
602
+ inputs=[news_query_input, temperature_slider, top_p_slider, repetition_penalty_slider, news_source_dropdown],
603
+ outputs=news_fetch_output
604
+ )
605
 
606
  def chat(question, history, temperature, top_p, repetition_penalty, web_search, google_news_rss):
607
  answer = ask_question(question, temperature, top_p, repetition_penalty, web_search, google_news_rss)