Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -276,13 +276,19 @@ def summarize_news_content(content, model):
|
|
276 |
|
277 |
return summary, cleaned_summary
|
278 |
|
279 |
-
def
|
280 |
model = get_model(temperature, top_p, repetition_penalty)
|
281 |
embed = get_embeddings()
|
282 |
|
283 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
284 |
if not articles:
|
285 |
-
return "No news articles found for the given
|
286 |
|
287 |
processed_articles = []
|
288 |
|
@@ -309,7 +315,7 @@ def process_google_news_rss(query, temperature, top_p, repetition_penalty):
|
|
309 |
print(f"Error processing article: {str(e)}")
|
310 |
|
311 |
if not processed_articles:
|
312 |
-
return "Failed to process any news articles. Please try
|
313 |
|
314 |
# Add processed articles to the database
|
315 |
docs = [Document(page_content=article["cleaned_summary"], metadata={
|
@@ -331,10 +337,55 @@ def process_google_news_rss(query, temperature, top_p, repetition_penalty):
|
|
331 |
global news_database
|
332 |
news_database.extend(processed_articles)
|
333 |
|
334 |
-
return f"Processed and added {len(processed_articles)} news articles to the database."
|
335 |
except Exception as e:
|
336 |
return f"Error adding articles to the database: {str(e)}"
|
337 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
338 |
def export_news_to_excel():
|
339 |
global news_database
|
340 |
df = pd.DataFrame(news_database)
|
@@ -531,6 +582,26 @@ with gr.Blocks() as demo:
|
|
531 |
fetch_news_button = gr.Button("Fetch News")
|
532 |
|
533 |
news_fetch_output = gr.Textbox(label="News Fetch Status")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
534 |
|
535 |
def chat(question, history, temperature, top_p, repetition_penalty, web_search, google_news_rss):
|
536 |
answer = ask_question(question, temperature, top_p, repetition_penalty, web_search, google_news_rss)
|
|
|
276 |
|
277 |
return summary, cleaned_summary
|
278 |
|
279 |
+
def process_news(query, temperature, top_p, repetition_penalty, news_source):
|
280 |
model = get_model(temperature, top_p, repetition_penalty)
|
281 |
embed = get_embeddings()
|
282 |
|
283 |
+
if news_source == "Google News RSS":
|
284 |
+
articles = fetch_google_news_rss(query)
|
285 |
+
elif news_source == "Golomt Bank":
|
286 |
+
articles = fetch_golomt_bank_news()
|
287 |
+
else:
|
288 |
+
return "Invalid news source selected."
|
289 |
+
|
290 |
if not articles:
|
291 |
+
return f"No news articles found for the given {news_source}."
|
292 |
|
293 |
processed_articles = []
|
294 |
|
|
|
315 |
print(f"Error processing article: {str(e)}")
|
316 |
|
317 |
if not processed_articles:
|
318 |
+
return f"Failed to process any news articles from {news_source}. Please try again or check the summarization process."
|
319 |
|
320 |
# Add processed articles to the database
|
321 |
docs = [Document(page_content=article["cleaned_summary"], metadata={
|
|
|
337 |
global news_database
|
338 |
news_database.extend(processed_articles)
|
339 |
|
340 |
+
return f"Processed and added {len(processed_articles)} news articles from {news_source} to the database."
|
341 |
except Exception as e:
|
342 |
return f"Error adding articles to the database: {str(e)}"
|
343 |
|
344 |
+
def fetch_golomt_bank_news(num_results=10, timeout=10):
    """Scrape recent news items from the Golomt Bank investor-relations page.

    Downloads the listing page, takes the first ``num_results``
    ``<article class="gt-post">`` elements and, for each one that carries a
    link, downloads the linked page to extract the text of its
    ``<div class="entry-post">`` element.

    Args:
        num_results: Maximum number of listing entries to process.
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled server cannot block the caller indefinitely.

    Returns:
        A list of dicts with the keys ``published_date``, ``title``, ``url``
        and ``content``. Fields that cannot be found are filled with the
        placeholders "No Title", "No Date", "No Link" and "No content found";
        a failed article download stores the error text in ``content``.
        An empty list is returned when the listing page itself cannot be
        fetched or parsed.
    """
    # Local import keeps this block self-contained with respect to the
    # file-level import list.
    from urllib.parse import urljoin

    base_url = "https://golomtbank.com/en/investor-relations"

    try:
        response = requests.get(base_url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        articles = soup.find_all('article', class_='gt-post')
        news_items = []

        for article in articles[:num_results]:
            title_div = article.find('h5', class_='gt-carousel-title')
            title = title_div.get_text(strip=True) if title_div else "No Title"

            date_div = article.find('div', class_='entry-date gt-meta')
            date = date_div.get_text(strip=True) if date_div else "No Date"

            link_tag = article.find('a')
            # .get() avoids a KeyError on an <a> without href, which would
            # otherwise abort the whole scrape via the outer handler.
            href = link_tag.get('href') if link_tag else None

            if href:
                # urljoin leaves absolute URLs untouched and resolves both
                # "/path" and "path" style relative links against the site.
                link = urljoin(base_url, href)
                try:
                    article_response = requests.get(link, timeout=timeout)
                    article_response.raise_for_status()
                    article_soup = BeautifulSoup(article_response.content, 'html.parser')
                    article_content_div = article_soup.find('div', class_='entry-post')
                    content = (
                        article_content_div.get_text(strip=True)
                        if article_content_div
                        else "No content found"
                    )
                except Exception as e:
                    # Best effort: one broken article must not drop the rest.
                    content = f"Error fetching article content: {str(e)}"
            else:
                # Nothing to download; do not build a bogus URL from the
                # placeholder text.
                link = "No Link"
                content = "No content found"

            news_items.append({
                "published_date": date,
                "title": title,
                "url": link,
                "content": content,
            })

        return news_items
    except Exception as e:
        # Scraping boundary: report the failure and let the caller treat it
        # as "no articles found".
        print(f"Error fetching Golomt Bank news: {str(e)}")
        return []
|
388 |
+
|
389 |
def export_news_to_excel():
|
390 |
global news_database
|
391 |
df = pd.DataFrame(news_database)
|
|
|
582 |
fetch_news_button = gr.Button("Fetch News")
|
583 |
|
584 |
news_fetch_output = gr.Textbox(label="News Fetch Status")
|
585 |
+
|
586 |
+
with gr.Row():
|
587 |
+
news_source_dropdown = gr.Dropdown(
|
588 |
+
choices=["Google News RSS", "Golomt Bank"],
|
589 |
+
label="Select News Source",
|
590 |
+
value="Google News RSS"
|
591 |
+
)
|
592 |
+
news_query_input = gr.Textbox(label="Enter news query (for Google News RSS)")
|
593 |
+
fetch_news_button = gr.Button("Fetch News")
|
594 |
+
|
595 |
+
news_fetch_output = gr.Textbox(label="News Fetch Status")
|
596 |
+
|
597 |
+
def fetch_news(query, temperature, top_p, repetition_penalty, news_source):
    """Click handler for the "Fetch News" button.

    Forwards the query text, the three generation settings and the selected
    news source to ``process_news`` and returns whatever it returns, which is
    shown in the status textbox wired up as the handler's output.
    """
    status = process_news(
        query=query,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        news_source=news_source,
    )
    return status
|
599 |
+
|
600 |
+
fetch_news_button.click(
|
601 |
+
fetch_news,
|
602 |
+
inputs=[news_query_input, temperature_slider, top_p_slider, repetition_penalty_slider, news_source_dropdown],
|
603 |
+
outputs=news_fetch_output
|
604 |
+
)
|
605 |
|
606 |
def chat(question, history, temperature, top_p, repetition_penalty, web_search, google_news_rss):
|
607 |
answer = ask_question(question, temperature, top_p, repetition_penalty, web_search, google_news_rss)
|