Spaces:
Paused
Paused
Shreyas094
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -341,47 +341,105 @@ def process_news(query, temperature, top_p, repetition_penalty, news_source):
|
|
341 |
except Exception as e:
|
342 |
return f"Error adding articles to the database: {str(e)}"
|
343 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
344 |
def fetch_golomt_bank_news(num_results=10):
|
345 |
base_url = "https://golomtbank.com/en/investor-relations"
|
|
|
|
|
346 |
|
347 |
try:
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
if not
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
content
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
news_item = {
|
377 |
-
"published_date": date,
|
378 |
-
"title": title,
|
379 |
-
"url": link,
|
380 |
-
"content": content
|
381 |
-
}
|
382 |
-
news_items.append(news_item)
|
383 |
-
|
384 |
-
return news_items
|
385 |
except Exception as e:
|
386 |
print(f"Error fetching Golomt Bank news: {str(e)}")
|
387 |
return []
|
@@ -598,7 +656,7 @@ with gr.Blocks() as demo:
|
|
598 |
|
599 |
def fetch_news(query, temperature, top_p, repetition_penalty, news_source):
|
600 |
return process_news(query, temperature, top_p, repetition_penalty, news_source)
|
601 |
-
|
602 |
fetch_news_button.click(
|
603 |
fetch_news,
|
604 |
inputs=[news_query_input, temperature_slider, top_p_slider, repetition_penalty_slider, news_source_dropdown],
|
|
|
341 |
except Exception as e:
|
342 |
return f"Error adding articles to the database: {str(e)}"
|
343 |
|
344 |
+
|
345 |
+
def fetch_articles_from_page(url):
    """Fetch the article cards from one Golomt Bank listing page.

    Parameters
    ----------
    url : str
        Absolute URL of the listing page to scrape.

    Returns
    -------
    tuple
        ``(articles, soup)`` where ``articles`` is the list of card
        ``<div>`` tags found on the page and ``soup`` is the parsed page
        (the caller uses it to locate the "next" pagination link).

    Raises
    ------
    requests.HTTPError
        If the page responds with a non-2xx status (via raise_for_status).
    """
    # BUG FIX: timeout added — without it a stalled server hangs the app forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    articles = soup.find_all('div', class_='gt-carousel-item gt-box-shadow-2')
    return articles, soup
|
351 |
+
|
352 |
+
def extract_articles(articles):
    """Extract title, date, link and full body text for each article card.

    Parameters
    ----------
    articles : list
        BeautifulSoup ``<div>`` card tags as returned by
        ``fetch_articles_from_page``.

    Returns
    -------
    list[dict]
        One dict per card with keys ``'title'``, ``'date'``, ``'link'``,
        ``'content'``; missing pieces fall back to the sentinel strings
        "No Title" / "No Date" / "No Link" / "No content found".

    Raises
    ------
    requests.HTTPError
        If an article detail page responds with a non-2xx status.
    """
    article_data = []
    for article in articles:
        title_div = article.find('h5', class_='gt-carousel-title')
        title = title_div.get_text(strip=True) if title_div else "No Title"
        date_div = article.find('div', class_='entry-date gt-meta')
        date = date_div.get_text(strip=True) if date_div else "No Date"
        link_tag = article.find('a')
        link = link_tag['href'] if link_tag else "No Link"
        if link_tag:
            if not link.startswith('http'):
                link = "https://golomtbank.com" + link
            # BUG FIX: timeout added so one slow article page cannot stall the app.
            article_response = requests.get(link, timeout=30)
            article_response.raise_for_status()
            article_soup = BeautifulSoup(article_response.content, 'html.parser')
            article_content_div = article_soup.find('div', class_='entry-post')
            article_content = article_content_div.get_text(strip=True) if article_content_div else "No content found"
        else:
            # BUG FIX: the original prefixed the "No Link" sentinel into
            # "https://golomtbank.comNo Link" and issued a doomed request,
            # aborting the whole batch. Skip fetching when no <a> tag exists.
            article_content = "No content found"
        article_data.append({
            'title': title,
            'date': date,
            'link': link,
            'content': article_content
        })
    return article_data
|
375 |
+
|
376 |
+
# NOTE(review): this span was a second, byte-identical paste of
# fetch_articles_from_page / extract_articles, prefixed with the
# chat-paste artifact "pythonCopyimport requests" — a SyntaxError in a
# real module. The duplicate definitions (which would silently shadow
# the originals above) are removed; only the corrected imports remain.
# These imports belong at the top of app.py; re-importing here is a
# harmless no-op if they are already present there.
import requests
from bs4 import BeautifulSoup
|
409 |
+
|
410 |
def fetch_golomt_bank_news(num_results=10):
    """Collect up to *num_results* news items from Golomt Bank's
    investor-relations section.

    Walks the listing pages by following each page's "next" pagination
    link until enough items are gathered or no further page exists.

    Parameters
    ----------
    num_results : int, optional
        Maximum number of items to return (default 10).

    Returns
    -------
    list[dict]
        Dicts with keys ``'published_date'``, ``'title'``, ``'url'``,
        ``'content'``. Best-effort: any failure is printed and an empty
        list is returned instead of raising.
    """
    base_url = "https://golomtbank.com/en/investor-relations"
    page_url = base_url
    collected = []
    try:
        while len(collected) < num_results:
            print(f"Fetching articles from: {page_url}")
            cards, page_soup = fetch_articles_from_page(page_url)
            if not cards:
                print("No articles found on this page.")
                break

            collected.extend(extract_articles(cards))
            print(f"Total articles fetched so far: {len(collected)}")
            if len(collected) >= num_results:
                # Trim any overshoot from the last page and stop paging.
                collected = collected[:num_results]
                break

            nxt = page_soup.find('a', class_='next')
            if not nxt:
                print("No next page link found.")
                break
            page_url = nxt['href']
            if not page_url.startswith('http'):
                page_url = "https://golomtbank.com" + page_url

        # Re-key each scraped record into the schema the rest of the app expects.
        return [
            {
                "published_date": item['date'],
                "title": item['title'],
                "url": item['link'],
                "content": item['content']
            } for item in collected
        ]
    except Exception as e:
        print(f"Error fetching Golomt Bank news: {str(e)}")
        return []
|
|
|
656 |
|
657 |
def fetch_news(query, temperature, top_p, repetition_penalty, news_source):
    """Gradio click handler: forward the UI inputs straight to process_news."""
    return process_news(query, temperature, top_p, repetition_penalty, news_source)
|
659 |
+
|
660 |
fetch_news_button.click(
|
661 |
fetch_news,
|
662 |
inputs=[news_query_input, temperature_slider, top_p_slider, repetition_penalty_slider, news_source_dropdown],
|