SentimentNews / src /utils.py
bijayjr's picture
updates
5884827
import requests
import io
from bs4 import BeautifulSoup
from gtts import gTTS
from deep_translator import GoogleTranslator
from transformers import pipeline
from keybert import KeyBERT
# News Extraction
def extract_news(topic, timeout=10):
    """
    Extracts news articles related to the given topic from the Economic Times website.

    Args:
        topic (str): The topic for which news articles need to be extracted.
        timeout (float | tuple): Seconds to wait for the server before giving up;
            passed straight to ``requests.get``. Defaults to 10.

    Returns:
        list[dict]: A list of dictionaries containing news titles and summaries
        (keys ``"title"`` and ``"summary"``). Empty if the request fails.
    """
    url = f"https://economictimes.indiatimes.com/topic/{topic}"
    headers = {"User-Agent": "Mozilla/5.0"}

    try:
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        # A timeout raises requests.Timeout, which is a RequestException and is
        # therefore handled by the except clause below.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise an HTTPError for bad responses (4xx and 5xx)
    except requests.RequestException as e:
        print(f"Error fetching news: {e}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")
    articles = []

    # Article blocks are the divs whose class attribute is exactly this string.
    article_blocks = soup.find_all("div", class_="clr flt topicstry story_list")
    for article in article_blocks:
        title_tag = article.find("a", class_="wrapLines l2")
        summary_tag = article.find("p", class_="wrapLines l3")

        # Fall back to placeholder text when a block lacks the expected tag.
        title = title_tag.text.strip() if title_tag else "Title not found"
        summary = summary_tag.text.strip() if summary_tag else "Summary not found"

        articles.append({"title": title, "summary": summary})

    return articles
# Sentiment analysis pipeline, created once at import time and reused by every call
sentiment_pipeline = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
def analyze_sentiment(text):
    """
    Analyzes the sentiment of the given text using a pre-trained model.

    Args:
        text (str): The input text to analyze.

    Returns:
        str: Sentiment label ('POSITIVE' or 'NEGATIVE').
    """
    # truncation=True clips inputs longer than the model's maximum sequence
    # length instead of letting the pipeline fail on them; shorter inputs
    # are unaffected.
    result = sentiment_pipeline(text, truncation=True)[0]
    # The pipeline returns one result dict per input; only the label is needed.
    return result["label"]
# KeyBERT keyword extractor, built once at module level with the
# "distilbert-base-nli-mean-tokens" embedding model
kw_model = KeyBERT(model="distilbert-base-nli-mean-tokens")
def extract_keywords_keybert(text):
    """
    Returns the top keywords KeyBERT finds in the given text.

    Args:
        text (str): The input text for keyword extraction.

    Returns:
        list[str]: Up to three title-cased keywords or two-word keyphrases.
    """
    # Ask for the three best candidates, allowing unigrams and bigrams.
    scored_phrases = kw_model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 2),
        top_n=3,
    )

    # Each result is a (phrase, score) pair; only the phrase is kept.
    title_cased = []
    for phrase, _score in scored_phrases:
        title_cased.append(phrase.title())
    return title_cased
# Hindi Speech Generation
def generate_hindi_speech(text):
    """
    Translates the given text to Hindi and synthesizes it as speech.

    Args:
        text (str): The input text to be translated and converted to speech.

    Returns:
        io.BytesIO: An in-memory buffer holding the generated audio,
        positioned at the start so it can be read immediately.
    """
    # Step 1: translate to Hindi, letting the translator detect the source language.
    translator = GoogleTranslator(source="auto", target="hi")
    translated = translator.translate(text)

    # Step 2: synthesize the Hindi text.
    speech = gTTS(translated, lang="hi")

    # Step 3: write the audio into memory rather than to a file on disk.
    buffer = io.BytesIO()
    speech.write_to_fp(buffer)
    buffer.seek(0)  # Rewind so consumers read from the first byte
    return buffer