Spaces:

bijayjr
/

SentimentNews

Sleeping

App Files Files Community

SentimentNews / src /utils.py

bijayjr

updates

5884827 5 months ago

raw

history blame contribute delete

3.14 kB

	import requests
	import io
	from bs4 import BeautifulSoup
	from gtts import gTTS
	from deep_translator import GoogleTranslator
	from transformers import pipeline
	from keybert import KeyBERT

	# News Extraction
	def extract_news(topic, timeout=10):
	    """
	    Extracts news articles related to the given topic from the Economic Times website.

	    Args:
	        topic (str): The raw (not yet URL-encoded) topic for which news
	            articles need to be extracted.
	        timeout (float | tuple): Seconds to wait for the server before
	            giving up; forwarded to ``requests.get``. Defaults to 10.

	    Returns:
	        list[dict]: A list of dictionaries with "title" and "summary" keys.
	            An empty list is returned when the request fails (connection
	            error, timeout, or a 4xx/5xx response).
	    """
	    # Local import so the block stays self-contained without touching the file's import list.
	    from urllib.parse import quote

	    # Percent-encode the topic so characters such as "?" or "#" remain part
	    # of the path instead of being parsed as a query string or fragment.
	    url = f"https://economictimes.indiatimes.com/topic/{quote(str(topic))}"
	    headers = {"User-Agent": "Mozilla/5.0"}

	    try:
	        # Without a timeout, requests waits indefinitely on a stalled server.
	        response = requests.get(url, headers=headers, timeout=timeout)
	        response.raise_for_status()  # Raise an HTTPError for bad responses (4xx and 5xx)
	    except requests.RequestException as e:
	        print(f"Error fetching news: {e}")
	        return []

	    soup = BeautifulSoup(response.text, "html.parser")

	    articles = []
	    # Each matching <div> is treated as one story entry on the topic page.
	    for article in soup.find_all("div", class_="clr flt topicstry story_list"):
	        title_tag = article.find("a", class_="wrapLines l2")
	        summary_tag = article.find("p", class_="wrapLines l3")

	        # Fall back to placeholder text when a story block lacks the expected tag.
	        title = title_tag.text.strip() if title_tag else "Title not found"
	        summary = summary_tag.text.strip() if summary_tag else "Summary not found"

	        articles.append({"title": title, "summary": summary})

	    return articles


	# Module-level sentiment classifier, created once at import and reused by analyze_sentiment()
	sentiment_pipeline = pipeline(
	    task="sentiment-analysis",
	    model="distilbert-base-uncased-finetuned-sst-2-english",
	)

	def analyze_sentiment(text):
	    """
	    Analyzes the sentiment of the given text using a pre-trained model.

	    Args:
	        text (str): The input text to analyze.

	    Returns:
	        str: Sentiment label ('POSITIVE' or 'NEGATIVE').
	    """
	    # truncation=True clips over-long input to the model's maximum sequence
	    # length; without it, long text makes the model's forward pass fail.
	    result = sentiment_pipeline(text, truncation=True)[0]
	    return result["label"]  # Only the label is exposed; the score is dropped


	# Module-level KeyBERT extractor, created once at import and reused by extract_keywords_keybert()
	kw_model = KeyBERT(model="distilbert-base-nli-mean-tokens")

	def extract_keywords_keybert(text):
	    """
	    Pulls the top keyphrases out of a piece of text with KeyBERT.

	    Args:
	        text (str): The text to mine for keywords.

	    Returns:
	        list[str]: Up to three one- or two-word keyphrases, title-cased.
	    """
	    ranked = kw_model.extract_keywords(
	        text,
	        keyphrase_ngram_range=(1, 2),
	        top_n=3,
	    )

	    # Keep only the phrase from each entry and title-case it.
	    titled = []
	    for phrase, _score in ranked:
	        titled.append(phrase.title())
	    return titled


	# Hindi Speech Generation
	def generate_hindi_speech(text):
	    """
	    Translates the given text to Hindi and renders it as speech.

	    Args:
	        text (str): The text to translate and speak.

	    Returns:
	        io.BytesIO: In-memory audio buffer, rewound to the start.
	    """
	    # Step 1: translate, letting the translator auto-detect the source language.
	    translator = GoogleTranslator(source="auto", target="hi")
	    translated = translator.translate(text)

	    # Step 2: synthesize Hindi speech from the translation.
	    speech = gTTS(translated, lang="hi")

	    # Step 3: write the audio into memory rather than to a file on disk.
	    buffer = io.BytesIO()
	    speech.write_to_fp(buffer)
	    buffer.seek(0)  # Rewind so consumers read from the first byte

	    return buffer