import requests import io from bs4 import BeautifulSoup from gtts import gTTS from deep_translator import GoogleTranslator from transformers import pipeline from keybert import KeyBERT # News Extraction def extract_news(topic): """ Extracts news articles related to the given topic from the Economic Times website. Args: topic (str): The topic for which news articles need to be extracted. Returns: list[dict]: A list of dictionaries containing news titles and summaries. """ url = f"https://economictimes.indiatimes.com/topic/{topic}" headers = {"User-Agent": "Mozilla/5.0"} try: response = requests.get(url, headers=headers) response.raise_for_status() # Raise an HTTPError for bad responses (4xx and 5xx) except requests.RequestException as e: print(f"Error fetching news: {e}") return [] soup = BeautifulSoup(response.text, "html.parser") articles = [] article_blocks = soup.find_all("div", class_="clr flt topicstry story_list") for article in article_blocks: title_tag = article.find("a", class_="wrapLines l2") summary_tag = article.find("p", class_="wrapLines l3") title = title_tag.text.strip() if title_tag else "Title not found" summary = summary_tag.text.strip() if summary_tag else "Summary not found" articles.append({"title": title, "summary": summary}) return articles # Sentiment Analysis Pipeline sentiment_pipeline = pipeline( "sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english" ) def analyze_sentiment(text): """ Analyzes the sentiment of the given text using a pre-trained model. Args: text (str): The input text to analyze. Returns: str: Sentiment label ('POSITIVE' or 'NEGATIVE'). """ result = sentiment_pipeline(text)[0] # Process text through the model return result["label"] # Extract and return sentiment label # Keyword Extraction using KeyBERT kw_model = KeyBERT("distilbert-base-nli-mean-tokens") def extract_keywords_keybert(text): """ Extracts keywords from the given text using KeyBERT. Args: text (str): The input text for keyword extraction. Returns: list[str]: A list of extracted keywords (title-cased). """ keywords = kw_model.extract_keywords(text, keyphrase_ngram_range=(1, 2), top_n=3) return [kw[0].title() for kw in keywords] # Hindi Speech Generation def generate_hindi_speech(text): """ Converts the given text into Hindi speech. Args: text (str): The input text to be translated and converted to speech. Returns: io.BytesIO: A buffer containing the generated speech audio. """ # Translate text to Hindi hindi_text = GoogleTranslator(source="auto", target="hi").translate(text) # Convert translated text to speech tts = gTTS(hindi_text, lang="hi") # Store the generated speech in memory audio_buffer = io.BytesIO() tts.write_to_fp(audio_buffer) audio_buffer.seek(0) # Reset buffer position for playback return audio_buffer