import io
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from deep_translator import GoogleTranslator
from gtts import gTTS
from keybert import KeyBERT
from transformers import pipeline

# News Extraction
def extract_news(topic, *, timeout=10):
    """
    Extracts news articles related to the given topic from the Economic Times website.

    Args:
        topic (str): The topic for which news articles need to be extracted.
        timeout (float | tuple): Seconds to wait for the server before giving up;
            forwarded to ``requests.get``. Defaults to 10.

    Returns:
        list[dict]: One dictionary per article block found, with the keys
        "title" and "summary". Empty when the request fails (the error is
        printed) or when no article block is found in the page.
    """
    # Percent-encode the topic so characters such as "/", "?" or "#" stay
    # inside the single path segment instead of changing the URL's structure.
    encoded_topic = quote(str(topic), safe="")
    url = f"https://economictimes.indiatimes.com/topic/{encoded_topic}"
    headers = {"User-Agent": "Mozilla/5.0"}

    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise an HTTPError for bad responses (4xx and 5xx)
    except requests.RequestException as e:
        # Timeouts are RequestException subclasses, so they land here as well.
        print(f"Error fetching news: {e}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")

    # NOTE: a multi-word class_ string is compared against the exact value of
    # the class attribute, so this depends on the class order in the markup.
    article_blocks = soup.find_all("div", class_="clr flt topicstry story_list")

    articles = []
    for article in article_blocks:
        title_tag = article.find("a", class_="wrapLines l2")
        summary_tag = article.find("p", class_="wrapLines l3")

        # Fall back to placeholders so every entry carries both keys.
        title = title_tag.text.strip() if title_tag else "Title not found"
        summary = summary_tag.text.strip() if summary_tag else "Summary not found"

        articles.append({"title": title, "summary": summary})

    return articles


# Sentiment analysis: the pipeline is built once, when this module is imported,
# and shared by every call to analyze_sentiment().
sentiment_pipeline = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

def analyze_sentiment(text):
    """
    Analyzes the sentiment of the given text using a pre-trained model.

    Input longer than the model's maximum sequence length is truncated by the
    tokenizer, so only the leading part of a very long text is scored.

    Args:
        text (str): The input text to analyze.

    Returns:
        str: Sentiment label ('POSITIVE' or 'NEGATIVE').
    """
    # truncation=True is forwarded to the tokenizer; without it, text that
    # exceeds the model's maximum length raises an error inside the model.
    result = sentiment_pipeline(text, truncation=True)[0]
    return result["label"]  # Only the label is returned; the score is dropped


# Keyword extraction: one KeyBERT instance, created at import time and reused
# by extract_keywords_keybert().
kw_model = KeyBERT(model="distilbert-base-nli-mean-tokens")

def extract_keywords_keybert(text):
    """
    Returns up to three title-cased keyphrases for the given text.

    The module-level KeyBERT model is asked for its top three candidates,
    each one or two words long.

    Args:
        text (str): The input text for keyword extraction.

    Returns:
        list[str]: The extracted keyphrases, each converted with ``str.title()``.
    """
    scored_phrases = kw_model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 2),
        top_n=3,
    )

    titled = []
    for scored in scored_phrases:
        # Only the first element of each entry (the phrase) is kept.
        phrase = scored[0]
        titled.append(phrase.title())
    return titled


# Hindi Speech Generation
def generate_hindi_speech(text):
    """
    Translates the given text to Hindi and synthesizes it as speech.

    Args:
        text (str): The input text to be translated and converted to speech.

    Returns:
        io.BytesIO: An in-memory buffer holding the generated audio, rewound
        to position 0 so it can be read from the start.
    """
    # Step 1: translate to Hindi, letting the translator detect the source language.
    translator = GoogleTranslator(source="auto", target="hi")
    hindi_text = translator.translate(text)

    # Step 2: synthesize the Hindi text.
    speech = gTTS(hindi_text, lang="hi")

    # Step 3: write the audio into memory rather than to a file on disk.
    buffer = io.BytesIO()
    speech.write_to_fp(buffer)
    buffer.seek(0)  # Rewind so the caller reads from the beginning
    return buffer