"""WAVE_AI: a multi-purpose Streamlit app.

Tabs: text summarization (T5), text tag generation (T5), image captioning
(BLIP), and YouTube transcript extraction.
"""
import streamlit as st
from urllib.parse import urlparse, parse_qs
from transformers import (
    T5ForConditionalGeneration,
    T5Tokenizer,
    pipeline,
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
)
import nltk
from youtube_transcript_api import YouTubeTranscriptApi

# Download NLTK data (no-op if already present)
nltk.download('punkt')


@st.cache_resource
def load_models():
    """Load and cache all models/tokenizers once per server process.

    Streamlit re-executes the whole script on every user interaction;
    without caching, the models would be re-downloaded/re-loaded on each
    rerun, which is prohibitively slow.
    """
    summary_model_name = 'utrobinmv/t5_summary_en_ru_zh_base_2048'
    summary_model = T5ForConditionalGeneration.from_pretrained(summary_model_name)
    summary_tokenizer = T5Tokenizer.from_pretrained(summary_model_name)
    tag_tokenizer = AutoTokenizer.from_pretrained("fabiochiu/t5-base-tag-generation")
    tag_model = AutoModelForSeq2SeqLM.from_pretrained("fabiochiu/t5-base-tag-generation")
    captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    return summary_model, summary_tokenizer, tag_tokenizer, tag_model, captioner


summary_model, summary_tokenizer, tag_tokenizer, tag_model, captioner = load_models()


def summarize_text(text, prefix):
    """Summarize *text* with the T5 summary model.

    Args:
        text: Source text to summarize.
        prefix: Task prefix understood by the model, e.g. ``'summary: '``
            or ``'summary brief: '``.

    Returns:
        The decoded summary string.
    """
    src_text = prefix + text
    input_ids = summary_tokenizer(src_text, return_tensors="pt")
    generated_tokens = summary_model.generate(**input_ids)
    result = summary_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    return result[0]


def extract_video_id(url):
    """Extract the 11-character video id from a YouTube URL.

    Handles both ``youtube.com/watch?v=<id>&...`` and ``youtu.be/<id>``
    forms; trailing query parameters (e.g. ``&t=10s``) are discarded.
    Falls back to the original naive split so previously-working inputs
    keep working.
    """
    parsed = urlparse(url)
    if parsed.hostname and 'youtu.be' in parsed.hostname:
        return parsed.path.lstrip('/')
    query = parse_qs(parsed.query)
    if 'v' in query:
        return query['v'][0]
    # Fallback for bare ids or unusual inputs (original behavior).
    return url.split('watch?v=')[-1]


def fetch_transcript(url):
    """Fetch and join the transcript text of a YouTube video.

    Args:
        url: A YouTube video URL (or bare video id).

    Returns:
        The transcript as a single space-joined string.

    Raises:
        Exception: Propagated from ``YouTubeTranscriptApi`` when the
            transcript is unavailable; callers handle and display it.
    """
    video_id = extract_video_id(url)
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    return ' '.join(entry['text'] for entry in transcript)


# Streamlit app title
st.title("Multi-purpose Machine Learning App: WAVE_AI")

# Create tabs for different functionalities
tab1, tab2, tab3, tab4 = st.tabs([
    "Text Summarization",
    "Text Tag Generation",
    "Image Captioning",
    "YouTube Transcript",
])

# Text Summarization Tab
with tab1:
    st.header("Text Summarization")
    input_text = st.text_area("Enter the text to summarize:", height=300)
    if st.button("Generate Summaries"):
        if input_text:
            title1 = summarize_text(input_text, 'summary: ')
            title2 = summarize_text(input_text, 'summary brief: ')
            st.write("### Title 1")
            st.write(title1)
            st.write("### Title 2")
            st.write(title2)
        else:
            st.warning("Please enter some text to summarize.")

# Text Tag Generation Tab
with tab2:
    st.header("Text Tag Generation")
    text = st.text_area("Enter the text for tag extraction:", height=200)
    if st.button("Generate Tags"):
        if text:
            try:
                inputs = tag_tokenizer(
                    [text], max_length=512, truncation=True, return_tensors="pt"
                )
                output = tag_model.generate(
                    **inputs, num_beams=8, do_sample=True, min_length=10, max_length=64
                )
                decoded_output = tag_tokenizer.batch_decode(
                    output, skip_special_tokens=True
                )[0]
                # De-duplicate tags; note set() does not preserve order.
                tags = list(set(decoded_output.strip().split(", ")))
                st.write("**Generated Tags:**")
                st.write(tags)
            except Exception as e:
                st.error(f"An error occurred: {e}")
        else:
            st.warning("Please enter some text to generate tags.")

# Image Captioning Tab
with tab3:
    st.header("Image Captioning Extractor")
    image_url = st.text_input("Enter the URL of the image:")
    if image_url:
        try:
            st.image(image_url, caption="Provided Image", use_column_width=True)
            caption = captioner(image_url)
            st.write("**Generated Caption:**")
            st.write(caption[0]['generated_text'])
        except Exception as e:
            st.error(f"An error occurred: {e}")

# YouTube Transcript Tab
with tab4:
    st.header("YouTube Video Transcript Extractor")
    youtube_url = st.text_input("Enter YouTube URL:")
    if st.button("Get Transcript"):
        if youtube_url:
            # Explicit exception handling instead of the fragile
            # substring check ("error" in text), which misclassified any
            # transcript that merely contained the word "error".
            try:
                transcript = fetch_transcript(youtube_url)
            except Exception as e:
                st.error(f"An error occurred: {e}")
            else:
                st.success("Transcript successfully fetched!")
                st.text_area("Transcript", transcript, height=300)
        else:
            st.warning("Please enter a URL.")