"""WAVE_AI: a multi-purpose Streamlit app.

Tabs: text summarization (T5), text tag generation (T5), image captioning
(BLIP), and YouTube transcript extraction.
"""
import streamlit as st
from urllib.parse import urlparse, parse_qs
from transformers import (
    T5ForConditionalGeneration,
    T5Tokenizer,
    pipeline,
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
)
import nltk
from youtube_transcript_api import YouTubeTranscriptApi

# Download NLTK data (no-op if already present)
nltk.download('punkt')


@st.cache_resource
def load_models():
    """Load and cache all models/tokenizers once per server process.

    Streamlit re-executes the whole script on every user interaction;
    without caching, the models would be re-downloaded/re-loaded on each
    rerun, which is prohibitively slow.
    """
    summary_model_name = 'utrobinmv/t5_summary_en_ru_zh_base_2048'
    summary_model = T5ForConditionalGeneration.from_pretrained(summary_model_name)
    summary_tokenizer = T5Tokenizer.from_pretrained(summary_model_name)
    tag_tokenizer = AutoTokenizer.from_pretrained("fabiochiu/t5-base-tag-generation")
    tag_model = AutoModelForSeq2SeqLM.from_pretrained("fabiochiu/t5-base-tag-generation")
    captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    return summary_model, summary_tokenizer, tag_tokenizer, tag_model, captioner


summary_model, summary_tokenizer, tag_tokenizer, tag_model, captioner = load_models()


def summarize_text(text, prefix):
    """Summarize *text* with the T5 summary model.

    Args:
        text: Source text to summarize.
        prefix: Task prefix understood by the model, e.g. ``'summary: '``
            or ``'summary brief: '``.

    Returns:
        The decoded summary string.
    """
    src_text = prefix + text
    input_ids = summary_tokenizer(src_text, return_tensors="pt")
    generated_tokens = summary_model.generate(**input_ids)
    result = summary_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    return result[0]


def extract_video_id(url):
    """Extract the 11-character video id from a YouTube URL.

    Handles both ``youtube.com/watch?v=<id>&...`` and ``youtu.be/<id>``
    forms; trailing query parameters (e.g. ``&t=10s``) are discarded.
    Falls back to the original naive split so previously-working inputs
    keep working.
    """
    parsed = urlparse(url)
    if parsed.hostname and 'youtu.be' in parsed.hostname:
        return parsed.path.lstrip('/')
    query = parse_qs(parsed.query)
    if 'v' in query:
        return query['v'][0]
    # Fallback for bare ids or unusual inputs (original behavior).
    return url.split('watch?v=')[-1]


def fetch_transcript(url):
    """Fetch and join the transcript text of a YouTube video.

    Args:
        url: A YouTube video URL (or bare video id).

    Returns:
        The transcript as a single space-joined string.

    Raises:
        Exception: Propagated from ``YouTubeTranscriptApi`` when the
            transcript is unavailable; callers handle and display it.
    """
    video_id = extract_video_id(url)
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    return ' '.join(entry['text'] for entry in transcript)


# Streamlit app title
st.title("Multi-purpose Machine Learning App: WAVE_AI")

# Create tabs for different functionalities
tab1, tab2, tab3, tab4 = st.tabs([
    "Text Summarization",
    "Text Tag Generation",
    "Image Captioning",
    "YouTube Transcript",
])

# Text Summarization Tab
with tab1:
    st.header("Text Summarization")
    input_text = st.text_area("Enter the text to summarize:", height=300)
    if st.button("Generate Summaries"):
        if input_text:
            title1 = summarize_text(input_text, 'summary: ')
            title2 = summarize_text(input_text, 'summary brief: ')
            st.write("### Title 1")
            st.write(title1)
            st.write("### Title 2")
            st.write(title2)
        else:
            st.warning("Please enter some text to summarize.")

# Text Tag Generation Tab
with tab2:
    st.header("Text Tag Generation")
    text = st.text_area("Enter the text for tag extraction:", height=200)
    if st.button("Generate Tags"):
        if text:
            try:
                inputs = tag_tokenizer(
                    [text], max_length=512, truncation=True, return_tensors="pt"
                )
                output = tag_model.generate(
                    **inputs, num_beams=8, do_sample=True, min_length=10, max_length=64
                )
                decoded_output = tag_tokenizer.batch_decode(
                    output, skip_special_tokens=True
                )[0]
                # De-duplicate tags; note set() does not preserve order.
                tags = list(set(decoded_output.strip().split(", ")))
                st.write("**Generated Tags:**")
                st.write(tags)
            except Exception as e:
                st.error(f"An error occurred: {e}")
        else:
            st.warning("Please enter some text to generate tags.")

# Image Captioning Tab
with tab3:
    st.header("Image Captioning Extractor")
    image_url = st.text_input("Enter the URL of the image:")
    if image_url:
        try:
            st.image(image_url, caption="Provided Image", use_column_width=True)
            caption = captioner(image_url)
            st.write("**Generated Caption:**")
            st.write(caption[0]['generated_text'])
        except Exception as e:
            st.error(f"An error occurred: {e}")

# YouTube Transcript Tab
with tab4:
    st.header("YouTube Video Transcript Extractor")
    youtube_url = st.text_input("Enter YouTube URL:")
    if st.button("Get Transcript"):
        if youtube_url:
            # Explicit exception handling instead of the fragile
            # substring check ("error" in text), which misclassified any
            # transcript that merely contained the word "error".
            try:
                transcript = fetch_transcript(youtube_url)
            except Exception as e:
                st.error(f"An error occurred: {e}")
            else:
                st.success("Transcript successfully fetched!")
                st.text_area("Transcript", transcript, height=300)
        else:
            st.warning("Please enter a URL.")