"""Gradio app that summarizes the English subtitles of a YouTube video."""

import contextlib
import html
import re

import gradio as gr
from transformers import pipeline
import youtube_dl

# Load the 'falcon-7b-instruct' model for summarization.
# NOTE(review): the bare name only resolves if a local directory or an
# un-namespaced hub repo with this name exists, and Falcon is an instruct-style
# checkpoint rather than a dedicated summarization model -- confirm that this
# model/task pairing loads and produces 'summary_text' as expected.
model_name = 'falcon-7b-instruct'
summarizer = pipeline("summarization", model=model_name, tokenizer=model_name)

# Message shown in the UI whenever no summary can be produced.
_FAILURE_MESSAGE = "Unable to fetch or process the transcript."

# Matches inline WebVTT markup such as <c>, <v Speaker> or <00:00:01.000>.
_VTT_TAG_RE = re.compile(r"<[^>]+>")


def _vtt_to_text(vtt):
    """Return the spoken text of a WebVTT document as one plain string.

    Only cue payloads are kept: the header, NOTE/STYLE/REGION blocks, cue
    identifiers, timing lines and inline tags are dropped. Consecutive
    identical lines are collapsed into a single occurrence.
    """
    normalized = vtt.replace("\r\n", "\n").replace("\r", "\n")
    pieces = []
    # WebVTT blocks are separated by blank lines.
    for block in re.split(r"\n\s*\n", normalized):
        lines = block.split("\n")
        timing_index = next(
            (i for i, line in enumerate(lines) if "-->" in line), None
        )
        if timing_index is None:
            # No timing line: header, comment or style block, not a cue.
            continue
        for line in lines[timing_index + 1:]:
            text = html.unescape(_VTT_TAG_RE.sub("", line)).strip()
            if text and (not pieces or pieces[-1] != text):
                pieces.append(text)
    return " ".join(pieces)


# Function to extract YouTube transcript given a video URL
def extract_youtube_transcript(url):
    """Return the URL of the English WebVTT subtitle track of a video.

    Args:
        url: Address of the video page.

    Returns:
        The subtitle track URL, or None when the video metadata cannot be
        fetched or no English 'vtt' track is listed.
    """
    ydl_opts = {
        'writesubtitles': True,
        'subtitleslangs': ['en'],
        'skip_download': True,
        'ignoreerrors': True,
    }
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
    except Exception as e:
        print(f"An error occurred while fetching the transcript: {e}")
        return None

    # With 'ignoreerrors' set, a failed extraction yields None, not a dict.
    if not info:
        return None

    # 'subtitles' maps a language code to a list of available formats, each
    # a dict carrying at least 'ext' and usually 'url'.
    english_tracks = (info.get('subtitles') or {}).get('en') or []
    for track in english_tracks:
        if track.get('ext') == 'vtt' and track.get('url'):
            return track['url']
    return None


# Define the Gradio interface
def summarize_youtube_transcript(url):
    """Summarize the English subtitles of the video at ``url``.

    Args:
        url: Address of the video page, as typed into the UI.

    Returns:
        The generated summary, or a fixed failure message when the
        transcript cannot be located, downloaded, parsed or summarized.
    """
    if not url or not url.strip():
        return _FAILURE_MESSAGE

    transcript_url = extract_youtube_transcript(url.strip())
    if not transcript_url:
        return _FAILURE_MESSAGE

    # Broad catch on purpose: this is the UI boundary and any failure should
    # surface as the friendly message rather than a stack trace.
    try:
        # Download the subtitle file itself; going through youtube_dl's
        # opener reuses its HTTP configuration.
        with youtube_dl.YoutubeDL({}) as ydl:
            with contextlib.closing(ydl.urlopen(transcript_url)) as response:
                raw_vtt = response.read().decode("utf-8", errors="replace")

        transcript = _vtt_to_text(raw_vtt)
        if not transcript:
            return _FAILURE_MESSAGE

        summary = summarizer(
            transcript, max_length=300, min_length=50, do_sample=False
        )
        return summary[0]['summary_text']
    except Exception as e:
        print(f"An error occurred while processing the transcript: {e}")
    return _FAILURE_MESSAGE


# Create the Gradio interface
iface = gr.Interface(
    fn=summarize_youtube_transcript,
    # gr.Textbox replaces the deprecated gr.inputs / gr.outputs namespaces.
    inputs=gr.Textbox(label="YouTube URL"),
    outputs=gr.Textbox(label="Summary"),
    title="YouTube Transcript Summarizer",
    description="Enter a YouTube URL and get a summary of the transcript.",
    theme="huggingface",
)

# Launch the interface
iface.launch(share=True)