Spaces:
Sleeping
Sleeping
File size: 2,030 Bytes
4d281ef 84f933b 4d281ef 84f933b 4d281ef 84f933b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import re
import urllib.request

import gradio as gr
import youtube_dl
from transformers import pipeline
# Load the Falcon-7B-Instruct model for summarization.
# Hub checkpoints are addressed as "<namespace>/<name>"; this one is published
# under the "tiiuae" organisation, so the bare name cannot be resolved.
# NOTE(review): Falcon is a decoder-only (causal) model, while the
# "summarization" task normally loads encoder-decoder checkpoints - confirm
# that this task/model pairing actually loads, or switch the task or model.
model_name = 'tiiuae/falcon-7b-instruct'
summarizer = pipeline("summarization", model=model_name, tokenizer=model_name)
# Function to extract YouTube transcript given a video URL
def extract_youtube_transcript(url):
    """Return the URL of the English WebVTT subtitle track of a video.

    Args:
        url: Address of the video to inspect.

    Returns:
        The subtitle track URL, or ``None`` when ``url`` is empty or not a
        string, the metadata lookup fails, or no English ``vtt`` track is
        listed.
    """
    # Nothing to look up: avoid a pointless extractor round-trip.
    if not isinstance(url, str) or not url.strip():
        return None

    ydl_opts = {
        'writesubtitles': True,
        'subtitleslangs': ['en'],
        'skip_download': True,
        'ignoreerrors': True
    }
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
    except Exception as e:
        print(f"An error occurred while fetching the transcript: {e}")
        return None

    # With 'ignoreerrors' enabled a failed extraction yields None, not an error.
    if not info:
        return None

    # 'subtitles' maps a language code to a list of track dicts, each
    # describing one format through its 'ext' and 'url' entries.
    english_tracks = (info.get('subtitles') or {}).get('en') or []
    for track in english_tracks:
        if track.get('ext') == 'vtt':
            return track.get('url')
    return None
# Gradio callback: summarize the English transcript of a YouTube video
_VTT_TAG_RE = re.compile(r"<[^>]*>")


def _vtt_to_text(vtt):
    """Reduce a WebVTT document to its caption text (no headers, timings, tags)."""
    rows = [row.strip() for row in vtt.splitlines()]
    spoken = []
    in_meta_block = False
    for index, row in enumerate(rows):
        if not row:
            # A blank line terminates whatever block we were in.
            in_meta_block = False
            continue
        if in_meta_block:
            continue
        starts_block = index == 0 or not rows[index - 1]
        if starts_block and row.startswith(("WEBVTT", "NOTE", "STYLE", "REGION")):
            # File header and comment/style/region blocks carry no speech.
            in_meta_block = True
            continue
        following = rows[index + 1] if index + 1 < len(rows) else ""
        # Skip cue timing lines and the optional cue identifier right above them.
        if "-->" in row or "-->" in following:
            continue
        text = _VTT_TAG_RE.sub("", row).strip()
        # Rolling captions repeat the previous line; keep a single copy.
        if text and (not spoken or spoken[-1] != text):
            spoken.append(text)
    return " ".join(spoken)


def summarize_youtube_transcript(url):
    """Summarize the English subtitles of the video at ``url``.

    Args:
        url: Address of the video, as typed into the interface.

    Returns:
        The generated summary text, or the fixed message
        "Unable to fetch or process the transcript." when the URL is empty,
        no subtitle track is found, or downloading/summarizing fails.
    """
    failure_message = "Unable to fetch or process the transcript."
    if not isinstance(url, str) or not url.strip():
        return failure_message

    transcript_url = extract_youtube_transcript(url.strip())
    # Only plain web downloads are expected here; refuse other URL schemes.
    if not transcript_url or not transcript_url.startswith(("http://", "https://")):
        return failure_message

    try:
        # The track URL points at the subtitle file itself, so download it
        # directly instead of running it through the video extractor again.
        with urllib.request.urlopen(transcript_url, timeout=30) as response:
            vtt = response.read().decode("utf-8-sig", errors="replace")
        transcript = _vtt_to_text(vtt)
        if not transcript:
            return failure_message
        # Transcripts easily exceed the model's input window; truncate
        # rather than fail on long videos.
        summary = summarizer(
            transcript,
            max_length=500,
            min_length=50,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        print(f"An error occurred while processing the transcript: {e}")
    return failure_message
# Create the Gradio interface
iface = gr.Interface(
    fn=summarize_youtube_transcript,
    # Components are taken from the top-level package: the legacy
    # `gr.inputs` / `gr.outputs` namespaces were deprecated in Gradio 3
    # and are no longer available in Gradio 4.
    inputs=gr.Textbox(label="YouTube URL"),
    outputs=gr.Textbox(label="Summary"),
    title="YouTube Transcript Summarizer",
    description="Enter a YouTube URL and get a summary of the transcript.",
    theme="huggingface",
)
# Launch the interface
iface.launch(share=True)
|