File size: 2,030 Bytes
4d281ef
84f933b
 
4d281ef
84f933b
 
 
4d281ef
84f933b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import html
import re
import urllib.request

import gradio as gr
import youtube_dl
from transformers import pipeline

# Build the module-wide summarization pipeline once at import time; the same
# checkpoint name is used for both the model weights and the tokenizer.
# NOTE(review): confirm that this checkpoint id resolves in the deployment
# environment (Hub ids are usually namespaced as "org/name") and that the
# checkpoint is compatible with the "summarization" task.
model_name = 'falcon-7b-instruct'
summarizer = pipeline(
    task="summarization",
    model=model_name,
    tokenizer=model_name,
)

# Look up the URL of the English WebVTT subtitle track for a YouTube video
def extract_youtube_transcript(url):
    """Return the URL of the English ``vtt`` subtitle file for *url*.

    The video metadata is queried through ``youtube_dl`` without downloading
    the media. This function is best-effort: it never raises, and reports
    every failure by printing a message and/or returning ``None``.

    Args:
        url: Address of the video page. Empty or ``None`` yields ``None``.

    Returns:
        The subtitle file URL as a string, or ``None`` when the metadata
        could not be fetched or no English ``vtt`` track is listed.
    """
    if not url:
        return None

    ydl_opts = {
        'writesubtitles': True,
        'subtitleslangs': ['en'],
        'skip_download': True,
        'ignoreerrors': True
    }
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
        # With 'ignoreerrors' enabled a failed extraction may come back as
        # None instead of raising, so guard before indexing into it.
        if not info:
            return None
        # 'subtitles' maps a language code to a list of format dicts (each
        # with 'ext' and 'url'); it is not a flat list of tracks.
        english_tracks = (info.get('subtitles') or {}).get('en') or []
        for track in english_tracks:
            if track.get('ext') == 'vtt' and track.get('url'):
                return track['url']
    except Exception as e:
        # Top-level boundary for a best-effort lookup: report and fall through.
        print(f"An error occurred while fetching the transcript: {e}")
    return None

# Callback used by the Gradio interface: video URL in, summary text out
def summarize_youtube_transcript(url):
    """Summarize the English transcript of the YouTube video at *url*.

    The subtitle file located by ``extract_youtube_transcript`` is downloaded,
    reduced to plain text and passed to the module-level ``summarizer``.

    Args:
        url: Address of the video page, as typed by the user.

    Returns:
        The generated summary, or the fixed message
        ``"Unable to fetch or process the transcript."`` when the URL is
        empty, no transcript is available, or any step fails.
    """
    failure_message = "Unable to fetch or process the transcript."
    if not url or not url.strip():
        return failure_message

    transcript_url = extract_youtube_transcript(url.strip())
    if not transcript_url:
        return failure_message

    try:
        transcript = _vtt_to_text(_download_text(transcript_url))
        if not transcript:
            return failure_message
        # truncation=True keeps transcripts longer than the model's input
        # limit from failing; only the leading part is summarized then.
        summary = summarizer(
            transcript,
            max_length=500,
            min_length=50,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        # UI boundary: report the problem and show the generic message.
        print(f"An error occurred while processing the transcript: {e}")
    return failure_message


def _download_text(file_url, timeout=30):
    """Fetch *file_url* over HTTP(S) and return its body decoded as UTF-8."""
    # Refuse non-web schemes (e.g. file://) before handing the URL to urlopen.
    if not file_url.startswith(('http://', 'https://')):
        raise ValueError(f"Unsupported transcript URL: {file_url}")
    with urllib.request.urlopen(file_url, timeout=timeout) as response:
        return response.read().decode('utf-8', errors='replace')


def _vtt_to_text(vtt):
    """Collapse a WebVTT document into one line of plain caption text."""
    pieces = []
    in_cue = False
    for raw_line in vtt.splitlines():
        line = raw_line.strip()
        if not line:
            # A blank line ends the current cue (or header/NOTE block).
            in_cue = False
            continue
        if '-->' in line:
            # Timing line: the cue payload starts on the next line.
            in_cue = True
            continue
        if not in_cue:
            # File header, NOTE/STYLE blocks and cue identifiers.
            continue
        # Drop inline markup such as <c> or <00:00:01.000> and decode entities.
        text = html.unescape(re.sub(r'<[^>]+>', '', line)).strip()
        # Skip a caption line that merely repeats the previous one.
        if text and (not pieces or pieces[-1] != text):
            pieces.append(text)
    return ' '.join(pieces)

# Create the Gradio interface: one text box for the video URL, one for the
# resulting summary. The top-level gr.Textbox component is used for both
# because the gr.inputs / gr.outputs namespaces are deprecated in Gradio 3
# and no longer exist in Gradio 4.
iface = gr.Interface(
    fn=summarize_youtube_transcript,
    inputs=gr.Textbox(label="YouTube URL"),
    outputs=gr.Textbox(label="Summary"),
    title="YouTube Transcript Summarizer",
    description="Enter a YouTube URL and get a summary of the transcript.",
    theme="huggingface",
)

# Launch the interface; share=True asks Gradio for a public share link.
iface.launch(share=True)