Spaces:

rajesh1729
/

youtube-video-transcription-with-whisper

Running

youtube-video-transcription-with-whisper

File size: 2,603 Bytes

import yt_dlp
import os
import gradio as gr
from transformers import pipeline
import whisper

def get_audio(url):
    """Download the audio track of a video and convert it to MP3.

    Args:
        url: Address of the video to fetch; handed to yt-dlp.

    Returns:
        Path of the MP3 file produced by the FFmpegExtractAudio
        post-processor. The caller is responsible for deleting it.

    Raises:
        gr.Error: If ``url`` is blank, or if downloading/converting fails.
    """
    import glob
    import tempfile
    import uuid

    if not url or not url.strip():
        raise gr.Error("Error downloading audio: no URL provided")

    # A unique base name per call keeps concurrent requests from overwriting
    # (or deleting) each other's download.
    base_path = os.path.join(
        tempfile.gettempdir(), f"audio_download_{uuid.uuid4().hex}"
    )

    try:
        # Configure yt-dlp options
        ydl_opts = {
            'format': 'bestaudio/best',  # Choose best quality audio
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
            # '%' is the template marker, so escape any literal one in the path.
            'outtmpl': base_path.replace('%', '%%') + '.%(ext)s',
            'noplaylist': True,  # A watch URL with a list= param fetches one video only
            'quiet': True,  # Less output
            'no_warnings': True,  # No warnings
        }

        # Download the audio
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url.strip()])

        # The post-processor re-encodes to mp3, so that is the final extension.
        return base_path + '.mp3'

    except Exception as e:
        # Do not leave partial downloads or intermediate files behind.
        for leftover in glob.glob(glob.escape(base_path) + '.*'):
            try:
                os.remove(leftover)
            except OSError:
                pass
        raise gr.Error(f"Error downloading audio: {str(e)}") from e

# Load models once at import time so every request reuses the same instances.
model = whisper.load_model("base")
# Name the summarization checkpoint explicitly instead of relying on the
# library's implicit default (which emits a "no model supplied" warning and may
# change between releases). NOTE(review): this is believed to be the checkpoint
# the PyTorch summarization pipeline defaults to - confirm against the
# installed transformers version.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

def get_text(url):
    """Transcribe the audio of the video at ``url``.

    Args:
        url: Address of the video; forwarded to ``get_audio``.

    Returns:
        The transcription text on success. On any failure the exception is
        not propagated; a string of the form ``"Error: <message>"`` is
        returned instead so it can be shown in the output textbox.
    """
    try:
        audio_file = get_audio(url)
        try:
            result = model.transcribe(audio_file)
        finally:
            # Cleanup runs even when transcription fails, so the downloaded
            # file is never leaked. Removal is best-effort: a file that is
            # already gone or locked must not mask the real outcome.
            try:
                os.remove(audio_file)
            except OSError:
                pass

        return result['text']
    except Exception as e:
        return f"Error: {str(e)}"

def get_summary(url):
    """Transcribe the video at ``url`` and summarize the transcription.

    Args:
        url: Address of the video; forwarded to ``get_text``.

    Returns:
        The summary text on success, or a string of the form
        ``"Error: <message>"`` if transcription or summarization fails.
    """
    try:
        article = get_text(url)

        # get_text reports failures as an "Error: ..." string rather than
        # raising; pass that through instead of summarizing the error message.
        if article.startswith("Error: "):
            return article

        # Nothing to summarize (e.g. a video with no speech).
        if not article.strip():
            return "Error: no speech was transcribed from this video"

        # Long transcripts exceed the model's maximum input length; ask the
        # tokenizer to truncate rather than fail inside the model.
        summary = summarizer(article, truncation=True)
        return summary[0]['summary_text']
    except Exception as e:
        return f"Error: {str(e)}"

# Create Gradio interface: two tabs, one for the raw transcription and one for
# the summary, each with its own URL input, output box and trigger button.
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>YouTube Video Transcription with OpenAI's Whisper</center></h1>")
    gr.Markdown("<center>Enter the link of any YouTube video to get the transcription and summary.</center>")
    
    with gr.Tab('Get the transcription of any Youtube video'):
        with gr.Row():
            input_text_1 = gr.Textbox(placeholder='Enter the Youtube video URL', label='URL')
            output_text_1 = gr.Textbox(placeholder='Transcription of the video', label='Transcription')
        result_button_1 = gr.Button('Get Transcription')
        
    with gr.Tab('Summary of Youtube video'):
        with gr.Row():
            input_text = gr.Textbox(placeholder='Enter the Youtube video URL', label='URL')
            output_text = gr.Textbox(placeholder='Summary text of the Youtube Video', label='Summary')
        result_button = gr.Button('Get Summary')

    # Wire each button to its handler; the handler's return value fills the
    # matching output textbox.
    result_button.click(get_summary, inputs=input_text, outputs=output_text)
    result_button_1.click(get_text, inputs=input_text_1, outputs=output_text_1)

# Start the server only when this file is run as a script, so importing the
# module (e.g. to reuse `demo` or the helper functions) has no launch side effect.
if __name__ == "__main__":
    demo.launch(debug=True)