rajesh1729's picture
Update app.py
f702841 verified
raw
history blame
5.05 kB
import yt_dlp
import os
import gradio as gr
from transformers import pipeline
import whisper
import random
import time
def get_audio(url):
    """Download the audio track of a video as an MP3 file.

    The video's metadata is fetched first so that over-long videos can be
    rejected before anything is downloaded.

    Args:
        url: URL of the video to download.

    Returns:
        The path of the extracted audio file, always ``'audio_download.mp3'``.

    Raises:
        gr.Error: If the video is longer than 30 minutes, or if the download
            fails (with a friendlier message for a few known failure texts).
    """
    max_duration_seconds = 1800  # 30 minutes

    # Configure yt-dlp options without browser cookies
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'outtmpl': 'audio_download.%(ext)s',
        'quiet': True,
        'no_warnings': True,
        # Add basic user agent
        # NOTE(review): yt-dlp documents `http_headers` for request headers;
        # confirm that this `user_agent` key is actually honoured.
        'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
        # Add other options to help avoid restrictions
        'extractor_args': {'youtube': {
            'player_client': ['android', 'web'],
            'skip': ['dash', 'hls']
        }},
        # Add network options
        'socket_timeout': 30,
        'retries': 3,
    }

    try:
        # Add small delay to avoid rate limiting
        time.sleep(random.uniform(1, 2))

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
            # 'duration' may be missing or None; treat both as "unknown"
            # instead of crashing on a None > int comparison.
            duration = (info or {}).get('duration') or 0
            if duration > max_duration_seconds:
                raise gr.Error("Video is too long. Please use videos under 30 minutes.")
            ydl.download([url])
        return 'audio_download.mp3'
    except gr.Error:
        # Our own user-facing errors must pass through untouched; otherwise
        # the generic handler below would re-wrap them as download failures.
        raise
    except Exception as e:
        message = str(e)
        if 'Sign in to confirm' in message:
            raise gr.Error("This video requires age verification. Please try a different video.") from e
        if 'Private video' in message:
            raise gr.Error("This video is private. Please try a public video.") from e
        if 'Video unavailable' in message:
            raise gr.Error("This video is unavailable. Please check the URL and try again.") from e
        raise gr.Error(f"Error downloading audio: {message}") from e
# Build both models once at import time so every request reuses them.
# Speech-to-text: the "base" Whisper checkpoint.
model = whisper.load_model(name="base")
# Summarization: no model is named, so transformers picks its default.
summarizer = pipeline(task="summarization")
# URL prefixes accepted as YouTube links.
_YOUTUBE_URL_PREFIXES = (
    'https://www.youtube.com/',
    'https://youtube.com/',
    'https://m.youtube.com/',
    'https://youtu.be/',
)


def get_text(url):
    """Transcribe the audio of a YouTube video.

    Args:
        url: YouTube video URL. Surrounding whitespace is ignored.

    Returns:
        The transcription text on success. On any failure a string starting
        with ``"Error:"`` is returned instead of raising, so the message can
        be shown directly in the output textbox.
    """
    # Validate URL. Anything that is not a string (e.g. None) is treated as
    # an empty, and therefore invalid, URL.
    cleaned_url = url.strip() if isinstance(url, str) else ""
    if not cleaned_url.startswith(_YOUTUBE_URL_PREFIXES):
        return "Error: Please enter a valid YouTube URL"

    try:
        audio_file = get_audio(cleaned_url)
        try:
            result = model.transcribe(audio_file)
        finally:
            # Cleanup runs even when transcription fails, so the downloaded
            # file is never left behind.
            try:
                os.remove(audio_file)
            except OSError:
                # Best effort: a file that is already gone is not an error.
                pass
        return result['text']
    except Exception as e:
        return f"Error: {str(e)}"
def _split_on_words(text, max_chars):
    """Split *text* into chunks of at most *max_chars*, breaking only between words.

    A single word longer than *max_chars* is kept whole in its own chunk.
    """
    chunks = []
    current_words = []
    current_len = 0
    for word in text.split():
        # +1 accounts for the joining space when the chunk is not empty.
        added = len(word) + (1 if current_words else 0)
        if current_words and current_len + added > max_chars:
            chunks.append(" ".join(current_words))
            current_words = [word]
            current_len = len(word)
        else:
            current_words.append(word)
            current_len += added
    if current_words:
        chunks.append(" ".join(current_words))
    return chunks


def get_summary(url):
    """Transcribe a YouTube video and summarize the transcription.

    Args:
        url: YouTube video URL, passed on to ``get_text``.

    Returns:
        The chunk summaries joined with spaces. If transcription failed, the
        ``"Error: ..."`` string from ``get_text`` is returned unchanged; if
        the transcription has fewer than 30 words, a "too short" message is
        returned. Any other failure is reported as an ``"Error: ..."`` string.
    """
    try:
        article = get_text(url)
        if isinstance(article, str) and article.startswith("Error:"):
            return article

        # Handle empty or short text
        if not article or len(article.split()) < 30:
            return "Text too short to summarize. Please try a longer video."

        # Split long text into chunks on word boundaries, so no word is cut
        # in half at a chunk edge.
        max_chunk_length = 1000
        min_tail_words = 30
        chunks = _split_on_words(article, max_chunk_length)

        # A very short final chunk would force the summarizer to produce at
        # least `min_length` tokens from almost no input; fold it into the
        # previous chunk instead.
        if len(chunks) > 1 and len(chunks[-1].split()) < min_tail_words:
            tail = chunks.pop()
            chunks[-1] = f"{chunks[-1]} {tail}"

        summaries = [
            summarizer(chunk, max_length=130, min_length=30, do_sample=False)[0]['summary_text']
            for chunk in chunks
        ]
        return " ".join(summaries)
    except Exception as e:
        return f"Error: {str(e)}"
# Build the two-tab Gradio UI: one tab for transcription, one for summary.
# NOTE(review): the nesting below (each button inside its tab, under the row)
# is reconstructed from statement order — confirm against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown(value="<h1><center>YouTube Video Transcription with OpenAI's Whisper</center></h1>")
    gr.Markdown(value="<center>Enter the link of any YouTube video to get the transcription and summary. Please use videos under 30 minutes in length.</center>")

    # Tab 1: URL in, full transcription out.
    with gr.Tab(label='Get the transcription of any Youtube video'):
        with gr.Row():
            input_text_1 = gr.Textbox(
                label='URL',
                placeholder='Enter the Youtube video URL (e.g., https://www.youtube.com/watch?v=...)',
            )
            output_text_1 = gr.Textbox(
                label='Transcription',
                placeholder='Transcription of the video',
            )
        result_button_1 = gr.Button(value='Get Transcription')

    # Tab 2: URL in, summary out.
    with gr.Tab(label='Summary of Youtube video'):
        with gr.Row():
            input_text = gr.Textbox(
                label='URL',
                placeholder='Enter the Youtube video URL (e.g., https://www.youtube.com/watch?v=...)',
            )
            output_text = gr.Textbox(
                label='Summary',
                placeholder='Summary text of the Youtube Video',
            )
        result_button = gr.Button(value='Get Summary')

    # Wire each button to its handler.
    result_button.click(fn=get_summary, inputs=input_text, outputs=output_text)
    result_button_1.click(fn=get_text, inputs=input_text_1, outputs=output_text_1)

# Start the app with debug output enabled.
demo.launch(debug=True)