import os
import random
import textwrap
import time

import gradio as gr
import whisper
import yt_dlp
from transformers import pipeline


def get_audio(url):
    """Download the audio track of a video and convert it to MP3.

    The video's metadata is fetched first so that over-long videos are
    rejected before anything is downloaded.

    Args:
        url: URL of the video to download.

    Returns:
        The path of the converted file, ``'audio_download.mp3'`` (relative to
        the current working directory).

    Raises:
        gr.Error: If the video is longer than 30 minutes (1800 seconds), or
            if the download fails for any reason (age-restricted, private or
            unavailable video, network error, ...).
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        # Convert whatever was downloaded to a 192 kbps MP3 via FFmpeg.
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        # Fixed base name: together with the MP3 post-processor this yields
        # the 'audio_download.mp3' path returned below.
        'outtmpl': 'audio_download.%(ext)s',
        'quiet': True,
        'no_warnings': True,
        'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
        'extractor_args': {'youtube': {
            'player_client': ['android', 'web'],
            'skip': ['dash', 'hls'],
        }},
        'socket_timeout': 30,
        'retries': 3,
    }

    try:
        # Short random pause before contacting the site.
        # NOTE(review): presumably meant to avoid rate limiting - confirm.
        time.sleep(random.uniform(1, 2))

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)

            # 'duration' may be absent or None (e.g. for live streams);
            # treat both as "unknown" instead of comparing None with an int.
            duration = (info or {}).get('duration') or 0
            if duration > 1800:
                raise gr.Error("Video is too long. Please use videos under 30 minutes.")

            ydl.download([url])

        return 'audio_download.mp3'

    except gr.Error:
        # Already a user-facing message (the length check above); re-raise
        # untouched rather than wrapping it in "Error downloading audio: ...".
        raise
    except Exception as e:
        # Translate well-known yt-dlp failures into friendlier messages.
        message = str(e)
        if 'Sign in to confirm' in message:
            raise gr.Error("This video requires age verification. Please try a different video.") from e
        elif 'Private video' in message:
            raise gr.Error("This video is private. Please try a public video.") from e
        elif 'Video unavailable' in message:
            raise gr.Error("This video is unavailable. Please check the URL and try again.") from e
        else:
            raise gr.Error(f"Error downloading audio: {message}") from e


# Both models are loaded once at import time and shared by every request
# handler defined in this file.
model = whisper.load_model(name="base")  # Whisper speech-to-text model
summarizer = pipeline(task="summarization")  # transformers summarization pipeline


# URL prefixes accepted as "a YouTube link" by get_text().
_YOUTUBE_URL_PREFIXES = (
    'https://www.youtube.com/',
    'https://youtube.com/',
    'https://m.youtube.com/',
    'https://youtu.be/',
)


def get_text(url):
    """Transcribe the audio of a YouTube video with the module-level model.

    Args:
        url: YouTube video URL. Surrounding whitespace is ignored.

    Returns:
        The transcript text on success. On any failure, a string starting
        with ``"Error:"`` that describes the problem; errors are returned
        rather than raised so the text can be shown in the output textbox
        (``get_summary`` relies on this prefix).
    """
    # Reject anything that is not a recognised YouTube URL up front, without
    # raising an exception only to catch it again below.
    if not isinstance(url, str) or not url.strip().startswith(_YOUTUBE_URL_PREFIXES):
        return "Error: Please enter a valid YouTube URL"

    audio_file = None
    try:
        audio_file = get_audio(url.strip())
        result = model.transcribe(audio_file)
        return result['text']
    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        # Always remove the downloaded audio, including when transcription
        # fails. Cleanup is best-effort: a missing or locked file must not
        # mask the real result.
        if audio_file is not None:
            try:
                os.remove(audio_file)
            except OSError:
                pass


def get_summary(url):
    """Transcribe a YouTube video and summarize the transcript.

    The transcript is split into chunks of at most ~1000 characters, each
    chunk is summarized independently, and the partial summaries are joined
    with single spaces.

    Args:
        url: YouTube video URL, passed through to ``get_text``.

    Returns:
        The combined summary text; the ``"Error: ..."`` string produced by
        ``get_text`` if transcription failed; a "too short" notice if the
        transcript has fewer than 30 words; or ``"Error: ..."`` if
        summarization itself raised.
    """
    try:
        article = get_text(url)

        # get_text reports failures as "Error: ..." strings; pass them on.
        if isinstance(article, str) and article.startswith("Error:"):
            return article

        if not article or len(article.split()) < 30:
            return "Text too short to summarize. Please try a longer video."

        max_chunk_length = 1000
        # Split on whitespace so no word is cut in half at a chunk boundary.
        # A single word longer than the limit is kept whole.
        chunks = textwrap.wrap(
            article,
            width=max_chunk_length,
            break_long_words=False,
            break_on_hyphens=False,
        )

        summaries = [
            summarizer(chunk, max_length=130, min_length=30, do_sample=False)[0]['summary_text']
            for chunk in chunks
        ]
        return " ".join(summaries)

    except Exception as e:
        return f"Error: {str(e)}"


# Placeholder text shared by the URL input box of both tabs.
_URL_PLACEHOLDER = 'Enter the Youtube video URL (e.g., https://www.youtube.com/watch?v=...)'

# Two-tab Gradio interface: one tab returns the transcript, the other a summary.
# NOTE(review): the source this was recovered from had lost its indentation;
# the nesting below (each button inside its tab but outside the row, click
# handlers registered inside the Blocks context) is reconstructed - confirm
# against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>YouTube Video Transcription with OpenAI's Whisper</center></h1>")
    gr.Markdown(
        "<center>Enter the link of any YouTube video to get the transcription and summary. Please use videos under 30 minutes in length.</center>"
    )

    # Tab 1: URL in, transcript out.
    with gr.Tab('Get the transcription of any Youtube video'):
        with gr.Row():
            input_text_1 = gr.Textbox(placeholder=_URL_PLACEHOLDER, label='URL')
            output_text_1 = gr.Textbox(placeholder='Transcription of the video', label='Transcription')
        result_button_1 = gr.Button('Get Transcription')

    # Tab 2: URL in, summary out.
    with gr.Tab('Summary of Youtube video'):
        with gr.Row():
            input_text = gr.Textbox(placeholder=_URL_PLACEHOLDER, label='URL')
            output_text = gr.Textbox(placeholder='Summary text of the Youtube Video', label='Summary')
        result_button = gr.Button('Get Summary')

    # Wire each button to its handler.
    result_button.click(fn=get_summary, inputs=input_text, outputs=output_text)
    result_button_1.click(fn=get_text, inputs=input_text_1, outputs=output_text_1)

demo.launch(debug=True)