import yt_dlp import os import gradio as gr from transformers import pipeline import whisper import random import time def get_audio(url): try: # Configure yt-dlp options without browser cookies ydl_opts = { 'format': 'bestaudio/best', 'postprocessors': [{ 'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3', 'preferredquality': '192', }], 'outtmpl': 'audio_download.%(ext)s', 'quiet': True, 'no_warnings': True, # Add basic user agent 'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36', # Add other options to help avoid restrictions 'extractor_args': {'youtube': { 'player_client': ['android', 'web'], 'skip': ['dash', 'hls'] }}, # Add network options 'socket_timeout': 30, 'retries': 3, } # Add small delay to avoid rate limiting time.sleep(random.uniform(1, 2)) # Download the audio with yt_dlp.YoutubeDL(ydl_opts) as ydl: info = ydl.extract_info(url, download=False) duration = info.get('duration', 0) # Check video duration (optional) if duration > 1800: # 30 minutes raise gr.Error("Video is too long. Please use videos under 30 minutes.") ydl.download([url]) return 'audio_download.mp3' except Exception as e: if 'Sign in to confirm' in str(e): raise gr.Error("This video requires age verification. Please try a different video.") elif 'Private video' in str(e): raise gr.Error("This video is private. Please try a public video.") elif 'Video unavailable' in str(e): raise gr.Error("This video is unavailable. Please check the URL and try again.") else: raise gr.Error(f"Error downloading audio: {str(e)}") # Load models model = whisper.load_model("base") summarizer = pipeline("summarization") def get_text(url): try: # Validate URL if not url.startswith('https://www.youtube.com/') and not url.startswith('https://youtu.be/'): raise gr.Error("Please enter a valid YouTube URL") audio_file = get_audio(url) result = model.transcribe(audio_file) # Cleanup try: os.remove(audio_file) except: pass return result['text'] except Exception as e: return f"Error: {str(e)}" def get_summary(url): try: article = get_text(url) if isinstance(article, str) and article.startswith("Error:"): return article # Handle empty or short text if not article or len(article.split()) < 30: return "Text too short to summarize. Please try a longer video." # Split long text into chunks max_chunk_length = 1000 chunks = [article[i:i+max_chunk_length] for i in range(0, len(article), max_chunk_length)] summaries = [] for chunk in chunks: summary = summarizer(chunk, max_length=130, min_length=30, do_sample=False) summaries.append(summary[0]['summary_text']) return " ".join(summaries) except Exception as e: return f"Error: {str(e)}" # Create Gradio interface with gr.Blocks() as demo: gr.Markdown("