Spaces:

rajesh1729
/

youtube-video-transcription-with-whisper

Running

App Files Community

rajesh1729 committed on Oct 28, 2024

Commit

342ece4

verified ·

1 Parent(s): db69fcf

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -35

app.py CHANGED Viewed

@@ -1,46 +1,78 @@
-import whisper
-from pytube import YouTube
-from transformers import pipeline
-import gradio as gr
 import os
 model = whisper.load_model("base")
 summarizer = pipeline("summarization")
-def get_audio(url):
-  yt = YouTube(url)
-  video = yt.streams.filter(only_audio=True).first()
-  out_file=video.download(output_path=".")
-  base, ext = os.path.splitext(out_file)
-  new_file = base+'.mp3'
-  os.rename(out_file, new_file)
-  a = new_file
-  return a
 def get_text(url):
-  result = model.transcribe(get_audio(url))
-  return result['text']
 def get_summary(url):
-  article = get_text(url)
-  b = summarizer(article)
-  b = b[0]['summary_text']
-  return b
 with gr.Blocks() as demo:
-  gr.Markdown("<h1><center>Youtube video transcription with OpenAI's Whisper</center></h1>")
-  gr.Markdown("<center>Enter the link of any youtube video to get the transcription of the video and a summary of the video in the form of text.</center>")
-  with gr.Tab('Get the transcription of any Youtube video'):
-    with gr.Row():
-      input_text_1 = gr.Textbox(placeholder='Enter the Youtube video URL', label='URL')
-      output_text_1 = gr.Textbox(placeholder='Transcription of the video', label='Transcription')
-    result_button_1 = gr.Button('Get Transcription')
-  with gr.Tab('Summary of Youtube video'):
-    with gr.Row():
-      input_text = gr.Textbox(placeholder='Enter the Youtube video URL', label='URL')
-      output_text = gr.Textbox(placeholder='Summary text of the Youtube Video', label='Summary')
-    result_button = gr.Button('Get Summary')
-  result_button.click(get_summary, inputs = input_text, outputs = output_text)
-  result_button_1.click(get_text, inputs = input_text_1, outputs = output_text_1)
 demo.launch(debug=True)

+import yt_dlp
 import os
+import gradio as gr
+from transformers import pipeline
+import whisper
+def get_audio(url):
+    """Download the audio track of a video as an MP3 file.
+
+    Args:
+        url: Address of the video, handed to yt-dlp for download.
+
+    Returns:
+        Name of the MP3 file written to the current working directory.
+        The caller is responsible for deleting it when done.
+
+    Raises:
+        gr.Error: If no URL was given, or if yt-dlp fails to download or
+            convert the audio.
+    """
+    import uuid  # local import: only needed to build a unique file name
+
+    # Validate outside the try block so this message is not re-wrapped below.
+    if not url or not url.strip():
+        raise gr.Error("Error downloading audio: no URL was provided")
+
+    # A per-call file name keeps simultaneous requests from overwriting (or
+    # deleting) each other's download.
+    base_name = f"audio_download_{uuid.uuid4().hex}"
+    ydl_opts = {
+        'format': 'bestaudio/best',  # Choose best quality audio
+        'postprocessors': [{
+            # Convert whatever container was downloaded to MP3.
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'mp3',
+            'preferredquality': '192',
+        }],
+        'outtmpl': f'{base_name}.%(ext)s',  # Output template
+        # A watch URL that also names a playlist should fetch one video only;
+        # several entries would otherwise share the same output template.
+        'noplaylist': True,
+        'quiet': True,  # Less output
+        'no_warnings': True,  # No warnings
+    }
+    try:
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            ydl.download([url.strip()])
+    except Exception as e:
+        # Surface the failure in the Gradio UI while keeping the root cause
+        # attached for the server log.
+        raise gr.Error(f"Error downloading audio: {str(e)}") from e
+    # The post-processor rewrites the extension to .mp3.
+    return f'{base_name}.mp3'
+# Load both models once at start-up so every request reuses them.
 model = whisper.load_model("base")
+# Name the checkpoint explicitly instead of relying on the pipeline's implicit
+# default (Transformers warns about unpinned models); this is the checkpoint
+# the "summarization" task falls back to.
+summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 def get_text(url):
+    """Transcribe the audio of the video at ``url`` with Whisper.
+
+    Args:
+        url: Address of the video to transcribe.
+
+    Returns:
+        The transcribed text on success. On any failure the function does
+        not raise; it returns a string of the form ``"Error: <message>"`` so
+        the message appears in the output textbox.
+    """
+    try:
+        audio_file = get_audio(url)
+        try:
+            result = model.transcribe(audio_file)
+        finally:
+            # Delete the download even when transcription fails, so failed
+            # requests do not leave audio files behind.
+            try:
+                os.remove(audio_file)
+            except OSError:
+                # Best-effort cleanup: a missing or locked file must not
+                # hide the transcription result (or the original error).
+                pass
+        return result['text']
+    except Exception as e:
+        return f"Error: {str(e)}"
 def get_summary(url):
+    """Summarize the transcription of the video at ``url``.
+
+    Args:
+        url: Address of the video to transcribe and summarize.
+
+    Returns:
+        The summary text on success, or an ``"Error: <message>"`` string if
+        transcription or summarization fails.
+    """
+    try:
+        article = get_text(url)
+        # get_text reports failures as an "Error: ..." string rather than
+        # raising; pass that through instead of summarizing the message.
+        if article.startswith("Error: "):
+            return article
+        if not article.strip():
+            return "Error: the transcription is empty, nothing to summarize"
+        # Transcripts are often longer than the model's input limit;
+        # truncation makes the pipeline summarize the leading part instead
+        # of failing on over-long input.
+        summary = summarizer(article, truncation=True)
+        return summary[0]['summary_text']
+    except Exception as e:
+        return f"Error: {str(e)}"
+# Two-tab Gradio UI: one tab returns the transcription, the other a summary.
 with gr.Blocks() as demo:
+    # Page heading and a one-line usage hint.
+    gr.Markdown(value="<h1><center>YouTube Video Transcription with OpenAI's Whisper</center></h1>")
+    gr.Markdown(value="<center>Enter the link of any YouTube video to get the transcription and summary.</center>")
+
+    # Tab 1: URL in, full transcription out.
+    with gr.Tab(label="Get the transcription of any Youtube video"):
+        with gr.Row():
+            input_text_1 = gr.Textbox(label="URL", placeholder="Enter the Youtube video URL")
+            output_text_1 = gr.Textbox(label="Transcription", placeholder="Transcription of the video")
+        result_button_1 = gr.Button(value="Get Transcription")
+
+    # Tab 2: URL in, summary of the transcription out.
+    with gr.Tab(label="Summary of Youtube video"):
+        with gr.Row():
+            input_text = gr.Textbox(label="URL", placeholder="Enter the Youtube video URL")
+            output_text = gr.Textbox(label="Summary", placeholder="Summary text of the Youtube Video")
+        result_button = gr.Button(value="Get Summary")
+
+    # Connect each button to its handler, in the same order as before.
+    result_button.click(fn=get_summary, inputs=input_text, outputs=output_text)
+    result_button_1.click(fn=get_text, inputs=input_text_1, outputs=output_text_1)
+# Start the app; debug mode is kept as in the original.
 demo.launch(debug=True)