rajesh1729 committed on
Commit
d256ad9
1 Parent(s): 1d8e620

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -119
app.py CHANGED
@@ -1,139 +1,46 @@
1
- import yt_dlp
2
- import os
3
- import gradio as gr
4
- from transformers import pipeline
5
  import whisper
6
- import random
7
- import time
8
-
9
- def get_audio(url):
10
- try:
11
- # Configure yt-dlp options without browser cookies
12
- ydl_opts = {
13
- 'format': 'bestaudio/best',
14
- 'postprocessors': [{
15
- 'key': 'FFmpegExtractAudio',
16
- 'preferredcodec': 'mp3',
17
- 'preferredquality': '192',
18
- }],
19
- 'outtmpl': 'audio_download.%(ext)s',
20
- 'quiet': True,
21
- 'no_warnings': True,
22
- # Add basic user agent
23
- 'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
24
- # Add other options to help avoid restrictions
25
- 'extractor_args': {'youtube': {
26
- 'player_client': ['android', 'web'],
27
- 'skip': ['dash', 'hls']
28
- }},
29
- # Add network options
30
- 'socket_timeout': 30,
31
- 'retries': 3,
32
- }
33
-
34
- # Add small delay to avoid rate limiting
35
- time.sleep(random.uniform(1, 2))
36
-
37
- # Download the audio
38
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
39
- info = ydl.extract_info(url, download=False)
40
- duration = info.get('duration', 0)
41
-
42
- # Check video duration (optional)
43
- if duration > 1800: # 30 minutes
44
- raise gr.Error("Video is too long. Please use videos under 30 minutes.")
45
-
46
- ydl.download([url])
47
-
48
- return 'audio_download.mp3'
49
-
50
- except Exception as e:
51
- if 'Sign in to confirm' in str(e):
52
- raise gr.Error("This video requires age verification. Please try a different video.")
53
- elif 'Private video' in str(e):
54
- raise gr.Error("This video is private. Please try a public video.")
55
- elif 'Video unavailable' in str(e):
56
- raise gr.Error("This video is unavailable. Please check the URL and try again.")
57
- else:
58
- raise gr.Error(f"Error downloading audio: {str(e)}")
59
 
60
- # Load models
61
  model = whisper.load_model("base")
62
- summarizer = pipeline("summarization")
 
 
 
 
 
 
63
 
64
  def get_text(url):
65
- try:
66
- # Validate URL
67
- if not url.startswith('https://www.youtube.com/') and not url.startswith('https://youtu.be/'):
68
- raise gr.Error("Please enter a valid YouTube URL")
69
-
70
- audio_file = get_audio(url)
71
- result = model.transcribe(audio_file)
72
-
73
- # Cleanup
74
- try:
75
- os.remove(audio_file)
76
- except:
77
- pass
78
-
79
- return result['text']
80
- except Exception as e:
81
- return f"Error: {str(e)}"
82
 
83
  def get_summary(url):
84
- try:
85
- article = get_text(url)
86
- if isinstance(article, str) and article.startswith("Error:"):
87
- return article
88
-
89
- # Handle empty or short text
90
- if not article or len(article.split()) < 30:
91
- return "Text too short to summarize. Please try a longer video."
92
-
93
- # Split long text into chunks
94
- max_chunk_length = 1000
95
- chunks = [article[i:i+max_chunk_length] for i in range(0, len(article), max_chunk_length)]
96
- summaries = []
97
-
98
- for chunk in chunks:
99
- summary = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
100
- summaries.append(summary[0]['summary_text'])
101
-
102
- return " ".join(summaries)
103
- except Exception as e:
104
- return f"Error: {str(e)}"
105
 
106
- # Create Gradio interface
107
  with gr.Blocks() as demo:
108
- gr.Markdown("<h1><center>YouTube Video Transcription with OpenAI's Whisper</center></h1>")
109
- gr.Markdown("<center>Enter the link of any YouTube video to get the transcription and summary. Please use videos under 30 minutes in length.</center>")
110
 
111
  with gr.Tab('Get the transcription of any Youtube video'):
112
  with gr.Row():
113
- input_text_1 = gr.Textbox(
114
- placeholder='Enter the Youtube video URL (e.g., https://www.youtube.com/watch?v=...)',
115
- label='URL'
116
- )
117
- output_text_1 = gr.Textbox(
118
- placeholder='Transcription of the video',
119
- label='Transcription'
120
- )
121
  result_button_1 = gr.Button('Get Transcription')
122
 
123
  with gr.Tab('Summary of Youtube video'):
124
  with gr.Row():
125
- input_text = gr.Textbox(
126
- placeholder='Enter the Youtube video URL (e.g., https://www.youtube.com/watch?v=...)',
127
- label='URL'
128
- )
129
- output_text = gr.Textbox(
130
- placeholder='Summary text of the Youtube Video',
131
- label='Summary'
132
- )
133
  result_button = gr.Button('Get Summary')
134
 
135
- result_button.click(get_summary, inputs=input_text, outputs=output_text)
136
- result_button_1.click(get_text, inputs=input_text_1, outputs=output_text_1)
137
 
138
- # Launch with appropriate settings
139
  demo.launch(debug=True)
 
 
 
 
 
1
  import whisper
2
+ from pytubefix import YouTube
3
+ from pytubefix.cli import on_progress
4
+ from transformers import pipeline
5
+ import gradio as gr
6
+ import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
 
8
  model = whisper.load_model("base")
9
+ summarizer = pipeline("summarization")
10
+
11
+ def get_audio(url):
12
+ yt = YouTube(url, on_progress_callback=on_progress)
13
+ audio_stream = yt.streams.get_audio_only()
14
+ out_file = audio_stream.download(mp3=True) # This will directly download as mp3
15
+ return out_file # Returns the path to the mp3 file
16
 
17
  def get_text(url):
18
+ result = model.transcribe(get_audio(url))
19
+ return result['text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  def get_summary(url):
22
+ article = get_text(url)
23
+ b = summarizer(article)
24
+ b = b[0]['summary_text']
25
+ return b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
 
27
  with gr.Blocks() as demo:
28
+ gr.Markdown("<h1><center>Youtube video transcription with OpenAI's Whisper</center></h1>")
29
+ gr.Markdown("<center>Enter the link of any youtube video to get the transcription of the video and a summary of the video in the form of text.</center>")
30
 
31
  with gr.Tab('Get the transcription of any Youtube video'):
32
  with gr.Row():
33
+ input_text_1 = gr.Textbox(placeholder='Enter the Youtube video URL', label='URL')
34
+ output_text_1 = gr.Textbox(placeholder='Transcription of the video', label='Transcription')
 
 
 
 
 
 
35
  result_button_1 = gr.Button('Get Transcription')
36
 
37
  with gr.Tab('Summary of Youtube video'):
38
  with gr.Row():
39
+ input_text = gr.Textbox(placeholder='Enter the Youtube video URL', label='URL')
40
+ output_text = gr.Textbox(placeholder='Summary text of the Youtube Video', label='Summary')
 
 
 
 
 
 
41
  result_button = gr.Button('Get Summary')
42
 
43
+ result_button.click(get_summary, inputs = input_text, outputs = output_text)
44
+ result_button_1.click(get_text, inputs = input_text_1, outputs = output_text_1)
45
 
 
46
  demo.launch(debug=True)