razhan committed on
Commit
3a88da4
·
verified ·
1 Parent(s): e8e46c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -13
app.py CHANGED
@@ -170,6 +170,7 @@ import yt_dlp as youtube_dl
170
  import pytube as pt
171
  from transformers import pipeline
172
  from transformers.pipelines.audio_utils import ffmpeg_read
 
173
 
174
  import tempfile
175
  import os
@@ -240,22 +241,45 @@ def download_yt_audio(yt_url, filename):
240
  stream = yt.streams.filter(only_audio=True)[0]
241
  stream.download(filename=filename)
242
 
243
- @spaces.GPU
244
- def yt_transcribe(yt_url, task="transcribe", max_filesize=75.0):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  html_embed_str = _return_yt_html_embed(yt_url)
 
 
 
 
 
246
 
247
- with tempfile.TemporaryDirectory() as tmpdirname:
248
- # filepath = os.path.join(tmpdirname, "video.mp4")
249
- filepath = os.path.join(tmpdirname, "audio.mp3")
250
- download_yt_audio(yt_url, filepath)
251
- with open(filepath, "rb") as f:
252
- inputs = f.read()
253
 
254
- inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
255
- inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
256
 
257
- text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
 
258
 
 
 
 
259
  return html_embed_str, text
260
 
261
 
@@ -310,7 +334,7 @@ yt_transcribe = gr.Interface(
310
  )
311
 
312
  with demo:
313
- # gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe], ["Microphone", "Audio file", "YouTube"])
314
- gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
315
 
316
  demo.queue().launch(ssr_mode=False)
 
170
  import pytube as pt
171
  from transformers import pipeline
172
  from transformers.pipelines.audio_utils import ffmpeg_read
173
+ import pytube
174
 
175
  import tempfile
176
  import os
 
241
  stream = yt.streams.filter(only_audio=True)[0]
242
  stream.download(filename=filename)
243
 
244
+ # @spaces.GPU
245
+ # def yt_transcribe(yt_url, task="transcribe", max_filesize=75.0):
246
+ # html_embed_str = _return_yt_html_embed(yt_url)
247
+
248
+ # with tempfile.TemporaryDirectory() as tmpdirname:
249
+ # # filepath = os.path.join(tmpdirname, "video.mp4")
250
+ # filepath = os.path.join(tmpdirname, "audio.mp3")
251
+ # download_yt_audio(yt_url, filepath)
252
+ # with open(filepath, "rb") as f:
253
+ # inputs = f.read()
254
+
255
+ # inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
256
+ # inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
257
+
258
+ # text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
259
+
260
+ # return html_embed_str, text
261
+
262
+
263
+ def yt_transcribe(yt_url, task="transcribe", progress=gr.Progress(), max_filesize=75.0):
264
+ progress(0, desc="Loading audio file...")
265
  html_embed_str = _return_yt_html_embed(yt_url)
266
+ try:
267
+ yt = pytube.YouTube(yt_url)
268
+ stream = yt.streams.filter(only_audio=True)[0]
269
+ except:
270
+ raise gr.Error("An error occurred while loading the YouTube video. Please try again.")
271
 
272
+ if stream.filesize_mb > max_filesize:
273
+ raise gr.Error(f"Maximum YouTube file size is {max_filesize}MB, got {stream.filesize_mb:.2f}MB.")
 
 
 
 
274
 
275
+ stream.download(filename="audio.mp3")
 
276
 
277
+ with open("audio.mp3", "rb") as f:
278
+ inputs = f.read()
279
 
280
+ inputs = ffmpeg_read(inputs, processor.feature_extractor.sampling_rate)
281
+ inputs = {"array": inputs, "sampling_rate": processor.feature_extractor.sampling_rate}
282
+ text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
283
  return html_embed_str, text
284
 
285
 
 
334
  )
335
 
336
  with demo:
337
+ gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe], ["Microphone", "Audio file", "YouTube"])
338
+ # gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
339
 
340
  demo.queue().launch(ssr_mode=False)