Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -170,6 +170,7 @@ import yt_dlp as youtube_dl
|
|
170 |
import pytube as pt
|
171 |
from transformers import pipeline
|
172 |
from transformers.pipelines.audio_utils import ffmpeg_read
|
|
|
173 |
|
174 |
import tempfile
|
175 |
import os
|
@@ -240,22 +241,45 @@ def download_yt_audio(yt_url, filename):
|
|
240 |
stream = yt.streams.filter(only_audio=True)[0]
|
241 |
stream.download(filename=filename)
|
242 |
|
243 |
-
@spaces.GPU
|
244 |
-
def yt_transcribe(yt_url, task="transcribe", max_filesize=75.0):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
245 |
html_embed_str = _return_yt_html_embed(yt_url)
|
|
|
|
|
|
|
|
|
|
|
246 |
|
247 |
-
|
248 |
-
|
249 |
-
filepath = os.path.join(tmpdirname, "audio.mp3")
|
250 |
-
download_yt_audio(yt_url, filepath)
|
251 |
-
with open(filepath, "rb") as f:
|
252 |
-
inputs = f.read()
|
253 |
|
254 |
-
|
255 |
-
inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
|
256 |
|
257 |
-
|
|
|
258 |
|
|
|
|
|
|
|
259 |
return html_embed_str, text
|
260 |
|
261 |
|
@@ -310,7 +334,7 @@ yt_transcribe = gr.Interface(
|
|
310 |
)
|
311 |
|
312 |
with demo:
|
313 |
-
|
314 |
-
gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
|
315 |
|
316 |
demo.queue().launch(ssr_mode=False)
|
|
|
170 |
import pytube as pt
|
171 |
from transformers import pipeline
|
172 |
from transformers.pipelines.audio_utils import ffmpeg_read
|
173 |
+
import pytube
|
174 |
|
175 |
import tempfile
|
176 |
import os
|
|
|
241 |
stream = yt.streams.filter(only_audio=True)[0]
|
242 |
stream.download(filename=filename)
|
243 |
|
244 |
+
# @spaces.GPU
|
245 |
+
# def yt_transcribe(yt_url, task="transcribe", max_filesize=75.0):
|
246 |
+
# html_embed_str = _return_yt_html_embed(yt_url)
|
247 |
+
|
248 |
+
# with tempfile.TemporaryDirectory() as tmpdirname:
|
249 |
+
# # filepath = os.path.join(tmpdirname, "video.mp4")
|
250 |
+
# filepath = os.path.join(tmpdirname, "audio.mp3")
|
251 |
+
# download_yt_audio(yt_url, filepath)
|
252 |
+
# with open(filepath, "rb") as f:
|
253 |
+
# inputs = f.read()
|
254 |
+
|
255 |
+
# inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
|
256 |
+
# inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
|
257 |
+
|
258 |
+
# text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
|
259 |
+
|
260 |
+
# return html_embed_str, text
|
261 |
+
|
262 |
+
|
263 |
+
def yt_transcribe(yt_url, task="transcribe", progress=gr.Progress(), max_filesize=75.0):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        yt_url: URL of the YouTube video.
        task: Forwarded to the pipeline as ``generate_kwargs["task"]``.
        progress: Gradio progress tracker; called once before loading starts.
        max_filesize: Largest audio stream, in megabytes, that will be
            downloaded.

    Returns:
        A ``(html_embed_str, text)`` tuple: the result of
        ``_return_yt_html_embed(yt_url)`` and the ``"text"`` entry of the
        pipeline output.

    Raises:
        gr.Error: If the video or its audio stream cannot be loaded, or if
            the stream is larger than ``max_filesize``.
    """
    progress(0, desc="Loading audio file...")
    html_embed_str = _return_yt_html_embed(yt_url)

    try:
        yt = pytube.YouTube(yt_url)
        stream = yt.streams.filter(only_audio=True)[0]
    except Exception as err:
        # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit still
        # propagate; chain the cause so the real failure shows up in the logs.
        raise gr.Error("An error occurred while loading the YouTube video. Please try again.") from err

    if stream.filesize_mb > max_filesize:
        raise gr.Error(f"Maximum YouTube file size is {max_filesize}MB, got {stream.filesize_mb:.2f}MB.")

    # Download into a per-call temporary directory: a fixed "audio.mp3" in the
    # working directory would be shared by concurrent requests and never removed.
    with tempfile.TemporaryDirectory() as tmpdirname:
        filepath = os.path.join(tmpdirname, "audio.mp3")
        stream.download(output_path=tmpdirname, filename="audio.mp3")
        with open(filepath, "rb") as f:
            inputs = f.read()

    # Take the sampling rate from the same `pipe` that runs inference, as the
    # previous revision of this function did.
    sampling_rate = pipe.feature_extractor.sampling_rate
    inputs = ffmpeg_read(inputs, sampling_rate)
    inputs = {"array": inputs, "sampling_rate": sampling_rate}

    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
    return html_embed_str, text
|
284 |
|
285 |
|
|
|
334 |
)
|
335 |
|
336 |
# Mount the three transcription interfaces as tabs inside the `demo` context,
# then enable the request queue and start the app.
with demo:
    gr.TabbedInterface(
        interface_list=[mf_transcribe, file_transcribe, yt_transcribe],
        tab_names=["Microphone", "Audio file", "YouTube"],
    )

demo.queue().launch(ssr_mode=False)
|