Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,9 @@
|
|
1 |
import spaces
|
2 |
import torch
|
3 |
-
|
4 |
import gradio as gr
|
5 |
import yt_dlp as youtube_dl
|
6 |
from transformers import pipeline
|
7 |
from transformers.pipelines.audio_utils import ffmpeg_read
|
8 |
-
|
9 |
import tempfile
|
10 |
import os
|
11 |
|
@@ -23,7 +21,6 @@ pipe = pipeline(
|
|
23 |
device=device,
|
24 |
)
|
25 |
|
26 |
-
|
27 |
@spaces.GPU
|
28 |
def transcribe(inputs, task):
|
29 |
if inputs is None:
|
@@ -37,10 +34,48 @@ def transcribe(inputs, task):
|
|
37 |
word_timestamps = []
|
38 |
for chunk in timestamps:
|
39 |
for word_info in chunk["words"]:
|
40 |
-
word_timestamps.append(f"{word_info['word']} [{word_info['start']}-{word_info['end']}]")
|
41 |
|
42 |
return "\n".join(word_timestamps)
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
@spaces.GPU
|
46 |
def yt_transcribe(yt_url, task, max_filesize=75.0):
|
@@ -63,12 +98,11 @@ def yt_transcribe(yt_url, task, max_filesize=75.0):
|
|
63 |
word_timestamps = []
|
64 |
for chunk in timestamps:
|
65 |
for word_info in chunk["words"]:
|
66 |
-
word_timestamps.append(f"{word_info['word']} [{word_info['start']}-{word_info['end']}]")
|
67 |
|
68 |
return html_embed_str, "\n".join(word_timestamps)
|
69 |
|
70 |
|
71 |
-
|
72 |
demo = gr.Blocks()
|
73 |
|
74 |
mf_transcribe = gr.Interface(
|
@@ -77,7 +111,7 @@ mf_transcribe = gr.Interface(
|
|
77 |
gr.Audio(sources="microphone", type="filepath"),
|
78 |
gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
|
79 |
],
|
80 |
-
outputs=
|
81 |
title="Whisper Large V3: Transcribe Audio",
|
82 |
description=(
|
83 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
|
@@ -93,7 +127,7 @@ file_transcribe = gr.Interface(
|
|
93 |
gr.Audio(sources="upload", type="filepath", label="Audio file"),
|
94 |
gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
|
95 |
],
|
96 |
-
outputs=
|
97 |
title="Whisper Large V3: Transcribe Audio",
|
98 |
description=(
|
99 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
|
@@ -109,7 +143,7 @@ yt_transcribe = gr.Interface(
|
|
109 |
gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
|
110 |
gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
|
111 |
],
|
112 |
-
outputs=["html", "text"
|
113 |
title="Whisper Large V3: Transcribe YouTube",
|
114 |
description=(
|
115 |
"Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
|
|
|
1 |
import spaces
|
2 |
import torch
|
|
|
3 |
import gradio as gr
|
4 |
import yt_dlp as youtube_dl
|
5 |
from transformers import pipeline
|
6 |
from transformers.pipelines.audio_utils import ffmpeg_read
|
|
|
7 |
import tempfile
|
8 |
import os
|
9 |
|
|
|
21 |
device=device,
|
22 |
)
|
23 |
|
|
|
24 |
@spaces.GPU
|
25 |
def transcribe(inputs, task):
|
26 |
if inputs is None:
|
|
|
34 |
word_timestamps = []
|
35 |
for chunk in timestamps:
|
36 |
for word_info in chunk["words"]:
|
37 |
+
word_timestamps.append(f"{word_info['word']} [{word_info['start']:.2f}-{word_info['end']:.2f}]")
|
38 |
|
39 |
return "\n".join(word_timestamps)
|
40 |
|
41 |
+
def _return_yt_html_embed(yt_url):
|
42 |
+
video_id = yt_url.split("?v=")[-1]
|
43 |
+
HTML_str = (
|
44 |
+
f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
|
45 |
+
" </center>"
|
46 |
+
)
|
47 |
+
return HTML_str
|
48 |
+
|
49 |
+
def download_yt_audio(yt_url, filename):
    """Download a YouTube video to ``filename`` after checking its duration.

    The video's metadata is fetched first without downloading. Its
    ``duration_string`` (colon-separated, e.g. ``"SS"``, ``"MM:SS"`` or
    ``"HH:MM:SS"``) is converted to seconds and compared against the
    module-level ``YT_LENGTH_LIMIT_S`` (defined outside this block). Only
    videos within the limit are downloaded.

    Args:
        yt_url: URL of the YouTube video.
        filename: Output path, passed to yt-dlp as the ``outtmpl`` option.

    Raises:
        gr.Error: If metadata extraction fails, the video is longer than
            ``YT_LENGTH_LIMIT_S`` seconds, or the download fails.
    """
    # Imported here because ``time`` is not among the imports visible at the
    # top of the file; a function-scope import is harmless if it is.
    import time

    # The context manager releases the loader's resources once metadata is read.
    with youtube_dl.YoutubeDL() as info_loader:
        try:
            info = info_loader.extract_info(yt_url, download=False)
        except youtube_dl.utils.DownloadError as err:
            raise gr.Error(str(err)) from err

    # Fold "H:M:S" / "M:S" / "S" into seconds; missing leading fields count
    # as zero, matching the previous zero-padding logic.
    file_length_s = 0
    for sub_length in info["duration_string"].split(":"):
        file_length_s = file_length_s * 60 + int(sub_length)

    if file_length_s > YT_LENGTH_LIMIT_S:
        yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
        file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
        raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")

    ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        try:
            ydl.download([yt_url])
        except (youtube_dl.utils.DownloadError, youtube_dl.utils.ExtractorError) as err:
            # Catch DownloadError as well as ExtractorError so download
            # failures reach the UI as gr.Error, like metadata failures do.
            raise gr.Error(str(err)) from err
|
79 |
|
80 |
@spaces.GPU
|
81 |
def yt_transcribe(yt_url, task, max_filesize=75.0):
|
|
|
98 |
word_timestamps = []
|
99 |
for chunk in timestamps:
|
100 |
for word_info in chunk["words"]:
|
101 |
+
word_timestamps.append(f"{word_info['word']} [{word_info['start']:.2f}-{word_info['end']:.2f}]")
|
102 |
|
103 |
return html_embed_str, "\n".join(word_timestamps)
|
104 |
|
105 |
|
|
|
106 |
demo = gr.Blocks()
|
107 |
|
108 |
mf_transcribe = gr.Interface(
|
|
|
111 |
gr.Audio(sources="microphone", type="filepath"),
|
112 |
gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
|
113 |
],
|
114 |
+
outputs="text",
|
115 |
title="Whisper Large V3: Transcribe Audio",
|
116 |
description=(
|
117 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
|
|
|
127 |
gr.Audio(sources="upload", type="filepath", label="Audio file"),
|
128 |
gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
|
129 |
],
|
130 |
+
outputs="text",
|
131 |
title="Whisper Large V3: Transcribe Audio",
|
132 |
description=(
|
133 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
|
|
|
143 |
gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
|
144 |
gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
|
145 |
],
|
146 |
+
outputs=["html", "text"],
|
147 |
title="Whisper Large V3: Transcribe YouTube",
|
148 |
description=(
|
149 |
"Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
|