switch video
Browse files
app.py
CHANGED
@@ -28,7 +28,15 @@ def load_model(model_path="facebook/wav2vec2-large-robust-ft-swbd-300h"):
|
|
28 |
model = AutoModelForCTC.from_pretrained(model_path).to(device)
|
29 |
return processor, model
|
30 |
|
31 |
-
model_path = st.radio(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
processor, model = load_model(model_path)
|
34 |
|
@@ -58,7 +66,10 @@ def stream_text(url, chunk_duration_ms, pad_duration_ms):
|
|
58 |
# concat the last word (or its part) from the last frame with the current text
|
59 |
text = processor.decode(leftover_ids + predicted_ids)
|
60 |
# don't return the last word in case it's just partially recognized
|
61 |
-
|
|
|
|
|
|
|
62 |
yield text
|
63 |
else:
|
64 |
yield leftover_text
|
@@ -71,19 +82,8 @@ def main():
|
|
71 |
|
72 |
with st.form(key="inputs_form"):
|
73 |
|
74 |
-
|
75 |
-
|
76 |
-
ytSamHarris="https://www.youtube.com/watch?v=4dC_nRYIDZU&list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L&index=2"
|
77 |
-
ytJohnAbramson="https://www.youtube.com/watch?v=arrokG3wCdE&list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L&index=3"
|
78 |
-
ytElonMusk="https://www.youtube.com/watch?v=DxREm3s1scA&list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L&index=4"
|
79 |
-
ytJeffreyShainline="https://www.youtube.com/watch?v=EwueqdgIvq4&list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L&index=5"
|
80 |
-
ytJeffHawkins="https://www.youtube.com/watch?v=Z1KwkpTUbkg&list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L&index=6"
|
81 |
-
ytSamHarris="https://youtu.be/Ui38ZzTymDY?list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L"
|
82 |
-
ytSamHarris="https://youtu.be/4dC_nRYIDZU?list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L&t=7809"
|
83 |
-
ytSamHarris="https://youtu.be/4dC_nRYIDZU?list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L&t=7809"
|
84 |
-
ytSamHarris="https://youtu.be/4dC_nRYIDZU?list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L&t=7809"
|
85 |
-
state.youtube_url = st.text_input("YouTube URL", ytSeanKelly)
|
86 |
-
|
87 |
|
88 |
state.chunk_duration_ms = st.slider("Audio chunk duration (ms)", 2000, 10000, 3000, 100)
|
89 |
state.pad_duration_ms = st.slider("Padding duration (ms)", 100, 5000, 1000, 100)
|
@@ -101,7 +101,7 @@ def main():
|
|
101 |
state.chunks_taken = 0
|
102 |
|
103 |
|
104 |
-
state.lines = deque([], maxlen=
|
105 |
|
106 |
|
107 |
player = st_player(state.youtube_url, **player_options, key="youtube_player")
|
|
|
28 |
model = AutoModelForCTC.from_pretrained(model_path).to(device)
|
29 |
return processor, model
|
30 |
|
31 |
+
model_path = st.radio(
|
32 |
+
"Select a model", (
|
33 |
+
"jonatasgrosman/wav2vec2-xls-r-1b-spanish",
|
34 |
+
"jonatasgrosman/wav2vec2-large-xlsr-53-spanish",
|
35 |
+
"patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm",
|
36 |
+
"facebook/wav2vec2-large-xlsr-53-spanish",
|
37 |
+
"glob-asr/xls-r-es-test-lm"
|
38 |
+
)
|
39 |
+
)
|
40 |
|
41 |
processor, model = load_model(model_path)
|
42 |
|
|
|
66 |
# concat the last word (or its part) from the last frame with the current text
|
67 |
text = processor.decode(leftover_ids + predicted_ids)
|
68 |
# don't return the last word in case it's just partially recognized
|
69 |
+
if " " in text:
|
70 |
+
text, leftover_text = text.rsplit(" ", 1)
|
71 |
+
else:
|
72 |
+
leftover_text = ""
|
73 |
yield text
|
74 |
else:
|
75 |
yield leftover_text
|
|
|
82 |
|
83 |
with st.form(key="inputs_form"):
|
84 |
|
85 |
+
initial_url = "https://youtu.be/ghOqTkGzX7I?t=60"
|
86 |
+
state.youtube_url = st.text_input("YouTube URL", initial_url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
state.chunk_duration_ms = st.slider("Audio chunk duration (ms)", 2000, 10000, 3000, 100)
|
89 |
state.pad_duration_ms = st.slider("Padding duration (ms)", 100, 5000, 1000, 100)
|
|
|
101 |
state.chunks_taken = 0
|
102 |
|
103 |
|
104 |
+
state.lines = deque([], maxlen=5) # limit to the last n lines of subs
|
105 |
|
106 |
|
107 |
player = st_player(state.youtube_url, **player_options, key="youtube_player")
|