Spaces:

CVMX-jaca-tonos
/

YouTube-Video-Streaming-Spanish-ASR

Build error

App Files Community

lucio committed on May 5, 2022

Commit

e4a22f3

•

1 Parent(s): 4e336ff

switch video

Browse files

Files changed (1) hide show

app.py +16 -16

app.py CHANGED Viewed

@@ -28,7 +28,15 @@ def load_model(model_path="facebook/wav2vec2-large-robust-ft-swbd-300h"):
     model = AutoModelForCTC.from_pretrained(model_path).to(device)
     return processor, model
-model_path = st.radio("Select a model", ("jonatasgrosman/wav2vec2-xls-r-1b-spanish", "jonatasgrosman/wav2vec2-large-xlsr-53-spanish", "patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm", "facebook/wav2vec2-large-xlsr-53-spanish", "glob-asr/xls-r-es-test-lm"))
 processor, model = load_model(model_path)
@@ -58,7 +66,10 @@ def stream_text(url, chunk_duration_ms, pad_duration_ms):
                 # concat the last word (or its part) from the last frame with the current text
                 text = processor.decode(leftover_ids + predicted_ids)
                 # don't return the last word in case it's just partially recognized
-                text, leftover_text = text.rsplit(" ", 1)
                 yield text
             else:
                 yield leftover_text
@@ -71,19 +82,8 @@ def main():
     with st.form(key="inputs_form"):
-        # Our worlds best teachers on subjects of AI, Cognitive, Neuroscience for our Behavioral and Medical Health
-        ytSeanKelly="https://www.youtube.com/watch?v=85n-EEqKMSY"
-        ytSamHarris="https://www.youtube.com/watch?v=4dC_nRYIDZU&list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L&index=2"
-        ytJohnAbramson="https://www.youtube.com/watch?v=arrokG3wCdE&list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L&index=3"
-        ytElonMusk="https://www.youtube.com/watch?v=DxREm3s1scA&list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L&index=4"
-        ytJeffreyShainline="https://www.youtube.com/watch?v=EwueqdgIvq4&list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L&index=5"
-        ytJeffHawkins="https://www.youtube.com/watch?v=Z1KwkpTUbkg&list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L&index=6"
-        ytSamHarris="https://youtu.be/Ui38ZzTymDY?list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L"
-        ytSamHarris="https://youtu.be/4dC_nRYIDZU?list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L&t=7809"
-        ytSamHarris="https://youtu.be/4dC_nRYIDZU?list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L&t=7809"
-        ytSamHarris="https://youtu.be/4dC_nRYIDZU?list=PLHgX2IExbFouJoqEr8JMF5MbZSbyC91-L&t=7809"
-        state.youtube_url = st.text_input("YouTube URL", ytSeanKelly)
         state.chunk_duration_ms = st.slider("Audio chunk duration (ms)", 2000, 10000, 3000, 100)
         state.pad_duration_ms = st.slider("Padding duration (ms)", 100, 5000, 1000, 100)
@@ -101,7 +101,7 @@ def main():
         state.chunks_taken = 0
-        state.lines = deque([], maxlen=100)  # limit to the last n lines of subs
     player = st_player(state.youtube_url, **player_options, key="youtube_player")

     model = AutoModelForCTC.from_pretrained(model_path).to(device)
     return processor, model
+model_path = st.radio(
+    "Select a model", (
+        "jonatasgrosman/wav2vec2-xls-r-1b-spanish",
+        "jonatasgrosman/wav2vec2-large-xlsr-53-spanish",
+        "patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm",
+        "facebook/wav2vec2-large-xlsr-53-spanish",
+        "glob-asr/xls-r-es-test-lm"
+    )
+)
 processor, model = load_model(model_path)
                 # concat the last word (or its part) from the last frame with the current text
                 text = processor.decode(leftover_ids + predicted_ids)
                 # don't return the last word in case it's just partially recognized
+                if " " in text:
+                    text, leftover_text = text.rsplit(" ", 1)
+                else:
+                    leftover_text = ""
                 yield text
             else:
                 yield leftover_text
     with st.form(key="inputs_form"):
+        initial_url = "https://youtu.be/ghOqTkGzX7I?t=60"
+        state.youtube_url = st.text_input("YouTube URL", initial_url)
         state.chunk_duration_ms = st.slider("Audio chunk duration (ms)", 2000, 10000, 3000, 100)
         state.pad_duration_ms = st.slider("Padding duration (ms)", 100, 5000, 1000, 100)
         state.chunks_taken = 0
+        state.lines = deque([], maxlen=5)  # limit to the last n lines of subs
     player = st_player(state.youtube_url, **player_options, key="youtube_player")