Spaces:

acmc
/

grammASRian

Sleeping

App Files Community

aldan.creo committed 24 days ago

Commit

7e7acc6

•

1 Parent(s): b99bb69

Bugfix

Browse files

Files changed (1) hide show

app.py +40 -32

app.py CHANGED Viewed

@@ -2,18 +2,27 @@ import gradio as gr
 from transformers import pipeline
 import numpy as np
-transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
-def transcribe(state, words_list, new_chunk):
     print(f"state: {state}")
-    if state is None:
-        state = {}
     stream = state.get("stream", None)
     previous_transcription = state.get("full_transcription", "")
-    previous_counts_of_words = state.get("counts_of_words", {})
     if new_chunk is None:
         gr.Info("You can start transcribing by clicking on the Record button")
@@ -22,12 +31,6 @@ def transcribe(state, words_list, new_chunk):
     sr, y = new_chunk
-    try:
-        words_to_check_for = [word.strip() for word in words_list.split(",")]
-    except:
-        gr.Warning("Please enter a valid list of words to check for")
-        words_to_check_for = []
     # Convert to mono if stereo
     if y.ndim > 1:
         y = y.mean(axis=1)
@@ -48,18 +51,12 @@ def transcribe(state, words_list, new_chunk):
         return state, previous_counts_of_words, previous_transcription
     print(f"new transcription: {new_transcription}")
-    new_transcription_text = new_transcription["text"]
-    full_transcription_text = f"{previous_transcription} {new_transcription_text}"
-    new_transcription_text_lower = new_transcription_text.lower()
-    new_counts_of_words = {
-        word: new_transcription_text_lower.count(word) for word in words_to_check_for
-    }
     new_counts_of_words = {
-        word: new_counts_of_words.get(word, 0) + previous_counts_of_words.get(word, 0)
-        for word in words_to_check_for
     }
     new_state = {
@@ -73,15 +70,26 @@ def transcribe(state, words_list, new_chunk):
     return new_state, new_counts_of_words, full_transcription_text
-demo = gr.Interface(
-    transcribe,
-    [
-        "state",
-        gr.Textbox(label="List of filer words"),
-        gr.Audio(sources=["microphone"], streaming=True),
-    ],
-    ["state", gr.JSON(label="Filler words count"), gr.Text(label="Transcription")],
-    live=True,
-)
-demo.launch()

 from transformers import pipeline
 import numpy as np
+transcriber = pipeline(
+    "automatic-speech-recognition",
+    model="openai/whisper-base.en",
+    return_timestamps=True,
+)
+def transcribe_live(state, words_list, new_chunk):
     print(f"state: {state}")
+    try:
+        words_to_check_for = [word.strip() for word in words_list.split(",")]
+    except:
+        gr.Warning("Please enter a valid list of words to check for")
+        words_to_check_for = []
     stream = state.get("stream", None)
     previous_transcription = state.get("full_transcription", "")
+    previous_counts_of_words = state.get(
+        "counts_of_words", {word: 0 for word in words_to_check_for}
+    )
     if new_chunk is None:
         gr.Info("You can start transcribing by clicking on the Record button")
     sr, y = new_chunk
     # Convert to mono if stereo
     if y.ndim > 1:
         y = y.mean(axis=1)
         return state, previous_counts_of_words, previous_transcription
     print(f"new transcription: {new_transcription}")
+    full_transcription_text = new_transcription["text"]
+    full_transcription_text_lower = full_transcription_text.lower()
     new_counts_of_words = {
+        word: full_transcription_text_lower.count(word) for word in words_to_check_for
     }
     new_state = {
     return new_state, new_counts_of_words, full_transcription_text
+with gr.Blocks() as demo:
+    state = gr.State(
+        value={
+            "stream": None,
+            "full_transcription": "",
+            "counts_of_words": {},
+        }
+    )
+    filler_words = gr.Textbox(label="List of filer words", value="like, so, you know")
+    recording = gr.Audio(streaming=True, label="Recording")
+    word_counts = gr.JSON(label="Filler words count", value={})
+    transcription = gr.Textbox(label="Transcription", value="")
+    recording.stream(
+        transcribe_live,
+        inputs=[state, filler_words, recording],
+        outputs=[state, word_counts, transcription],
+        stream_every=5,
+        time_limit=60,
+    )
+demo.launch(show_error=True)