Spaces:

geokanaan
/

arabeasy

Sleeping

geokanaan committed on Aug 15, 2024

Commit

95ce231

verified ·

1 Parent(s): 3581f0e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ from transformers import pipeline
 import os
-pipe = pipeline(task="automatic-speech-recognition", model="geokanaan/Whisper_Base_Lebanese_Arabizi")
@@ -15,17 +15,25 @@ def transcribe(audio, actual_transcription):
 HF_TOKEN = os.getenv('WRITE')
 hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "flagged_Audio_Lebanese")
-iface = gr.Interface(
-    fn=transcribe,
-    inputs=[
-        gr.Audio(sources="microphone", type="filepath"),
-        gr.Textbox(label="Actual Transcription")
-    ],
-    outputs="text",
     title="arabeasy",
     description="Realtime demo for Lebanese Arabizi speech recognition",
     allow_flagging='manual',  # Enable manual flagging
-    flagging_callback=hf_writer
 )
-iface.launch(share=True)

 import os
+# Build the speech-recognition pipeline once at module level so every call to
+# transcribe() reuses the same loaded model.
+transcriber = pipeline(task="automatic-speech-recognition", model="geokanaan/Whisper_Base_Lebanese_Arabizi")
 # Token is read from the 'WRITE' environment variable; os.getenv returns None
 # when the variable is unset.
 HF_TOKEN = os.getenv('WRITE')
 # Dataset saver targeting "flagged_Audio_Lebanese".
 # NOTE(review): in the visible code this writer is not passed to the
 # Interface as a flagging callback - confirm whether that is intended.
 hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "flagged_Audio_Lebanese")
+def transcribe(stream, new_chunk):
+    """Append a streamed audio chunk to the running buffer and transcribe it.
+
+    Args:
+        stream: Audio accumulated from earlier chunks as a 1-D float32 array,
+            or None on the first call.
+        new_chunk: A ``(sampling_rate, samples)`` tuple for the newest chunk.
+
+    Returns:
+        A ``(stream, text)`` tuple: the updated buffer (fed back in as
+        ``stream`` on the next call) and the transcription of the whole
+        buffer so far.
+    """
+    sr, y = new_chunk
+    # The ASR pipeline expects single-channel audio; average the channels of
+    # a (samples, channels) chunk down to mono.
+    if y.ndim > 1:
+        y = y.mean(axis=1)
+    y = y.astype(np.float32)
+    # Peak-normalise, but skip empty or all-silent chunks: dividing by a zero
+    # peak would fill the buffer with NaNs and corrupt every later result.
+    peak = np.max(np.abs(y)) if y.size else 0.0
+    if peak > 0:
+        y /= peak
+    stream = y if stream is None else np.concatenate([stream, y])
+    return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]
+# Streaming interface around transcribe(): the "state" input/output pair
+# carries the accumulated audio buffer between calls, the microphone Audio
+# component supplies each new chunk, and the "text" output shows the
+# transcription returned for the buffer so far.
+demo = gr.Interface(
+    transcribe,
+    ["state", gr.Audio(sources=["microphone"], streaming=True)],
+    ["state", "text"],
+    live=True,
     title="arabeasy",
     description="Realtime demo for Lebanese Arabizi speech recognition",
     allow_flagging='manual',  # Enable manual flagging
 )
+# Start the app with default launch settings.
+demo.launch()