Update app.py
Browse files
app.py
CHANGED
@@ -12,6 +12,11 @@ hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "flagged_Audio_Lebanese")
|
|
12 |
|
13 |
def transcribe(stream, new_chunk):
|
14 |
sr, y = new_chunk
|
|
|
|
|
|
|
|
|
|
|
15 |
y = y.astype(np.float32)
|
16 |
y /= np.max(np.abs(y))
|
17 |
|
@@ -21,6 +26,7 @@ def transcribe(stream, new_chunk):
|
|
21 |
stream = y
|
22 |
return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]
|
23 |
|
|
|
24 |
demo = gr.Interface(
|
25 |
transcribe,
|
26 |
["state", gr.Audio(sources=["microphone"], streaming=True)],
|
|
|
12 |
|
13 |
def transcribe(stream, new_chunk):
|
14 |
sr, y = new_chunk
|
15 |
+
|
16 |
+
# Convert to mono if stereo
|
17 |
+
if y.ndim > 1:
|
18 |
+
y = y.mean(axis=1)
|
19 |
+
|
20 |
y = y.astype(np.float32)
|
21 |
y /= np.max(np.abs(y))
|
22 |
|
|
|
26 |
stream = y
|
27 |
return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]
|
28 |
|
29 |
+
|
30 |
demo = gr.Interface(
|
31 |
transcribe,
|
32 |
["state", gr.Audio(sources=["microphone"], streaming=True)],
|