Spaces:
Runtime error
Runtime error
Upload with huggingface_hub
Browse files
README.md
CHANGED
@@ -5,7 +5,7 @@ emoji: 🔥
|
|
5 |
colorFrom: indigo
|
6 |
colorTo: indigo
|
7 |
sdk: gradio
|
8 |
-
sdk_version: 3.
|
9 |
app_file: run.py
|
10 |
pinned: false
|
11 |
---
|
|
|
5 |
colorFrom: indigo
|
6 |
colorTo: indigo
|
7 |
sdk: gradio
|
8 |
+
sdk_version: 3.11.0
|
9 |
app_file: run.py
|
10 |
pinned: false
|
11 |
---
|
run.ipynb
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"cells": [{"cell_type": "markdown", "id": 302934307671667531413257853548643485645, "metadata": {}, "source": ["# Gradio Demo: streaming_stt"]}, {"cell_type": "code", "execution_count": null, "id": 272996653310673477252411125948039410165, "metadata": {}, "outputs": [], "source": ["!pip install -q gradio deepspeech==0.9.3"]}, {"cell_type": "code", "execution_count": null, "id": 288918539441861185822528903084949547379, "metadata": {}, "outputs": [], "source": ["from deepspeech import Model\n", "import gradio as gr\n", "import numpy as np\n", "import urllib.request\n", "\n", "model_file_path = \"deepspeech-0.9.3-models.pbmm\"\n", "lm_file_path = \"deepspeech-0.9.3-models.scorer\"\n", "url = \"https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/\"\n", "\n", "urllib.request.urlretrieve(url + model_file_path, filename=model_file_path)\n", "urllib.request.urlretrieve(url + lm_file_path, filename=lm_file_path)\n", "\n", "beam_width = 100\n", "lm_alpha = 0.93\n", "lm_beta = 1.18\n", "\n", "model = Model(model_file_path)\n", "model.enableExternalScorer(lm_file_path)\n", "model.setScorerAlphaBeta(lm_alpha, lm_beta)\n", "model.setBeamWidth(beam_width)\n", "\n", "\n", "def reformat_freq(sr, y):\n", "    \"\"\"Return (16000, samples) for a 16 kHz or 48 kHz audio chunk.\n", "\n", "    48 kHz input is peak-normalised, averaged in groups of three samples\n", "    (48k -> 16k) and cast to int16; 16 kHz input is returned unchanged.\n", "    Raises ValueError for any other sample rate.\n", "    \"\"\"\n", "    if sr not in (\n", "        48000,\n", "        16000,\n", "    ):  # Deepspeech only supports 16k, (we convert 48k -> 16k)\n", "        raise ValueError(\"Unsupported rate\", sr)\n", "    if sr == 48000:\n", "        # Drop the 0-2 trailing samples that do not fill a group of three;\n", "        # reshape((-1, 3)) raises on any other chunk length.\n", "        y = np.asarray(y)\n", "        y = y[: len(y) - len(y) % 3].astype(\"float64\")\n", "        # Scale by the absolute peak (never below 1) so a chunk whose negative\n", "        # peak exceeds its positive peak cannot overflow int16.\n", "        peak = max(np.max(np.abs(y)), 1) if y.size else 1\n", "        y = ((y / peak) * 32767).reshape((-1, 3)).mean(axis=1).astype(\"int16\")\n", "        sr = 16000\n", "    return sr, y\n", "\n", "\n", "def transcribe(speech, stream):\n", "    \"\"\"Feed one microphone chunk to the DeepSpeech stream and decode it.\n", "\n", "    speech is the (sample_rate, samples) pair from the gr.Audio input, or\n", "    None when no audio was delivered. stream is the DeepSpeech stream kept\n", "    in the Interface state (None on the first call). Returns the transcript\n", "    so far and the stream to store back into state.\n", "    \"\"\"\n", "    if speech is None:\n", "        # No new audio: report the transcript so far, leave the stream as is.\n", "        text = \"\" if stream is None else stream.intermediateDecode()\n", "        return text, stream\n", "    _, y = reformat_freq(*speech)\n", "    if stream is None:\n", "        stream = model.createStream()\n", "    stream.feedAudioContent(y)\n", "    text = stream.intermediateDecode()\n", "    return text, stream\n", "\n", "\n", "demo = gr.Interface(\n", "    transcribe,\n", "    [gr.Audio(source=\"microphone\", streaming=True), \"state\"],\n", "    [\"text\", \"state\"],\n", "    live=True,\n", ")\n", "\n", "if __name__ == \"__main__\":\n", "    demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
|