aliabd HF staff committed on
Commit
53f1a7d
·
1 Parent(s): af6ac14

Upload with huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. run.ipynb +1 -0
README.md CHANGED
@@ -5,7 +5,7 @@ emoji: 🔥
5
  colorFrom: indigo
6
  colorTo: indigo
7
  sdk: gradio
8
- sdk_version: 3.10.1
9
  app_file: run.py
10
  pinned: false
11
  ---
 
5
  colorFrom: indigo
6
  colorTo: indigo
7
  sdk: gradio
8
+ sdk_version: 3.11.0
9
  app_file: run.py
10
  pinned: false
11
  ---
run.ipynb ADDED
@@ -0,0 +1 @@
 
 
1
+ {"cells": [{"cell_type": "markdown", "id": 302934307671667531413257853548643485645, "metadata": {}, "source": ["# Gradio Demo: streaming_stt"]}, {"cell_type": "code", "execution_count": null, "id": 272996653310673477252411125948039410165, "metadata": {}, "outputs": [], "source": ["!pip install -q gradio deepspeech==0.9.3"]}, {"cell_type": "code", "execution_count": null, "id": 288918539441861185822528903084949547379, "metadata": {}, "outputs": [], "source": ["from deepspeech import Model\n", "import gradio as gr\n", "import numpy as np\n", "import urllib.request\n", "\n", "model_file_path = \"deepspeech-0.9.3-models.pbmm\"\n", "lm_file_path = \"deepspeech-0.9.3-models.scorer\"\n", "url = \"https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/\"\n", "\n", "urllib.request.urlretrieve(url + model_file_path, filename=model_file_path)\n", "urllib.request.urlretrieve(url + lm_file_path, filename=lm_file_path)\n", "\n", "beam_width = 100\n", "lm_alpha = 0.93\n", "lm_beta = 1.18\n", "\n", "model = Model(model_file_path)\n", "model.enableExternalScorer(lm_file_path)\n", "model.setScorerAlphaBeta(lm_alpha, lm_beta)\n", "model.setBeamWidth(beam_width)\n", "\n", "\n", "def reformat_freq(sr, y):\n", " if sr not in (\n", " 48000,\n", " 16000,\n", " ): # Deepspeech only supports 16k, (we convert 48k -> 16k)\n", " raise ValueError(\"Unsupported rate\", sr)\n", " if sr == 48000:\n", " y = (\n", " ((y / max(np.max(y), 1)) * 32767)\n", " .reshape((-1, 3))\n", " .mean(axis=1)\n", " .astype(\"int16\")\n", " )\n", " sr = 16000\n", " return sr, y\n", "\n", "\n", "def transcribe(speech, stream):\n", " _, y = reformat_freq(*speech)\n", " if stream is None:\n", " stream = model.createStream()\n", " stream.feedAudioContent(y)\n", " text = stream.intermediateDecode()\n", " return text, stream\n", "\n", "\n", "demo = gr.Interface(\n", " transcribe,\n", " [gr.Audio(source=\"microphone\", streaming=True), \"state\"],\n", " [\"text\", \"state\"],\n", " live=True,\n", ")\n", "\n", "if 
__name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}