saattrupdan committed on
Commit
17cb7d3
·
1 Parent(s): 48609c3

feat: Add demo

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +59 -0
  3. requirements.txt +74 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv/
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Røst ASR demo."""
2
+
3
+ import warnings
4
+
5
+ import gradio as gr
6
+ import numpy as np
7
+ import samplerate
8
+ import torch
9
+ from punctfix import PunctFixer
10
+ from transformers import pipeline
11
+
12
# Silence FutureWarning messages so they do not clutter the demo's output.
warnings.filterwarnings(action="ignore", category=FutureWarning)

# Text shown at the top of the demo page.
TITLE = "Røst ASR Demo"
DESCRIPTION = """
This is a demo of the Danish speech recognition model Røst. Speak into the microphone
and see the text appear on the screen!
"""

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Speech recognition pipeline that turns audio samples into raw text.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="alexandrainst/roest-315m",
    device=device,
)

# Used to punctuate the raw transcription before it is shown to the user.
transcription_fixer = PunctFixer(language="da", device=device)
27
+
28
+ def transcribe_audio(sampling_rate_and_audio: tuple[int, np.ndarray]) -> str:
29
+ """Transcribe the audio.
30
+
31
+ Args:
32
+ sampling_rate_and_audio:
33
+ A tuple with the sampling rate and the audio.
34
+
35
+ Returns:
36
+ The transcription.
37
+ """
38
+ sampling_rate, audio = sampling_rate_and_audio
39
+ if audio.ndim > 1:
40
+ audio = np.mean(audio, axis=1)
41
+ audio = samplerate.resample(audio, 16_000 / sampling_rate, "sinc_best")
42
+ transcription = transcriber(inputs=audio)
43
+ if not isinstance(transcription, dict):
44
+ return ""
45
+ cleaned_transcription = transcription_fixer.punctuate(
46
+ text=transcription["text"]
47
+ )
48
+ return cleaned_transcription
49
+
50
# Audio can come either from the microphone or from an uploaded file.
audio_input = gr.Audio(sources=["microphone", "upload"])

# Interface wiring the audio input to the transcription function, with the
# result shown in a text box and flagging turned off.
demo = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_input,
    outputs="textbox",
    title=TITLE,
    description=DESCRIPTION,
    allow_flagging="never",
)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ annotated-types==0.7.0
3
+ anyio==4.4.0
4
+ attrs==24.2.0
5
+ certifi==2024.8.30
6
+ charset-normalizer==3.3.2
7
+ click==8.1.7
8
+ contourpy==1.3.0
9
+ cycler==0.12.1
10
+ exceptiongroup==1.2.2
11
+ fastapi==0.115.0
12
+ ffmpy==0.4.0
13
+ filelock==3.16.1
14
+ fonttools==4.53.1
15
+ fsspec==2024.9.0
16
+ gradio==4.44.0
17
+ gradio_client==1.3.0
18
+ h11==0.14.0
19
+ httpcore==1.0.5
20
+ httpx==0.27.2
21
+ huggingface-hub==0.25.0
22
+ hypothesis==6.112.1
23
+ idna==3.10
24
+ importlib_resources==6.4.5
25
+ Jinja2==3.1.4
26
+ kenlm @ https://github.com/kpu/kenlm/archive/master.zip#sha256=d23d300d559a45a5e3ede958dbbf2395231119c0b8cd97a1ea43480625894ff4
27
+ kiwisolver==1.4.7
28
+ markdown-it-py==3.0.0
29
+ MarkupSafe==2.1.5
30
+ matplotlib==3.9.2
31
+ mdurl==0.1.2
32
+ mpmath==1.3.0
33
+ networkx==3.3
34
+ numpy==1.26.4
35
+ orjson==3.10.7
36
+ packaging==24.1
37
+ pandas==2.2.2
38
+ pillow==10.4.0
39
+ punctfix==0.11.1
40
+ pyctcdecode==0.5.0
41
+ pydantic==2.9.2
42
+ pydantic_core==2.23.4
43
+ pydub==0.25.1
44
+ Pygments==2.18.0
45
+ pygtrie==2.5.0
46
+ pyparsing==3.1.4
47
+ python-dateutil==2.9.0.post0
48
+ python-multipart==0.0.9
49
+ pytz==2024.2
50
+ PyYAML==6.0.2
51
+ regex==2024.9.11
52
+ requests==2.32.3
53
+ rich==13.8.1
54
+ ruff==0.6.5
55
+ safetensors==0.4.5
56
+ samplerate==0.2.1
57
+ semantic-version==2.10.0
58
+ shellingham==1.5.4
59
+ six==1.16.0
60
+ sniffio==1.3.1
61
+ sortedcontainers==2.4.0
62
+ starlette==0.38.5
63
+ sympy==1.13.2
64
+ tokenizers==0.19.1
65
+ tomlkit==0.12.0
66
+ torch==2.4.1
67
+ tqdm==4.66.5
68
+ transformers==4.44.2
69
+ typer==0.12.5
70
+ typing_extensions==4.12.2
71
+ tzdata==2024.1
72
+ urllib3==2.2.3
73
+ uvicorn==0.30.6
74
+ websockets==12.0