ndelworth08 committed
Commit d93a410
Parent: e1f1492

demo first commit

Files changed (2)
  1. requirements.txt +2 -0
  2. rev_app.py +96 -0
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ wenet @ git+https://github.com/revdotcom/rev-wenet@feature/NERD-3007
+ gradio==3.14.0
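
A quick sanity check for these pins (a minimal sketch; it assumes "pip install -r requirements.txt" has already been run and only verifies that the two pinned packages import):

# Minimal sketch: confirm the pinned dependencies resolve.
# Assumes "pip install -r requirements.txt" has already been run.
import gradio as gr
from wenet.cli.model import load_model  # provided by the rev-wenet fork

print("gradio version:", gr.__version__)                     # expected: 3.14.0
print("wenet load_model importable:", callable(load_model))
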
rev_app.py ADDED
@@ -0,0 +1,96 @@
+ # Copyright (c) 2022 Horizon Robotics. (authors: Binbin Zhang)
+ #               2022 Chengdong Liang ([email protected])
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import gradio as gr
+ import torch
+ from wenet.cli.model import load_model
+
+
+
+ def process_cat_embs(cat_embs):
+     device = "cpu"
+     cat_embs = torch.tensor(
+         [float(c) for c in cat_embs.split(',')]).to(device)
+     return cat_embs
+
+
+ def download_rev_models():
+     # from huggingface_hub import hf_hub_download
+     # import joblib
+
+     # REPO_ID = "Revai/reginald"
+     # FILENAME = "sklearn_model.joblib"
+
+     # model = joblib.load(
+     #     hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
+     # )
+     model_path = "/Users/natalie/NERD-2941/reginald/10.jit.zip"
+     units_path = "/Users/natalie/NERD-2941/reginald/tk.units.txt"
+     audio_path = "/Users/natalie/NERD-2941/rev-wenet/runtime/web/fdhc0_si1559.wav"
+     cat_embs = "1,0"
+     device = "cpu"
+     cat_embs = process_cat_embs(cat_embs)
+     model = load_model(model_path, units_path)
+     return model
+
+ model = download_rev_models()
+
+
+ def recognition(audio, style=0):
+     if audio is None:
+         return "Input Error! Please provide an audio recording!"
+     # NOTE: the model expects 16 kHz audio
+
+     cat_embs = ','.join([str(s) for s in (1 - style, style)])
+     cat_embs = process_cat_embs(cat_embs)
+     ans = model.transcribe(audio, cat_embs=cat_embs)
+
+     if ans is None:
+         return "ERROR! No text output! Please try again!"
+     txt = ans['text']
+     return txt
+
+
+ # inputs
+ inputs = [
+     gr.inputs.Audio(source="microphone", type="filepath", label='Input audio'),
+     gr.Slider(0, 1, value=0, label="Style", info="Choose between verbatim and NV"),
+ ]
+
+ output = gr.outputs.Textbox(label="Output Text")
+
+ text = "Reginald Demo"
+
+ # description
+ description = (
+     "This is a speech recognition demo that supports verbatim and non-verbatim transcription. Try recording audio with disfluencies (e.g. 'uh', 'um') and test both transcription styles."  # noqa
+ )
+
+ article = (
+     "<p style='text-align: center'>"
+     "<a href='https://rev.com' target='_blank'>Github: Learn more about Rev</a>"  # noqa
+     "</p>")
+
+ interface = gr.Interface(
+     fn=recognition,
+     inputs=inputs,
+     outputs=output,
+     title=text,
+     description=description,
+     article=article,
+     theme='huggingface',
+ )
+
+ interface.launch(enable_queue=True)
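
The hardcoded /Users/natalie/... paths in download_rev_models() only resolve on the author's machine, so the Space itself cannot load the model from them. Below is a minimal sketch of the Hub-based loading that the commented-out code hints at; the repo id "Revai/reginald" is taken from that comment, while the filenames "10.jit.zip" and "tk.units.txt" are assumptions based on the local basenames, not confirmed contents of the repo.

# Sketch: fetch the checkpoint and units file from the Hugging Face Hub
# instead of hardcoded local paths. The repo id comes from the commented-out
# code above; the filenames are assumed from the local basenames.
from huggingface_hub import hf_hub_download
from wenet.cli.model import load_model

REPO_ID = "Revai/reginald"     # from the commented-out code (assumed to host these files)
MODEL_FILE = "10.jit.zip"      # assumed filename
UNITS_FILE = "tk.units.txt"    # assumed filename


def download_rev_models_from_hub():
    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILE)
    units_path = hf_hub_download(repo_id=REPO_ID, filename=UNITS_FILE)
    return load_model(model_path, units_path)

With this in place, model = download_rev_models_from_hub() could replace the local-path loader once the files are actually uploaded to the Hub repo.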