ahmedJaafari committed on
Commit
cebdabc
·
1 Parent(s): ced0328

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -10
app.py CHANGED
@@ -18,7 +18,8 @@ def speech_file_to_array_fn(path, max_seconds=10):
18
  batch = {"file": path}
19
  speech_array, sampling_rate = torchaudio.load(batch["file"])
20
  if sampling_rate != 16000:
21
- transform = torchaudio.transforms.Resample(orig_freq=sampling_rate,new_freq=16000)
 
22
  speech_array = transform(speech_array)
23
  speech_array = speech_array[0]
24
  if max_seconds > 0:
@@ -42,17 +43,15 @@ def inference(audio):
42
  with torch.no_grad():
43
  logits = model(input_values).logits
44
 
45
- #pred_ids = torch.argmax(logits, dim=-1)
46
- h = logits.numpy()[0,:,:]
47
- v = np.pad(h, [0, 2], mode='constant')
48
 
49
- output = processor.decode(v).text
50
-
51
- return output[:-4]
52
 
53
  inputs = gr.inputs.Audio(label="Record Audio", source="microphone", type='file')
54
  outputs = gr.outputs.Textbox(label="Output Text")
55
  title = "Annarabic Speech Recognition System"
56
- description = "Gradio demo for Annarabic ASR. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."
57
- examples=[['Aya.mp3'], ['Loubna.mp3']]
58
- gr.Interface(inference, inputs, outputs, title=title, description=description, examples=examples).launch()
 
 
18
  batch = {"file": path}
19
  speech_array, sampling_rate = torchaudio.load(batch["file"])
20
  if sampling_rate != 16000:
21
+ transform = torchaudio.transforms.Resample(orig_freq=sampling_rate,
22
+ new_freq=16000)
23
  speech_array = transform(speech_array)
24
  speech_array = speech_array[0]
25
  if max_seconds > 0:
 
43
  with torch.no_grad():
44
  logits = model(input_values).logits
45
 
46
+ output = processor.decode(logits.numpy()[0]).text
47
+ print(output)
 
48
 
49
+ return output
 
 
50
 
51
  inputs = gr.inputs.Audio(label="Record Audio", source="microphone", type='file')
52
  outputs = gr.outputs.Textbox(label="Output Text")
53
  title = "Annarabic Speech Recognition System"
54
+ description = 'Demo for <b>Annarabic ASR</b>. To use it, simply upload your audio, or click on one of the examples to load them. Only the 10 first seconds of the audio will be transcribed and GPU runtime is not used. For more information, contact Ahmed Jaafari via email: <a href = "mailto: a.jaafari@aui.ma">a.jaafari@aui.ma</a> or phone: <a href = "tel: +212658537105">+212658537105</a>.'
55
+ examples=[['Aya.mp3'], ['Loubna.mp3'], ['Omar.wav'], ['Yassir.wav']]
56
+ article="* The ASR never trained on the given examples."
57
+ gr.Interface(inference, inputs, outputs, title=title, description=description, article=article, examples=examples).launch()