yaya-sy committed on
Commit
a55cb0d
·
1 Parent(s): 06844e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -13
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
  from transformers import Wav2Vec2ForCTC, AutoProcessor
 
3
  import torch
4
  import librosa
5
  import json
@@ -9,9 +10,10 @@ with open('ISO_codes.json', 'r') as file:
9
 
10
  languages = list(iso_codes.keys())
11
 
12
- model_id = "facebook/mms-1b-all"
13
  processor = AutoProcessor.from_pretrained(model_id)
14
  model = Wav2Vec2ForCTC.from_pretrained(model_id)
 
15
 
16
  def transcribe(audio_file_mic=None, audio_file_upload=None, language="English (eng)"):
17
  if audio_file_mic:
@@ -29,19 +31,10 @@ def transcribe(audio_file_mic=None, audio_file_upload=None, language="English (e
29
  # Keep the same model in memory and simply switch out the language adapters by calling load_adapter() for the model and set_target_lang() for the tokenizer
30
  language_code = iso_codes[language]
31
  processor.tokenizer.set_target_lang(language_code)
32
- model.load_adapter(language_code)
33
 
34
- inputs = processor(speech, sampling_rate=16_000, return_tensors="pt")
35
 
36
- with torch.no_grad():
37
- outputs = model(**inputs).logits
38
-
39
- ids = torch.argmax(outputs, dim=-1)[0]
40
- transcription = processor.decode(ids)
41
- return transcription
42
-
43
- examples = [["kab_1.mp3", None, "Amazigh (kab)"],
44
- ["kab_2.mp3", None, "Amazigh (kab)"]]
45
 
46
  description = '''Automatic Speech Recognition with [MMS](https://ai.facebook.com/blog/multilingual-model-speech-recognition/) (Massively Multilingual Speech) by Meta.
47
  Supports [1162 languages](https://dl.fbaipublicfiles.com/mms/misc/language_coverage_mms.html). Read the paper for more details: [Scaling Speech Technology to 1,000+ Languages](https://arxiv.org/abs/2305.13516).'''
@@ -53,7 +46,6 @@ iface = gr.Interface(fn=transcribe,
53
  gr.Dropdown(choices=languages, label="Language", value="English (eng)")
54
  ],
55
  outputs=gr.Textbox(label="Transcription"),
56
- examples=examples,
57
  description=description
58
  )
59
  iface.launch()
 
1
  import gradio as gr
2
  from transformers import Wav2Vec2ForCTC, AutoProcessor
3
+ from optimum.bettertransformer import BetterTransformer
4
  import torch
5
  import librosa
6
  import json
 
10
 
11
  languages = list(iso_codes.keys())
12
 
13
+ model_id = "cawoylel/windanam_mms-1b-tts_v2"
14
  processor = AutoProcessor.from_pretrained(model_id)
15
  model = Wav2Vec2ForCTC.from_pretrained(model_id)
16
+ model = BetterTransformer.transform(model)
17
 
18
  def transcribe(audio_file_mic=None, audio_file_upload=None, language="English (eng)"):
19
  if audio_file_mic:
 
31
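  # Keep the same model in memory and switch the tokenizer's target language with set_target_lang(). NOTE(review): this revision no longer calls load_adapter() on the model, so the language adapter weights are not switched here — confirm this is intended for the fine-tuned checkpoint.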
32
  language_code = iso_codes[language]
33
  processor.tokenizer.set_target_lang(language_code)
34
+ pipe = pipeline("automatic-speech-recognition", model=model)
35
 
36
+ return pipe(audio_file)["text"]
37
 
 
 
 
 
 
 
 
 
 
38
 
39
  description = '''Automatic Speech Recognition with [MMS](https://ai.facebook.com/blog/multilingual-model-speech-recognition/) (Massively Multilingual Speech) by Meta.
40
  Supports [1162 languages](https://dl.fbaipublicfiles.com/mms/misc/language_coverage_mms.html). Read the paper for more details: [Scaling Speech Technology to 1,000+ Languages](https://arxiv.org/abs/2305.13516).'''
 
46
  gr.Dropdown(choices=languages, label="Language", value="English (eng)")
47
  ],
48
  outputs=gr.Textbox(label="Transcription"),
 
49
  description=description
50
  )
51
  iface.launch()