OpenSound committed on
Commit
41c3bad
·
1 Parent(s): 788a499
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -127,24 +127,27 @@ class WhisperxModel:
127
  @spaces.GPU
128
  def load_models(whisper_backend_name, whisper_model_name, alignment_model_name, ssrspeech_model_name):
129
  global transcribe_model, align_model, ssrspeech_model
 
130
 
131
  if ssrspeech_model_name == "English":
132
  ssrspeech_model_name = "English"
133
  text_tokenizer = TextTokenizer(backend="espeak")
 
134
  elif ssrspeech_model_name == "Mandarin":
135
  ssrspeech_model_name = "Mandarin"
136
  text_tokenizer = TextTokenizer(backend="espeak", language='cmn')
 
137
 
138
  if alignment_model_name is not None:
139
- align_model = WhisperxAlignModel()
140
 
141
  if whisper_model_name is not None:
142
  if whisper_backend_name == "whisper":
143
- transcribe_model = WhisperModel(whisper_model_name)
144
  else:
145
  if align_model is None:
146
  raise gr.Error("Align model required for whisperx backend")
147
- transcribe_model = WhisperxModel(whisper_model_name, align_model)
148
 
149
  ssrspeech_fn = f"{MODELS_PATH}/{ssrspeech_model_name}.pth"
150
  if not os.path.exists(ssrspeech_fn):
 
127
  @spaces.GPU
128
  def load_models(whisper_backend_name, whisper_model_name, alignment_model_name, ssrspeech_model_name):
129
  global transcribe_model, align_model, ssrspeech_model
130
+
131
 
132
  if ssrspeech_model_name == "English":
133
  ssrspeech_model_name = "English"
134
  text_tokenizer = TextTokenizer(backend="espeak")
135
+ language = "en"
136
  elif ssrspeech_model_name == "Mandarin":
137
  ssrspeech_model_name = "Mandarin"
138
  text_tokenizer = TextTokenizer(backend="espeak", language='cmn')
139
+ language = "zh"
140
 
141
  if alignment_model_name is not None:
142
+ align_model = WhisperxAlignModel(language)
143
 
144
  if whisper_model_name is not None:
145
  if whisper_backend_name == "whisper":
146
+ transcribe_model = WhisperModel(whisper_model_name, language)
147
  else:
148
  if align_model is None:
149
  raise gr.Error("Align model required for whisperx backend")
150
+ transcribe_model = WhisperxModel(whisper_model_name, align_model, language)
151
 
152
  ssrspeech_fn = f"{MODELS_PATH}/{ssrspeech_model_name}.pth"
153
  if not os.path.exists(ssrspeech_fn):