OpenSound committed on
Commit
e33f8aa
·
verified ·
1 Parent(s): 8abe49d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -177,7 +177,7 @@ def get_transcribe_state(segments):
177
  @spaces.GPU
178
  def transcribe_en(audio_path):
179
  language = "en"
180
- transcribe_model_name = "base.en"
181
  transcribe_model = load_model(transcribe_model_name, device, asr_options={"suppress_numerals": True, "max_new_tokens": None, "clip_timestamps": None, "hallucination_silence_threshold": None}, language=language)
182
  segments = transcribe_model.transcribe(audio_path, batch_size=8)["segments"]
183
  for segment in segments:
@@ -194,7 +194,7 @@ def transcribe_en(audio_path):
194
  @spaces.GPU
195
  def transcribe_zh(audio_path):
196
  language = "zh"
197
- transcribe_model_name = "base"
198
  transcribe_model = load_model(transcribe_model_name, device, asr_options={"suppress_numerals": True, "max_new_tokens": None, "clip_timestamps": None, "hallucination_silence_threshold": None}, language=language)
199
  segments = transcribe_model.transcribe(audio_path, batch_size=8)["segments"]
200
  for segment in segments:
@@ -466,7 +466,7 @@ def run_edit_zh(seed, sub_amount, aug_text, cfg_coef, cfg_stride, prompt_length,
466
  target_transcript = replace_numbers_with_words(transcript).replace(" ", " ").replace(" ", " ").replace("\n", " ")
467
  orig_transcript = replace_numbers_with_words(original_transcript).replace(" ", " ").replace(" ", " ").replace("\n", " ")
468
 
469
- [orig_transcript, segments, _] = transcribe_zh(audio_path)
470
 
471
  converter = opencc.OpenCC('t2s')
472
  orig_transcript = converter.convert(orig_transcript)
@@ -564,7 +564,7 @@ def run_tts_zh(seed, sub_amount, aug_text, cfg_coef, cfg_stride, prompt_length,
564
  target_transcript = replace_numbers_with_words(transcript).replace(" ", " ").replace(" ", " ").replace("\n", " ")
565
  orig_transcript = replace_numbers_with_words(original_transcript).replace(" ", " ").replace(" ", " ").replace("\n", " ")
566
 
567
- [orig_transcript, segments, _] = transcribe_zh(audio_path)
568
 
569
  converter = opencc.OpenCC('t2s')
570
  orig_transcript = converter.convert(orig_transcript)
 
177
  @spaces.GPU
178
  def transcribe_en(audio_path):
179
  language = "en"
180
+ transcribe_model_name = "medium.en"
181
  transcribe_model = load_model(transcribe_model_name, device, asr_options={"suppress_numerals": True, "max_new_tokens": None, "clip_timestamps": None, "hallucination_silence_threshold": None}, language=language)
182
  segments = transcribe_model.transcribe(audio_path, batch_size=8)["segments"]
183
  for segment in segments:
 
194
  @spaces.GPU
195
  def transcribe_zh(audio_path):
196
  language = "zh"
197
+ transcribe_model_name = "medium"
198
  transcribe_model = load_model(transcribe_model_name, device, asr_options={"suppress_numerals": True, "max_new_tokens": None, "clip_timestamps": None, "hallucination_silence_threshold": None}, language=language)
199
  segments = transcribe_model.transcribe(audio_path, batch_size=8)["segments"]
200
  for segment in segments:
 
466
  target_transcript = replace_numbers_with_words(transcript).replace(" ", " ").replace(" ", " ").replace("\n", " ")
467
  orig_transcript = replace_numbers_with_words(original_transcript).replace(" ", " ").replace(" ", " ").replace("\n", " ")
468
 
469
+ [orig_transcript, segments, _, _] = transcribe_zh(audio_path)
470
 
471
  converter = opencc.OpenCC('t2s')
472
  orig_transcript = converter.convert(orig_transcript)
 
564
  target_transcript = replace_numbers_with_words(transcript).replace(" ", " ").replace(" ", " ").replace("\n", " ")
565
  orig_transcript = replace_numbers_with_words(original_transcript).replace(" ", " ").replace(" ", " ").replace("\n", " ")
566
 
567
+ [orig_transcript, segments, _, _] = transcribe_zh(audio_path)
568
 
569
  converter = opencc.OpenCC('t2s')
570
  orig_transcript = converter.convert(orig_transcript)