Update app.py: pass max_new_tokens=1024 to the translation and transcription generate() calls
Browse files
app.py
CHANGED
@@ -93,7 +93,7 @@ def text_to_text_translation(text, src_lang, tgt_lang):
|
|
93 |
if src_lang == tgt_lang:
|
94 |
return text
|
95 |
text_inputs = processor(text = text, src_lang=src_lang, return_tensors="pt").to(device)
|
96 |
-
output_tokens = text_to_text_model.generate(**text_inputs, tgt_lang=tgt_lang)[0].cpu().numpy().squeeze()
|
97 |
translated_text_from_text = processor.decode(output_tokens.tolist(), skip_special_tokens=True)
|
98 |
|
99 |
return translated_text_from_text
|
@@ -237,14 +237,14 @@ def transcribe(numpy_array):
|
|
237 |
array = torchaudio.functional.resample(torch.tensor(numpy_array[1]).float(), src_sr, tgt_sr)
|
238 |
|
239 |
audio_inputs = processor(audios=array, return_tensors="pt").to(device)
|
240 |
-
text = speech_to_text_model.generate(**audio_inputs, tgt_lang="eng")[0].cpu().numpy().squeeze()
|
241 |
text = processor.decode(text.tolist(), skip_special_tokens=True).strip()
|
242 |
|
243 |
|
244 |
src_lang = detect_language_from_audio(numpy_array)
|
245 |
|
246 |
if src_lang != "eng":
|
247 |
-
original_text = speech_to_text_model.generate(**audio_inputs, tgt_lang=src_lang)[0].cpu().numpy().squeeze()
|
248 |
original_text = processor.decode(original_text.tolist(), skip_special_tokens=True).strip()
|
249 |
else:
|
250 |
original_text = text
|
|
|
93 |
if src_lang == tgt_lang:
|
94 |
return text
|
95 |
text_inputs = processor(text = text, src_lang=src_lang, return_tensors="pt").to(device)
|
96 |
+
output_tokens = text_to_text_model.generate(**text_inputs, tgt_lang=tgt_lang, max_new_tokens=1024)[0].cpu().numpy().squeeze()
|
97 |
translated_text_from_text = processor.decode(output_tokens.tolist(), skip_special_tokens=True)
|
98 |
|
99 |
return translated_text_from_text
|
|
|
237 |
array = torchaudio.functional.resample(torch.tensor(numpy_array[1]).float(), src_sr, tgt_sr)
|
238 |
|
239 |
audio_inputs = processor(audios=array, return_tensors="pt").to(device)
|
240 |
+
text = speech_to_text_model.generate(**audio_inputs, tgt_lang="eng", max_new_tokens=1024)[0].cpu().numpy().squeeze()
|
241 |
text = processor.decode(text.tolist(), skip_special_tokens=True).strip()
|
242 |
|
243 |
|
244 |
src_lang = detect_language_from_audio(numpy_array)
|
245 |
|
246 |
if src_lang != "eng":
|
247 |
+
original_text = speech_to_text_model.generate(**audio_inputs, tgt_lang=src_lang, max_new_tokens=1024)[0].cpu().numpy().squeeze()
|
248 |
original_text = processor.decode(original_text.tolist(), skip_special_tokens=True).strip()
|
249 |
else:
|
250 |
original_text = text
|