balaramas committed on
Commit
8c29416
1 Parent(s): 6cc98e6

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -8
app.py CHANGED
@@ -10,7 +10,19 @@ import sys
10
  import os
11
  import subprocess
12
  from pydub import AudioSegment
13
- from huggingface_hub import snapshot_download
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  def install_fairseq():
16
  try:
@@ -45,35 +57,59 @@ def run_my_code(input_text, language):
45
  audio=convert_audio_to_16k_wav(input_text)
46
  hi_wav = audio
47
 
 
48
  data_root=""
49
  model_checkpoint=""
50
  d_r=""
 
51
 
52
  if(language=="Hindi"):
53
  model_checkpoint = "./models/hindi_model.pt"
54
  data_root="./MUSTC_ROOT_hindi/en-hi/"
55
  d_r="MUSTC_ROOT_hindi/"
 
56
  if(language=="French"):
57
  model_checkpoint = "./models/french_model.pt"
58
  data_root="./MUSTC_ROOT_french/en-fr/"
59
  d_r="MUSTC_ROOT_french/"
 
 
 
 
 
 
60
 
61
 
62
 
 
 
 
 
 
 
 
 
 
63
  os.system(f"cp {hi_wav} {data_root}data/tst-COMMON/wav/test.wav")
64
 
65
- print("------Starting data prepration...")
66
  subprocess.run(["python", "prep_mustc_data_hindi_single.py", "--data-root", d_r, "--task", "st", "--vocab-type", "unigram", "--vocab-size", "8000"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
67
 
68
- print("------Performing translation...")
69
 
70
- translation_result = subprocess.run(["fairseq-generate", data_root, "--config-yaml", "config_st.yaml", "--gen-subset", "tst-COMMON_st", "--task", "speech_to_text", "--path", model_checkpoint, "--max-tokens", "50000", "--beam", "5", "--scoring", "sacrebleu"], capture_output=True, text=True)
71
  translation_result_text = translation_result.stdout
72
 
73
  lines = translation_result_text.split("\n")
74
 
 
 
 
 
 
 
75
  output_text=""
76
- print("\n\n------Translation results are:")
77
  for i in lines:
78
  if (i.startswith("D-0")):
79
  print(i.split("\t")[2])
@@ -94,14 +130,14 @@ install_fairseq()
94
  #input_textbox = gr.inputs.Textbox(label="test2.wav")
95
  #input=gr.inputs.Audio(source="microphone", type="filepath", label="Record something (in English)...")
96
  #audio=convert_audio_to_16k_wav(input)
97
- output_textbox = gr.outputs.Textbox(label="Output Text")
98
 
99
  # Create a Gradio interface
100
  iface = gr.Interface(
101
  fn=run_my_code,
102
- inputs=[gr.inputs.Audio(source="microphone", type="filepath", label="Record something (in English)..."), gr.inputs.Radio(["Hindi", "French"], label="Language")],
103
  outputs=output_textbox,
104
- title="English to Hindi Translator")
105
 
106
  # Launch the interface
107
  iface.launch()
 
10
  import os
11
  import subprocess
12
  from pydub import AudioSegment
13
+ import yaml
14
+ import wave
15
+
16
+
17
+
18
+ def get_wav_duration(file_path):
19
+ with wave.open(file_path, 'rb') as wav_file:
20
+ frames = wav_file.getnframes()
21
+ rate = wav_file.getframerate()
22
+ duration = frames / float(rate)
23
+ return duration
24
+
25
+
26
 
27
  def install_fairseq():
28
  try:
 
57
  audio=convert_audio_to_16k_wav(input_text)
58
  hi_wav = audio
59
 
60
+
61
  data_root=""
62
  model_checkpoint=""
63
  d_r=""
64
+ yam=""
65
 
66
  if(language=="Hindi"):
67
  model_checkpoint = "./models/hindi_model.pt"
68
  data_root="./MUSTC_ROOT_hindi/en-hi/"
69
  d_r="MUSTC_ROOT_hindi/"
70
+ yam="./MUSTC_ROOT_hindi/en-hi/data/tst-COMMON/txt/tst-COMMON.yaml"
71
  if(language=="French"):
72
  model_checkpoint = "./models/french_model.pt"
73
  data_root="./MUSTC_ROOT_french/en-fr/"
74
  d_r="MUSTC_ROOT_french/"
75
+ yam="./MUSTC_ROOT_french/en-fr/data/tst-COMMON/txt/tst-COMMON.yaml"
76
+ if(language=="German"):
77
+ model_checkpoint = "./models/german_model.pt"
78
+ data_root="./MUSTC_ROOT_german/en-de/"
79
+ d_r="MUSTC_ROOT_german/"
80
+ yam="./MUSTC_ROOT_german/en-de/data/tst-COMMON/txt/tst-COMMON.yaml"
81
 
82
 
83
 
84
+
85
+
86
+ # code to change the duration of the yaml file according to the audio input
87
+ with open(yam, 'r') as yaml_file:
88
+ data = yaml.safe_load(yaml_file)
89
+ data[0]['duration']=get_wav_duration(hi_wav)
90
+ with open(yam, 'w') as yaml_file:
91
+ yaml.dump(data, yaml_file)
92
+
93
  os.system(f"cp {hi_wav} {data_root}data/tst-COMMON/wav/test.wav")
94
 
95
+ print("------Starting data prepration------")
96
  subprocess.run(["python", "prep_mustc_data_hindi_single.py", "--data-root", d_r, "--task", "st", "--vocab-type", "unigram", "--vocab-size", "8000"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
97
 
98
+ print("------Performing translation------")
99
 
100
+ translation_result = subprocess.run(["python", "generate.py", data_root, "--config-yaml", "config_st.yaml", "--gen-subset", "tst-COMMON_st", "--task", "speech_to_text", "--path", model_checkpoint], capture_output=True, text=True)
101
  translation_result_text = translation_result.stdout
102
 
103
  lines = translation_result_text.split("\n")
104
 
105
+
106
+ #just for checking the duration from the yaml file of the current input audio
107
+ with open(yam, 'r') as yaml_file:
108
+ data = yaml.safe_load(yaml_file)
109
+ print(data[0]['duration'], " seconds duration")
110
+
111
  output_text=""
112
+ print("\n\n------Translation results are:\n")
113
  for i in lines:
114
  if (i.startswith("D-0")):
115
  print(i.split("\t")[2])
 
130
  #input_textbox = gr.inputs.Textbox(label="test2.wav")
131
  #input=gr.inputs.Audio(source="microphone", type="filepath", label="Record something (in English)...")
132
  #audio=convert_audio_to_16k_wav(input)
133
+ output_textbox = gr.outputs.Textbox(label="The Translated Text is:")
134
 
135
  # Create a Gradio interface
136
  iface = gr.Interface(
137
  fn=run_my_code,
138
+ inputs=[gr.inputs.Audio(source="microphone", type="filepath", label="Record something (in American/British English Accent)..."), gr.inputs.Radio(["Hindi", "French"], label="Language")],
139
  outputs=output_textbox,
140
+ title="English to Hindi/French Translator")
141
 
142
  # Launch the interface
143
  iface.launch()