Upload app.py
app.py (CHANGED)
@@ -10,7 +10,19 @@ import sys
 import os
 import subprocess
 from pydub import AudioSegment
-
+import yaml
+import wave
+
+
+
+def get_wav_duration(file_path):
+    with wave.open(file_path, 'rb') as wav_file:
+        frames = wav_file.getnframes()
+        rate = wav_file.getframerate()
+        duration = frames / float(rate)
+        return duration
+
+
 
 def install_fairseq():
     try:
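The new get_wav_duration helper added in this hunk can be sanity-checked on its own with nothing but the standard library. A minimal sketch, where the file name and the two seconds of silence are illustrative only, not part of the commit:

import wave

def get_wav_duration(file_path):
    # same helper as added in the hunk above
    with wave.open(file_path, 'rb') as wav_file:
        frames = wav_file.getnframes()
        rate = wav_file.getframerate()
        return frames / float(rate)

# write a 2-second silent 16 kHz mono clip, then measure it
with wave.open("silence.wav", 'wb') as out:
    out.setnchannels(1)
    out.setsampwidth(2)          # 16-bit samples
    out.setframerate(16000)
    out.writeframes(b"\x00\x00" * 16000 * 2)

print(get_wav_duration("silence.wav"))  # expected: 2.0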
@@ -45,35 +57,59 @@ def run_my_code(input_text, language):
     audio=convert_audio_to_16k_wav(input_text)
     hi_wav = audio
 
+
     data_root=""
     model_checkpoint=""
     d_r=""
+    yam=""
 
     if(language=="Hindi"):
         model_checkpoint = "./models/hindi_model.pt"
         data_root="./MUSTC_ROOT_hindi/en-hi/"
         d_r="MUSTC_ROOT_hindi/"
+        yam="./MUSTC_ROOT_hindi/en-hi/data/tst-COMMON/txt/tst-COMMON.yaml"
     if(language=="French"):
         model_checkpoint = "./models/french_model.pt"
         data_root="./MUSTC_ROOT_french/en-fr/"
         d_r="MUSTC_ROOT_french/"
+        yam="./MUSTC_ROOT_french/en-fr/data/tst-COMMON/txt/tst-COMMON.yaml"
+    if(language=="German"):
+        model_checkpoint = "./models/german_model.pt"
+        data_root="./MUSTC_ROOT_german/en-de/"
+        d_r="MUSTC_ROOT_german/"
+        yam="./MUSTC_ROOT_german/en-de/data/tst-COMMON/txt/tst-COMMON.yaml"
 
 
 
+
+
+    #code to change the duration of the yaml file accordign to the audio input
+    with open(yam, 'r') as yaml_file:
+        data = yaml.safe_load(yaml_file)
+    data[0]['duration']=get_wav_duration(hi_wav)
+    with open(yam, 'w') as yaml_file:
+        yaml.dump(data, yaml_file)
+
     os.system(f"cp {hi_wav} {data_root}data/tst-COMMON/wav/test.wav")
 
-    print("------Starting data prepration
+    print("------Starting data prepration------")
     subprocess.run(["python", "prep_mustc_data_hindi_single.py", "--data-root", d_r, "--task", "st", "--vocab-type", "unigram", "--vocab-size", "8000"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
 
-    print("------Performing translation
+    print("------Performing translation------")
 
-    translation_result = subprocess.run(["
+    translation_result = subprocess.run(["python", "generate.py", data_root, "--config-yaml", "config_st.yaml", "--gen-subset", "tst-COMMON_st", "--task", "speech_to_text", "--path", model_checkpoint], capture_output=True, text=True)
     translation_result_text = translation_result.stdout
 
     lines = translation_result_text.split("\n")
 
+
+    #just for checking the duration from the yaml file of the current input audio
+    with open(yam, 'r') as yaml_file:
+        data = yaml.safe_load(yaml_file)
+    print(data[0]['duration'], " seconds duration")
+
     output_text=""
-    print("\n\n------Translation results are
+    print("\n\n------Translation results are:\n")
     for i in lines:
         if (i.startswith("D-0")):
             print(i.split("\t")[2])
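The duration patch added in this hunk rewrites the first segment entry of a MuST-C style tst-COMMON.yaml so it matches the uploaded clip. A minimal standalone sketch of that step; the file paths and the single-segment list layout are assumptions carried over from the code above, not a verified property of this Space:

import wave
import yaml   # PyYAML

def get_wav_duration(file_path):
    with wave.open(file_path, 'rb') as wav_file:
        return wav_file.getnframes() / float(wav_file.getframerate())

yam = "./MUSTC_ROOT_hindi/en-hi/data/tst-COMMON/txt/tst-COMMON.yaml"  # path as used in the commit
hi_wav = "test.wav"                                                   # example input clip

# load the segment list, patch the first entry's duration, write it back
with open(yam, 'r') as yaml_file:
    data = yaml.safe_load(yaml_file)   # expected: a list of dicts, each with a 'duration' key
data[0]['duration'] = get_wav_duration(hi_wav)
with open(yam, 'w') as yaml_file:
    yaml.dump(data, yaml_file)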
@@ -94,14 +130,14 @@ install_fairseq()
 #input_textbox = gr.inputs.Textbox(label="test2.wav")
 #input=gr.inputs.Audio(source="microphone", type="filepath", label="Record something (in English)...")
 #audio=convert_audio_to_16k_wav(input)
-output_textbox = gr.outputs.Textbox(label="
+output_textbox = gr.outputs.Textbox(label="The Translated Text is:")
 
 # Create a Gradio interface
 iface = gr.Interface(
     fn=run_my_code,
-    inputs=[gr.inputs.Audio(source="microphone", type="filepath", label="Record something (in English)..."), gr.inputs.Radio(["Hindi", "French"], label="Language")],
+    inputs=[gr.inputs.Audio(source="microphone", type="filepath", label="Record something (in American/British English Accent)..."), gr.inputs.Radio(["Hindi", "French"], label="Language")],
     outputs=output_textbox,
-    title="English to Hindi Translator")
+    title="English to Hindi/French Translator")
 
 # Launch the interface
 iface.launch()
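The interface in this hunk still uses the legacy gr.inputs / gr.outputs namespaces, which later Gradio releases dropped. If the Space were moved to Gradio 4.x, a rough equivalent of the same interface might look like the sketch below; this is the current Gradio component API, not something present in this commit, and it is untested against whatever version the Space pins:

import gradio as gr

# run_my_code is the function defined earlier in app.py
iface = gr.Interface(
    fn=run_my_code,
    inputs=[
        gr.Audio(sources=["microphone"], type="filepath",
                 label="Record something (in American/British English Accent)..."),
        gr.Radio(["Hindi", "French"], label="Language"),
    ],
    outputs=gr.Textbox(label="The Translated Text is:"),
    title="English to Hindi/French Translator",
)

iface.launch()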
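For reference, the loop at the end of run_my_code keeps only the hypothesis lines that fairseq's generate step writes to stdout, which are tab-separated as D-<id>, a score, and the detokenized text. A small sketch of that extraction on a canned stdout string; the sample lines are invented placeholders, not real model output:

# sample of what the generate subprocess prints
translation_result_text = (
    "S-0\tsome source transcript\n"
    "H-0\t-0.41\tsome hypothesis tokens\n"
    "D-0\t-0.41\tsome detokenized translation\n"
)

output_text = ""
for i in translation_result_text.split("\n"):
    if i.startswith("D-0"):
        # third tab-separated field holds the translated text
        output_text = i.split("\t")[2]

print(output_text)  # -> some detokenized translation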