balaramas committed on
Commit
8c29416
1 Parent(s): 6cc98e6

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -8
app.py CHANGED
@@ -10,7 +10,19 @@ import sys
10
  import os
11
  import subprocess
12
  from pydub import AudioSegment
13
- from huggingface_hub import snapshot_download
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  def install_fairseq():
16
  try:
@@ -45,35 +57,59 @@ def run_my_code(input_text, language):
45
  audio=convert_audio_to_16k_wav(input_text)
46
  hi_wav = audio
47
 
 
48
  data_root=""
49
  model_checkpoint=""
50
  d_r=""
 
51
 
52
  if(language=="Hindi"):
53
  model_checkpoint = "./models/hindi_model.pt"
54
  data_root="./MUSTC_ROOT_hindi/en-hi/"
55
  d_r="MUSTC_ROOT_hindi/"
 
56
  if(language=="French"):
57
  model_checkpoint = "./models/french_model.pt"
58
  data_root="./MUSTC_ROOT_french/en-fr/"
59
  d_r="MUSTC_ROOT_french/"
 
 
 
 
 
 
60
 
61
 
62
 
 
 
 
 
 
 
 
 
 
63
  os.system(f"cp {hi_wav} {data_root}data/tst-COMMON/wav/test.wav")
64
 
65
- print("------Starting data prepration...")
66
  subprocess.run(["python", "prep_mustc_data_hindi_single.py", "--data-root", d_r, "--task", "st", "--vocab-type", "unigram", "--vocab-size", "8000"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
67
 
68
- print("------Performing translation...")
69
 
70
- translation_result = subprocess.run(["fairseq-generate", data_root, "--config-yaml", "config_st.yaml", "--gen-subset", "tst-COMMON_st", "--task", "speech_to_text", "--path", model_checkpoint, "--max-tokens", "50000", "--beam", "5", "--scoring", "sacrebleu"], capture_output=True, text=True)
71
  translation_result_text = translation_result.stdout
72
 
73
  lines = translation_result_text.split("\n")
74
 
 
 
 
 
 
 
75
  output_text=""
76
- print("\n\n------Translation results are:")
77
  for i in lines:
78
  if (i.startswith("D-0")):
79
  print(i.split("\t")[2])
@@ -94,14 +130,14 @@ install_fairseq()
94
  #input_textbox = gr.inputs.Textbox(label="test2.wav")
95
  #input=gr.inputs.Audio(source="microphone", type="filepath", label="Record something (in English)...")
96
  #audio=convert_audio_to_16k_wav(input)
97
- output_textbox = gr.outputs.Textbox(label="Output Text")
98
 
99
  # Create a Gradio interface
100
  iface = gr.Interface(
101
  fn=run_my_code,
102
- inputs=[gr.inputs.Audio(source="microphone", type="filepath", label="Record something (in English)..."), gr.inputs.Radio(["Hindi", "French"], label="Language")],
103
  outputs=output_textbox,
104
- title="English to Hindi Translator")
105
 
106
  # Launch the interface
107
  iface.launch()
 
10
  import os
11
  import subprocess
12
  from pydub import AudioSegment
13
+ import yaml
14
+ import wave
15
+
16
+
17
+
18
+ def get_wav_duration(file_path):
19
+ with wave.open(file_path, 'rb') as wav_file:
20
+ frames = wav_file.getnframes()
21
+ rate = wav_file.getframerate()
22
+ duration = frames / float(rate)
23
+ return duration
24
+
25
+
26
 
27
  def install_fairseq():
28
  try:
 
57
  audio=convert_audio_to_16k_wav(input_text)
58
  hi_wav = audio
59
 
60
+
61
  data_root=""
62
  model_checkpoint=""
63
  d_r=""
64
+ yam=""
65
 
66
  if(language=="Hindi"):
67
  model_checkpoint = "./models/hindi_model.pt"
68
  data_root="./MUSTC_ROOT_hindi/en-hi/"
69
  d_r="MUSTC_ROOT_hindi/"
70
+ yam="./MUSTC_ROOT_hindi/en-hi/data/tst-COMMON/txt/tst-COMMON.yaml"
71
  if(language=="French"):
72
  model_checkpoint = "./models/french_model.pt"
73
  data_root="./MUSTC_ROOT_french/en-fr/"
74
  d_r="MUSTC_ROOT_french/"
75
+ yam="./MUSTC_ROOT_french/en-fr/data/tst-COMMON/txt/tst-COMMON.yaml"
76
+ if(language=="German"):
77
+ model_checkpoint = "./models/german_model.pt"
78
+ data_root="./MUSTC_ROOT_german/en-de/"
79
+ d_r="MUSTC_ROOT_german/"
80
+ yam="./MUSTC_ROOT_german/en-de/data/tst-COMMON/txt/tst-COMMON.yaml"
81
 
82
 
83
 
84
+
85
+
86
+ # code to change the duration of the yaml file according to the audio input
87
+ with open(yam, 'r') as yaml_file:
88
+ data = yaml.safe_load(yaml_file)
89
+ data[0]['duration']=get_wav_duration(hi_wav)
90
+ with open(yam, 'w') as yaml_file:
91
+ yaml.dump(data, yaml_file)
92
+
93
  os.system(f"cp {hi_wav} {data_root}data/tst-COMMON/wav/test.wav")
94
 
95
+ print("------Starting data prepration------")
96
  subprocess.run(["python", "prep_mustc_data_hindi_single.py", "--data-root", d_r, "--task", "st", "--vocab-type", "unigram", "--vocab-size", "8000"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
97
 
98
+ print("------Performing translation------")
99
 
100
+ translation_result = subprocess.run(["python", "generate.py", data_root, "--config-yaml", "config_st.yaml", "--gen-subset", "tst-COMMON_st", "--task", "speech_to_text", "--path", model_checkpoint], capture_output=True, text=True)
101
  translation_result_text = translation_result.stdout
102
 
103
  lines = translation_result_text.split("\n")
104
 
105
+
106
+ #just for checking the duration from the yaml file of the current input audio
107
+ with open(yam, 'r') as yaml_file:
108
+ data = yaml.safe_load(yaml_file)
109
+ print(data[0]['duration'], " seconds duration")
110
+
111
  output_text=""
112
+ print("\n\n------Translation results are:\n")
113
  for i in lines:
114
  if (i.startswith("D-0")):
115
  print(i.split("\t")[2])
 
130
  #input_textbox = gr.inputs.Textbox(label="test2.wav")
131
  #input=gr.inputs.Audio(source="microphone", type="filepath", label="Record something (in English)...")
132
  #audio=convert_audio_to_16k_wav(input)
133
+ output_textbox = gr.outputs.Textbox(label="The Translated Text is:")
134
 
135
  # Create a Gradio interface
136
  iface = gr.Interface(
137
  fn=run_my_code,
138
+ inputs=[gr.inputs.Audio(source="microphone", type="filepath", label="Record something (in American/British English Accent)..."), gr.inputs.Radio(["Hindi", "French"], label="Language")],
139
  outputs=output_textbox,
140
+ title="English to Hindi/French Translator")
141
 
142
  # Launch the interface
143
  iface.launch()