Prasada committed on
Commit
450c254
•
1 Parent(s): 43c85f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -5
app.py CHANGED
@@ -33,8 +33,8 @@ def create_speaker_embedding(waveform):
33
  speaker_embeddings = speaker_embeddings.squeeze().cpu().numpy()
34
  return speaker_embeddings
35
 
36
- def prepare_data(temp_text, temp_audio):
37
- rate, audio_data = temp_audio
38
  # new_rate = 16000
39
  # number_of_samples = round(len(audio_data) * float(new_rate) / rate)
40
  # audio_data = sps.resample(audio_data, number_of_samples)
@@ -65,9 +65,13 @@ def generate_gpt4_response(user_text, print_output=False):
65
 
66
 
67
  def predict(temp_text, temp_audio, record_audio_prompt, prompt_text):
68
- audio_prompt = audio_prompt if temp_audio is not None else record_audio_prompt
 
 
 
 
69
  text = generate_gpt4_response(prompt_text)
70
- embeddings=prepare_data(temp_text, temp_audio)
71
  inputs = processor(text=text, return_tensors="pt")
72
  spectrogram = model.generate_speech(inputs["input_ids"], embeddings)
73
 
@@ -87,8 +91,8 @@ with app:
87
 
88
  temp_text=gr.Text(label="Template Text")
89
  temp_audio=gr.Audio(label="Template Speech", type="numpy")
 
90
  prompt_text=gr.Text(label="Input Text")
91
- record_audio_prompt = gr.Audio(label='recorded audio prompt', source='microphone', interactive=True)
92
  with gr.Column():
93
  text = gr.Textbox(label="Message")
94
  speech=gr.Audio(label="Generated Speech", type="numpy")
 
33
  speaker_embeddings = speaker_embeddings.squeeze().cpu().numpy()
34
  return speaker_embeddings
35
 
36
+ def prepare_data(temp_text, audio_prompt):
37
+ rate, audio_data = audio_prompt
38
  # new_rate = 16000
39
  # number_of_samples = round(len(audio_data) * float(new_rate) / rate)
40
  # audio_data = sps.resample(audio_data, number_of_samples)
 
65
 
66
 
67
  def predict(temp_text, temp_audio, record_audio_prompt, prompt_text):
68
+ if temp_audio is not None :
69
+ audio_prompt = temp_audio
70
+ else:
71
+ audio_prompt = record_audio_prompt
72
+
73
  text = generate_gpt4_response(prompt_text)
74
+ embeddings=prepare_data(temp_text, audio_prompt)
75
  inputs = processor(text=text, return_tensors="pt")
76
  spectrogram = model.generate_speech(inputs["input_ids"], embeddings)
77
 
 
91
 
92
  temp_text=gr.Text(label="Template Text")
93
  temp_audio=gr.Audio(label="Template Speech", type="numpy")
94
+ record_audio_prompt = gr.Audio(label='recorded audio prompt', source='microphone', type="numpy")
95
  prompt_text=gr.Text(label="Input Text")
 
96
  with gr.Column():
97
  text = gr.Textbox(label="Message")
98
  speech=gr.Audio(label="Generated Speech", type="numpy")