Update app.py
Browse files
app.py
CHANGED
@@ -33,8 +33,8 @@ def create_speaker_embedding(waveform):
|
|
33 |
speaker_embeddings = speaker_embeddings.squeeze().cpu().numpy()
|
34 |
return speaker_embeddings
|
35 |
|
36 |
-
def prepare_data(temp_text, temp_audio):
|
37 |
-
rate, audio_data = temp_audio
|
38 |
# new_rate = 16000
|
39 |
# number_of_samples = round(len(audio_data) * float(new_rate) / rate)
|
40 |
# audio_data = sps.resample(audio_data, number_of_samples)
|
@@ -65,9 +65,13 @@ def generate_gpt4_response(user_text, print_output=False):
|
|
65 |
|
66 |
|
67 |
def predict(temp_text, temp_audio, record_audio_prompt, prompt_text):
|
68 |
-
|
|
|
|
|
|
|
|
|
69 |
text = generate_gpt4_response(prompt_text)
|
70 |
-
embeddings=prepare_data(temp_text, temp_audio)
|
71 |
inputs = processor(text=text, return_tensors="pt")
|
72 |
spectrogram = model.generate_speech(inputs["input_ids"], embeddings)
|
73 |
|
@@ -87,8 +91,8 @@ with app:
|
|
87 |
|
88 |
temp_text=gr.Text(label="Template Text")
|
89 |
temp_audio=gr.Audio(label="Template Speech", type="numpy")
|
|
|
90 |
prompt_text=gr.Text(label="Input Text")
|
91 |
-
record_audio_prompt = gr.Audio(label='recorded audio prompt', source='microphone', interactive=True)
|
92 |
with gr.Column():
|
93 |
text = gr.Textbox(label="Message")
|
94 |
speech=gr.Audio(label="Generated Speech", type="numpy")
|
|
|
33 |
speaker_embeddings = speaker_embeddings.squeeze().cpu().numpy()
|
34 |
return speaker_embeddings
|
35 |
|
36 |
+
def prepare_data(temp_text, audio_prompt):
|
37 |
+
rate, audio_data = audio_prompt
|
38 |
# new_rate = 16000
|
39 |
# number_of_samples = round(len(audio_data) * float(new_rate) / rate)
|
40 |
# audio_data = sps.resample(audio_data, number_of_samples)
|
|
|
65 |
|
66 |
|
67 |
def predict(temp_text, temp_audio, record_audio_prompt, prompt_text):
|
68 |
+
if temp_audio is not None :
|
69 |
+
audio_prompt = temp_audio
|
70 |
+
else:
|
71 |
+
audio_prompt = record_audio_prompt
|
72 |
+
|
73 |
text = generate_gpt4_response(prompt_text)
|
74 |
+
embeddings=prepare_data(temp_text, audio_prompt)
|
75 |
inputs = processor(text=text, return_tensors="pt")
|
76 |
spectrogram = model.generate_speech(inputs["input_ids"], embeddings)
|
77 |
|
|
|
91 |
|
92 |
temp_text=gr.Text(label="Template Text")
|
93 |
temp_audio=gr.Audio(label="Template Speech", type="numpy")
|
94 |
+
record_audio_prompt = gr.Audio(label='recorded audio prompt', source='microphone', type="numpy")
|
95 |
prompt_text=gr.Text(label="Input Text")
|
|
|
96 |
with gr.Column():
|
97 |
text = gr.Textbox(label="Message")
|
98 |
speech=gr.Audio(label="Generated Speech", type="numpy")
|