Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -104,78 +104,42 @@ def transcribe_and_chat(audio):
|
|
| 104 |
return response, audio_path
|
| 105 |
|
| 106 |
def create_demo():
|
| 107 |
-
with gr.Blocks(
|
| 108 |
gr.Markdown(
|
| 109 |
"""
|
| 110 |
-
#
|
| 111 |
-
Welcome to your personal voice assistant!
|
| 112 |
-
Simply record your voice, and I'll respond with both text and speech.
|
| 113 |
"""
|
| 114 |
)
|
| 115 |
|
| 116 |
with gr.Row():
|
| 117 |
with gr.Column(scale=1):
|
| 118 |
-
audio_input = gr.Audio(
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
elem_id="audio-input"
|
| 122 |
-
)
|
| 123 |
-
voice_volume = gr.Slider(
|
| 124 |
-
minimum=0,
|
| 125 |
-
maximum=2,
|
| 126 |
-
value=1,
|
| 127 |
-
step=0.1,
|
| 128 |
-
label="π Assistant Voice Volume",
|
| 129 |
-
elem_id="voice-volume"
|
| 130 |
-
)
|
| 131 |
|
| 132 |
with gr.Column(scale=1):
|
| 133 |
-
chat_output = gr.
|
| 134 |
-
|
| 135 |
-
elem_id="chat-output",
|
| 136 |
-
height=400
|
| 137 |
-
)
|
| 138 |
-
audio_output = gr.Audio(
|
| 139 |
-
label="🔊 AI Voice Response",
|
| 140 |
-
autoplay=True,
|
| 141 |
-
elem_id="audio-output"
|
| 142 |
-
)
|
| 143 |
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
submit_button = gr.Button("π Submit", variant="primary", elem_id="submit-button")
|
| 147 |
|
| 148 |
# Processing the audio input
|
| 149 |
-
def process_audio(audio, volume
|
| 150 |
logging.info(f"Received audio: {audio}")
|
| 151 |
if audio is None:
|
| 152 |
-
return
|
| 153 |
-
|
| 154 |
-
transcribed_text = whisper_speech_to_text(audio)
|
| 155 |
-
if not transcribed_text:
|
| 156 |
-
return history + [("Sorry, I couldn't understand the audio. Please try again.", None)], None
|
| 157 |
-
|
| 158 |
-
response, audio_path = asyncio.run(chat_with_ai(transcribed_text))
|
| 159 |
-
|
| 160 |
# Adjust volume for the response audio
|
| 161 |
adjusted_audio_path = asyncio.run(text_to_speech_stream(response, volume))
|
| 162 |
logging.info(f"Response: {response}, Audio path: {adjusted_audio_path}")
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
submit_button.click(
|
| 168 |
-
process_audio,
|
| 169 |
-
inputs=[audio_input, voice_volume, chat_output],
|
| 170 |
-
outputs=[chat_output, audio_output]
|
| 171 |
-
)
|
| 172 |
-
|
| 173 |
-
clear_button.click(
|
| 174 |
-
lambda: ([], None),
|
| 175 |
-
outputs=[chat_output, audio_output]
|
| 176 |
-
)
|
| 177 |
|
| 178 |
-
# JavaScript to handle autoplay and auto-listen
|
| 179 |
demo.load(None, js="""
|
| 180 |
function() {
|
| 181 |
var recordButton;
|
|
@@ -199,17 +163,24 @@ def create_demo():
|
|
| 199 |
}
|
| 200 |
}
|
| 201 |
|
| 202 |
-
document.querySelector("
|
| 203 |
setTimeout(startListening, 500);
|
| 204 |
});
|
| 205 |
|
| 206 |
function playAssistantAudio() {
|
| 207 |
-
var
|
| 208 |
-
if (
|
| 209 |
-
|
|
|
|
|
|
|
|
|
|
| 210 |
}
|
| 211 |
}
|
| 212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
document.addEventListener('gradioUpdated', function(event) {
|
| 214 |
setTimeout(playAssistantAudio, 100);
|
| 215 |
});
|
|
|
|
| 104 |
return response, audio_path
|
| 105 |
|
| 106 |
def create_demo():
|
| 107 |
+
with gr.Blocks() as demo:
|
| 108 |
gr.Markdown(
|
| 109 |
"""
|
| 110 |
+
# 🗣️ AI Voice Assistant
|
| 111 |
+
Welcome to your personal voice assistant! Simply record your voice, and I will respond with both text and speech. The assistant will automatically start listening after playing its response. Powered by advanced AI models.
|
|
|
|
| 112 |
"""
|
| 113 |
)
|
| 114 |
|
| 115 |
with gr.Row():
|
| 116 |
with gr.Column(scale=1):
|
| 117 |
+
audio_input = gr.Audio(type="filepath", label="🎤 Record your voice", elem_id="audio-input")
|
| 118 |
+
clear_button = gr.Button("Clear", variant="secondary", elem_id="clear-button")
|
| 119 |
+
voice_volume = gr.Slider(minimum=0, maximum=2, value=1, step=0.1, label="Voice Volume", elem_id="voice-volume")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
with gr.Column(scale=1):
|
| 122 |
+
chat_output = gr.Textbox(label="💬 AI Response", elem_id="chat-output", lines=5, interactive=False)
|
| 123 |
+
audio_output = gr.Audio(label="🔊 AI Voice Response", autoplay=True, elem_id="audio-output")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
+
# Add some spacing and a divider
|
| 126 |
+
gr.Markdown("---")
|
|
|
|
| 127 |
|
| 128 |
# Processing the audio input
|
| 129 |
+
def process_audio(audio, volume):
|
| 130 |
logging.info(f"Received audio: {audio}")
|
| 131 |
if audio is None:
|
| 132 |
+
return "No audio detected. Please try recording again.", None
|
| 133 |
+
response, audio_path = transcribe_and_chat(audio)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
# Adjust volume for the response audio
|
| 135 |
adjusted_audio_path = asyncio.run(text_to_speech_stream(response, volume))
|
| 136 |
logging.info(f"Response: {response}, Audio path: {adjusted_audio_path}")
|
| 137 |
+
return response, adjusted_audio_path
|
| 138 |
+
|
| 139 |
+
audio_input.change(process_audio, inputs=[audio_input, voice_volume], outputs=[chat_output, audio_output])
|
| 140 |
+
clear_button.click(lambda: (None, None), None, [chat_output, audio_output])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
+
# JavaScript to handle autoplay, automatic submission, and auto-listen
|
| 143 |
demo.load(None, js="""
|
| 144 |
function() {
|
| 145 |
var recordButton;
|
|
|
|
| 163 |
}
|
| 164 |
}
|
| 165 |
|
| 166 |
+
document.querySelector("audio").addEventListener("ended", function() {
|
| 167 |
setTimeout(startListening, 500);
|
| 168 |
});
|
| 169 |
|
| 170 |
function playAssistantAudio() {
|
| 171 |
+
var audioElements = document.querySelectorAll('audio');
|
| 172 |
+
if (audioElements.length > 1) {
|
| 173 |
+
var assistantAudio = audioElements[1];
|
| 174 |
+
if (assistantAudio) {
|
| 175 |
+
assistantAudio.play();
|
| 176 |
+
}
|
| 177 |
}
|
| 178 |
}
|
| 179 |
|
| 180 |
+
document.addEventListener('gradioAudioLoaded', function(event) {
|
| 181 |
+
playAssistantAudio();
|
| 182 |
+
});
|
| 183 |
+
|
| 184 |
document.addEventListener('gradioUpdated', function(event) {
|
| 185 |
setTimeout(playAssistantAudio, 100);
|
| 186 |
});
|