Spaces:
Runtime error
Runtime error
File size: 9,004 Bytes
1e40d63 f90f3f5 1ec0028 e2a9b8f f90f3f5 e2a9b8f f90f3f5 1e40d63 5f3740d 1ebd0cd 5f3740d f90f3f5 5f3740d 1ebd0cd 5f3740d c518990 1e40d63 0667117 d588242 1ebd0cd 5f3740d f90f3f5 1e40d63 5f3740d f90f3f5 5f3740d f90f3f5 1ebd0cd f90f3f5 e2a9b8f f90f3f5 1ebd0cd f90f3f5 1ebd0cd f90f3f5 1ebd0cd 5f3740d 1ebd0cd 7662900 5f3740d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
import gradio as gr
from transformers import pipeline
from helpers import load_model_file, load_wav_16k_mono_librosa, initialize_text_to_speech_model, load_label_mapping, predict_yamnet, classify, classify_realtime
from helpers import interface, interface_realtime, updateHistory, clearHistory, clear, format_dictionary, format_json
from helpers import generate_audio, TTS, TTS_ASR, TTS_chatbot, transcribe_speech, transcribe_speech_realtime, transcribe_realtime, translate_enpt
from helpers import chatbot_response, add_text
# Module-level string state. Nothing else in this file reads or writes these
# two names -- presumably leftovers mirrored by state kept inside `helpers`
# (see updateHistory / clearHistory); verify before removing.
history = ""
last_answer = ""

# Sample clips offered under the audio-classification section.
examples_audio_classification = [
    "content/talking-people.mp3",
    "content/miaow_16k.wav",
    "content/birds-in-forest-loop.wav",
    "content/drumming-jungle-music.wav",
    "content/driving-in-the-rain.wav",
    "content/city-alert-siren.wav",
    "content/small-group-applause.wav",
    "content/angry-male-crowd-ambience.wav",
    "content/slow-typing-on-a-keyboard.wav",
    "content/emergency-car-arrival.wav",
]

# Sample clips offered under the speech-recognition section. Despite the
# "_en" suffix this list holds both the English and the Portuguese clips.
examples_speech_recognition_en = [
    "content/speech1-en.wav",
    "content/speech2-en.wav",
    "content/speech1-ptbr.wav",
    "content/speech2-ptbr.wav",
    "content/speech3-ptbr.wav",
]

# Portuguese-only clips; only referenced from a commented-out gr.Examples call.
examples_speech_recognition_ptbr = [
    "content/speech1-ptbr.wav",
    "content/speech2-ptbr.wav",
    "content/speech3-ptbr.wav",
]

# Suggested chatbot questions (English). Each row is a one-element list
# because gr.Examples takes one value per input component.
examples_chatbot_en = [
    ["How does SocialEar assist people with hearing disabilities?"],
    ["Give me suggestions on how to use SocialEar"],
    ["How does SocialEar work?"],
    ["Are SocialEar results accurate?"],
    ["What accessibility features does SocialEar offer?"],
    ["Does SocialEar collect personal data?"],
    ["Can I use SocialEar to identify songs and artists from recorded audio?"],
]

# Suggested chatbot questions (Brazilian Portuguese), same shape as above.
examples_chatbot_ptbr = [
    ["Como o SocialEar auxilia pessoas com deficiência auditiva?"],
    ["Dê-me sugestões sobre como usar o SocialEar"],
    ["Como funciona o SocialEar?"],
    ["Os resultados do SocialEar são precisos?"],
    ["Quais recursos de acessibilidade o SocialEar oferece?"],
    ["O SocialEar coleta dados pessoais?"],
    ["Posso usar o SocialEar para identificar músicas e artistas de áudio gravado?"],
]
def to_audioClassification():
    """Show the audio-classification section and hide the other three.

    Returns:
        A dict keyed by the four section rows; each value is a ``gr.Row``
        whose ``visible`` flag is True only for ``audio_classification``.
    """
    sections = (
        audio_classification,
        realtime_classification,
        speech_recognition,
        chatbot_qa,
    )
    return {
        section: gr.Row(visible=section is audio_classification)
        for section in sections
    }
def to_realtimeAudioClassification():
    """Show the realtime-classification section and hide the other three.

    Returns:
        A dict keyed by the four section rows; each value is a ``gr.Row``
        whose ``visible`` flag is True only for ``realtime_classification``.
    """
    sections = (
        audio_classification,
        realtime_classification,
        speech_recognition,
        chatbot_qa,
    )
    return {
        section: gr.Row(visible=section is realtime_classification)
        for section in sections
    }
def to_speechRecognition():
    """Show the speech-recognition section and hide the other three.

    Returns:
        A dict keyed by the four section rows; each value is a ``gr.Row``
        whose ``visible`` flag is True only for ``speech_recognition``.
    """
    sections = (
        audio_classification,
        realtime_classification,
        speech_recognition,
        chatbot_qa,
    )
    return {
        section: gr.Row(visible=section is speech_recognition)
        for section in sections
    }
def to_chatbot():
    """Show the chatbot (help) section and hide the other three.

    Returns:
        A dict keyed by the four section rows; each value is a ``gr.Row``
        whose ``visible`` flag is True only for ``chatbot_qa``.
    """
    sections = (
        audio_classification,
        realtime_classification,
        speech_recognition,
        chatbot_qa,
    )
    return {
        section: gr.Row(visible=section is chatbot_qa)
        for section in sections
    }
# Build the whole UI: a language selector, four navigation buttons, and four
# initially hidden section rows (audio classification, realtime
# classification, speech recognition, chatbot). Exactly one section is made
# visible by the navigation handlers wired at the bottom of this block.
#
# NOTE(review): the nesting below (which container each listener and each
# gr.Examples lives in) determines the rendered layout -- confirm it against
# the intended design.
with gr.Blocks() as demo:
    # Output language, passed as an extra input to most helper callbacks.
    with gr.Accordion("Language Output", open=False):
        language = gr.Radio(
            ["en-us", "pt-br"],
            label="Language",
            info="Choose the language to display the classification result and audio",
            value='en-us',
            interactive=True,
        )

    # Navigation bar: one button per section.
    with gr.Row():
        btn0 = gr.Button("Audio Classification", scale=1, icon='content/Audio Classification.png', size='lg')
        btn1 = gr.Button("Realtime Audio Classification", scale=1, icon='content/Realtime Audio Classification.png', size='lg')
        btn2 = gr.Button("Speech Recognition", scale=1, icon='content/Speech Recognition.png', size='lg')
        btn3 = gr.Button("Help", scale=1, icon='content/Chatbot.png', size='lg')

    # ---- Section 1: classify a recorded or uploaded clip -------------------
    with gr.Row(visible=False) as audio_classification:
        with gr.Column(min_width=700):
            with gr.Accordion("Record an Audio", open=True):
                inputRecord = gr.Audio(label="Audio Input", source="microphone", type="filepath")
            with gr.Accordion("Upload a file", open=False):
                inputUpload = gr.Audio(label="Audio Input", source="upload", type="filepath")
            clearBtn = gr.ClearButton([inputRecord, inputUpload])
        with gr.Column(min_width=700):
            output = gr.Label(label="Audio Classification")
            btn = gr.Button(value="Generate Audio")
            audioOutput = gr.Audio(label="Audio Output", interactive=False)

        # Listeners are attached right away: the names inputRecord, inputUpload,
        # output, btn and audioOutput are re-bound by later sections.
        inputRecord.stop_recording(interface, [inputRecord, language], [output])
        inputUpload.upload(interface, [inputUpload, language], [output])
        btn.click(fn=TTS, inputs=[output, language], outputs=audioOutput)
        # NOTE(review): the example click calls `interface` with the audio path
        # only, while the listeners above also pass `language` -- confirm that
        # `interface` has a default for its second parameter.
        examples = gr.Examples(fn=interface, examples=examples_audio_classification, inputs=[inputRecord], outputs=[output], run_on_click=True)

    # ---- Section 2: classify the microphone stream -------------------------
    with gr.Row(visible=False) as realtime_classification:
        with gr.Column(min_width=700):
            # Named inputStream (not `input`) so the builtin is not shadowed.
            # NOTE(review): Gradio documents `every` as effective only when
            # `value` is a callable -- confirm it is needed here.
            inputStream = gr.Audio(label="Audio Input", source="microphone", type="filepath", streaming=True, every=10)
            historyOutput = gr.Textbox(label="History", interactive=False)
            # historyOutput = gr.Label(label="History")
        with gr.Column(min_width=700):
            output = gr.Label(label="Audio Classification")

        # Each change of the streamed audio triggers a classification and a
        # history refresh; starting a new recording calls clearHistory.
        inputStream.change(interface_realtime, [inputStream, language], output)
        inputStream.change(updateHistory, None, historyOutput)
        inputStream.start_recording(clearHistory, None, historyOutput)

    # ---- Section 3: transcribe a recorded or uploaded clip -----------------
    with gr.Row(visible=False) as speech_recognition:
        with gr.Column(min_width=700):
            with gr.Accordion("Record an Audio", open=True):
                inputRecord = gr.Audio(label="Audio Input", source="microphone", type="filepath")
            with gr.Accordion("Upload a file", open=False):
                inputUpload = gr.Audio(label="Audio Input", source="upload", type="filepath")
            # Clear both inputs, matching the audio-classification section
            # (previously only the recording was cleared).
            clearBtn = gr.ClearButton([inputRecord, inputUpload])
        with gr.Column(min_width=700):
            output = gr.Label(label="Transcription")

        inputRecord.stop_recording(transcribe_speech, [inputRecord, language], [output])
        inputUpload.upload(transcribe_speech, [inputUpload, language], [output])
        # NOTE(review): as in section 1, the example click passes only the
        # audio path to `transcribe_speech` -- confirm `language` is optional.
        examplesSpeechEn = gr.Examples(fn=transcribe_speech, examples=examples_speech_recognition_en, inputs=[inputRecord], outputs=[output], run_on_click=True, label="Examples")
        # examplesSpeechPtbr = gr.Examples(fn=transcribe_speech, examples=examples_speech_recognition_ptbr, inputs=[inputRecord], outputs=[output], run_on_click=True, label="Portuguese Examples")

    # ---- Section 4: help chatbot -------------------------------------------
    with gr.Row(visible=False) as chatbot_qa:
        chatbot = gr.Chatbot(
            [],
            elem_id="chatbot",
            bubble_full_width=False,
            avatar_images=(None, "content/avatar-socialear.png"),
            min_width=2000,
        )
        with gr.Row(min_width=2000):
            txt = gr.Textbox(
                scale=4,
                show_label=False,
                placeholder="Enter text and press enter",
                container=False,
                min_width=1000,
            )
            submit = gr.Button(value="", size='sm', scale=1, icon='content/send-icon.png')
        inputRecord = gr.Audio(label="Record a question", source="microphone", type="filepath", min_width=600)
        btn = gr.Button(value="Listen the answer")
        audioOutput = gr.Audio(interactive=False, min_width=600)

        # Pressing Enter and clicking the send button run the same three-step
        # chain: add_text -> chatbot_response -> set the textbox interactive
        # (presumably add_text disables it while the reply is produced; verify).
        txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
            chatbot_response, [chatbot, language], chatbot)
        txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
        submit.click(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
            chatbot_response, [chatbot, language], chatbot).then(
            lambda: gr.Textbox(interactive=True), None, [txt], queue=False)

        # A recorded question is transcribed into the textbox, not sent directly.
        inputRecord.stop_recording(transcribe_speech, [inputRecord, language], [txt])
        # TTS_chatbot receives only the language; presumably it reads the last
        # answer from state kept in `helpers` -- verify.
        btn.click(fn=TTS_chatbot, inputs=[language], outputs=audioOutput)

        with gr.Row(min_width=2000):
            examplesChatbotEn = gr.Examples(examples=examples_chatbot_en, inputs=[txt], label="English Examples")
            examplesChatbotPtbr = gr.Examples(examples=examples_chatbot_ptbr, inputs=[txt], label="Portuguese Examples")

    # Navigation wiring: every handler returns an update for all four rows.
    btn0.click(fn=to_audioClassification, outputs=[audio_classification, realtime_classification, speech_recognition, chatbot_qa])
    btn1.click(fn=to_realtimeAudioClassification, outputs=[audio_classification, realtime_classification, speech_recognition, chatbot_qa])
    btn2.click(fn=to_speechRecognition, outputs=[audio_classification, realtime_classification, speech_recognition, chatbot_qa])
    btn3.click(fn=to_chatbot, outputs=[audio_classification, realtime_classification, speech_recognition, chatbot_qa])
if __name__ == "__main__":
    # Script entry point: turn on Gradio's request queue, then start the
    # server for the `demo` app built above.
    demo.queue()
    demo.launch()