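# SocialEar demo Space: a Gradio app combining audio classification, realtime
# audio classification, speech recognition (en-us / pt-br), and a Q&A chatbot,
# with text-to-speech output. Model and inference logic lives in the helpers module.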
import gradio as gr
from transformers import pipeline
from helpers import load_model_file, load_wav_16k_mono_librosa, initialize_text_to_speech_model, load_label_mapping, predict_yamnet, classify, classify_realtime
from helpers import interface, interface_realtime, updateHistory, clearHistory, clear, format_dictionary, format_json
from helpers import generate_audio, TTS, TTS_ASR, TTS_chatbot, transcribe_speech, transcribe_speech_realtime, transcribe_realtime, translate_enpt
from helpers import chatbot_response, add_text
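# Module-level state: accumulated realtime-classification history and the most recent chatbot answer.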
history = ""
last_answer = ""
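# Example audio clips shipped with the Space, used by gr.Examples in the views below.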
examples_audio_classification = [
    "content/talking-people.mp3",
    "content/miaow_16k.wav",
    "content/birds-in-forest-loop.wav",
    "content/drumming-jungle-music.wav",
    "content/driving-in-the-rain.wav",
    "content/city-alert-siren.wav",
    "content/small-group-applause.wav",
    "content/angry-male-crowd-ambience.wav",
    "content/slow-typing-on-a-keyboard.wav",
    "content/emergency-car-arrival.wav"
]
examples_speech_recognition_en = [
    "content/speech1-en.wav",
    "content/speech2-en.wav",
    "content/speech1-ptbr.wav",
    "content/speech2-ptbr.wav",
    "content/speech3-ptbr.wav"
]
examples_speech_recognition_ptbr = [
    "content/speech1-ptbr.wav",
    "content/speech2-ptbr.wav",
    "content/speech3-ptbr.wav",
]
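# Suggested questions shown as chatbot examples (English and Brazilian Portuguese).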
examples_chatbot_en = [
    ['How does SocialEar assist people with hearing disabilities?'],
    ['Give me suggestions on how to use SocialEar'],
    ['How does SocialEar work?'],
    ['Are SocialEar results accurate?'],
    ['What accessibility features does SocialEar offer?'],
    ['Does SocialEar collect personal data?'],
    ['Can I use SocialEar to identify songs and artists from recorded audio?'],
]
examples_chatbot_ptbr = [
    ['Como o SocialEar auxilia pessoas com deficiência auditiva?'],
    ['Dê-me sugestões sobre como usar o SocialEar'],
    ['Como funciona o SocialEar?'],
    ['Os resultados do SocialEar são precisos?'],
    ['Quais recursos de acessibilidade o SocialEar oferece?'],
    ['O SocialEar coleta dados pessoais?'],
    ['Posso usar o SocialEar para identificar músicas e artistas de áudio gravado?'],
]
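# View-switching helpers: each returns visibility updates for the four feature rows,
# so only the selected view is shown at a time.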
def to_audioClassification():
    return {
        audio_classification: gr.Row(visible=True),
        realtime_classification: gr.Row(visible=False),
        speech_recognition: gr.Row(visible=False),
        chatbot_qa: gr.Row(visible=False),
    }

def to_realtimeAudioClassification():
    return {
        audio_classification: gr.Row(visible=False),
        realtime_classification: gr.Row(visible=True),
        speech_recognition: gr.Row(visible=False),
        chatbot_qa: gr.Row(visible=False),
    }

def to_speechRecognition():
    return {
        audio_classification: gr.Row(visible=False),
        realtime_classification: gr.Row(visible=False),
        speech_recognition: gr.Row(visible=True),
        chatbot_qa: gr.Row(visible=False),
    }

def to_chatbot():
    return {
        audio_classification: gr.Row(visible=False),
        realtime_classification: gr.Row(visible=False),
        speech_recognition: gr.Row(visible=False),
        chatbot_qa: gr.Row(visible=True),
    }
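# UI layout: a language selector, four navigation buttons, and one hidden row per feature.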
with gr.Blocks() as demo:
    with gr.Accordion("Idioma de saída", open=False):
        language = gr.Radio(["en-us", "pt-br"], label="Idioma", info="Escolha o idioma de saída para os resultados", value='pt-br', interactive=True)
    with gr.Row():
        btn0 = gr.Button("Classificação de áudio", scale=1, icon='content/Audio Classification.png', size='lg')
        btn1 = gr.Button("Classificação de áudio em tempo real", scale=1, icon='content/Realtime Audio Classification.png', size='lg')
        btn2 = gr.Button("Reconhecimento de Fala", scale=1, icon='content/Speech Recognition.png', size='lg')
        btn3 = gr.Button("Ajuda Q&A", scale=1, icon='content/Chatbot.png', size='lg')
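    # Audio classification view: record or upload a clip, show the predicted labels, and optionally read them aloud via TTS.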
    with gr.Row(visible=False) as audio_classification:
        with gr.Column(min_width=700):
            with gr.Accordion("Grave um áudio", open=True):
                inputRecord = gr.Audio(label="Entrada de áudio", source="microphone", type="filepath")
            with gr.Accordion("Carregue um arquivo", open=False):
                inputUpload = gr.Audio(label="Entrada de áudio", source="upload", type="filepath")
            clearBtn = gr.ClearButton([inputRecord, inputUpload])
        with gr.Column(min_width=700):
            output = gr.Label(label="Classificação de Áudio")
            btn = gr.Button(value="Gerar áudio")
            audioOutput = gr.Audio(label="Saída de áudio", interactive=False)
        inputRecord.stop_recording(interface, [inputRecord, language], [output])
        inputUpload.upload(interface, [inputUpload, language], [output])
        btn.click(fn=TTS, inputs=[output, language], outputs=audioOutput)
        examples = gr.Examples(fn=interface, examples=examples_audio_classification, inputs=[inputRecord], outputs=[output], run_on_click=True)
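    # Realtime classification view: stream microphone audio, classify each chunk, and keep a running history.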
    with gr.Row(visible=False) as realtime_classification:
        with gr.Column(min_width=700):
            input = gr.Audio(label="Entrada de áudio", source="microphone", type="filepath", streaming=True, every=10)
            historyOutput = gr.Textbox(label="Histórico", interactive=False)
            # historyOutput = gr.Label(label="History")
        with gr.Column(min_width=700):
            output = gr.Label(label="Classificação de Áudio")
        input.change(interface_realtime, [input, language], output)
        input.change(updateHistory, None, historyOutput)
        input.start_recording(clearHistory, None, historyOutput)
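    # Speech recognition view: transcribe recorded or uploaded speech in the selected language.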
    with gr.Row(visible=False) as speech_recognition:
        with gr.Column(min_width=700):
            with gr.Accordion("Grave um áudio", open=True):
                inputRecord = gr.Audio(label="Entrada de áudio", source="microphone", type="filepath")
            with gr.Accordion("Carregue um arquivo", open=False):
                inputUpload = gr.Audio(label="Entrada de áudio", source="upload", type="filepath")
            clearBtn = gr.ClearButton([inputRecord])
        with gr.Column(min_width=700):
            output = gr.Label(label="Transcrição")
        inputRecord.stop_recording(transcribe_speech, [inputRecord, language], [output])
        inputUpload.upload(transcribe_speech, [inputUpload, language], [output])
        # examplesSpeechEn = gr.Examples(fn=transcribe_speech, examples=examples_speech_recognition_en, inputs=[inputRecord], outputs=[output], run_on_click=True, label="Examples")
        examplesSpeechPtbr = gr.Examples(fn=transcribe_speech, examples=examples_speech_recognition_ptbr, inputs=[inputRecord], outputs=[output], run_on_click=True, label="Portuguese Examples")
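    # Q&A chatbot view: questions can be typed or spoken, and the answer can be played back via TTS_chatbot.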
    with gr.Row(visible=False) as chatbot_qa:
        chatbot = gr.Chatbot(
            [],
            elem_id="chatbot",
            bubble_full_width=False,
            avatar_images=(None, "content/avatar-socialear.png"),
            min_width=2000
        )
        with gr.Row(min_width=2000):
            txt = gr.Textbox(
                scale=4,
                show_label=False,
                placeholder="Escreva o texto e pressione enter",
                container=False,
                min_width=1000
            )
            submit = gr.Button(value="", size='sm', scale=1, icon='content/send-icon.png')
            inputRecord = gr.Audio(label="Grave uma pergunta", source="microphone", type="filepath", min_width=600)
            btn = gr.Button(value="Escute a resposta")
            audioOutput = gr.Audio(interactive=False, min_width=600)
        txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
            chatbot_response, [chatbot, language], chatbot)
        txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
        submit.click(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
            chatbot_response, [chatbot, language], chatbot).then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
        inputRecord.stop_recording(transcribe_speech, [inputRecord, language], [txt])
        btn.click(fn=TTS_chatbot, inputs=[language], outputs=audioOutput)
        with gr.Row(min_width=2000):
            # examplesChatbotEn = gr.Examples(examples=examples_chatbot_en, inputs=[txt], label="English Examples")
            examplesChatbotPtbr = gr.Examples(examples=examples_chatbot_ptbr, inputs=[txt], label="Exemplos")
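    # Navigation: each button reveals its feature row and hides the others.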
    btn0.click(fn=to_audioClassification, outputs=[audio_classification, realtime_classification, speech_recognition, chatbot_qa])
    btn1.click(fn=to_realtimeAudioClassification, outputs=[audio_classification, realtime_classification, speech_recognition, chatbot_qa])
    btn2.click(fn=to_speechRecognition, outputs=[audio_classification, realtime_classification, speech_recognition, chatbot_qa])
    btn3.click(fn=to_chatbot, outputs=[audio_classification, realtime_classification, speech_recognition, chatbot_qa])
if __name__ == "__main__":
    demo.queue()
    demo.launch()