pedropauletti committed on
Commit
f90f3f5
·
1 Parent(s): 740c9e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -8
app.py CHANGED
@@ -1,4 +1,49 @@
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  def to_audioClassification():
4
  return {
@@ -7,7 +52,7 @@ def to_audioClassification():
7
  speech_recognition: gr.Row(visible=False),
8
  chatbot_qa: gr.Row(visible=False),
9
  }
10
-
11
  def to_realtimeAudioClassification():
12
  return {
13
  audio_classification: gr.Row(visible=False),
@@ -39,10 +84,10 @@ with gr.Blocks() as demo:
39
  language = gr.Radio(["en-us", "pt-br"], label="Language", info="Choose the language to display the classification result and audio", value='en-us', interactive=True)
40
 
41
  with gr.Row():
42
- btn0 = gr.Button("Audio Classification", scale=1, size='lg')
43
- btn1 = gr.Button("Realtime Audio Classification", scale=1,size='lg')
44
- btn2 = gr.Button("Speech Recognition", scale=1, size='lg')
45
- btn3 = gr.Button("Help", scale=1, size='lg')
46
 
47
  with gr.Row(visible=False) as audio_classification:
48
  with gr.Column(min_width=700):
@@ -57,6 +102,12 @@ with gr.Blocks() as demo:
57
  audioOutput = gr.Audio(label="Audio Output", interactive=False)
58
 
59
 
 
 
 
 
 
 
60
  with gr.Row(visible=False) as realtime_classification:
61
  with gr.Column(min_width=700):
62
  input = gr.Audio(label="Audio Input", source="microphone", type="filepath",streaming=True, every=10)
@@ -65,6 +116,11 @@ with gr.Blocks() as demo:
65
  with gr.Column(min_width=700):
66
  output = gr.Label(label="Audio Classification")
67
 
 
 
 
 
 
68
  with gr.Row(visible=False) as speech_recognition:
69
  with gr.Column(min_width=700):
70
  with gr.Accordion("Record an Audio", open=True):
@@ -75,13 +131,19 @@ with gr.Blocks() as demo:
75
  with gr.Column(min_width=700):
76
  output = gr.Label(label="Transcription")
77
 
78
-
 
 
 
 
 
 
79
  with gr.Row(visible=False) as chatbot_qa:
80
  chatbot = gr.Chatbot(
81
  [],
82
  elem_id="chatbot",
83
  bubble_full_width=False,
84
- # avatar_images=(None, "/content/avatar-socialear.png"),
85
  min_width=2000
86
  )
87
  with gr.Row(min_width=2000):
@@ -92,7 +154,24 @@ with gr.Blocks() as demo:
92
  container=False,
93
  min_width=1000
94
  )
95
- submit = gr.Button(value="", size='sm', scale=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
 
98
  btn0.click(fn=to_audioClassification, outputs=[audio_classification, realtime_classification, speech_recognition, chatbot_qa])
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
+ from helpers import load_model_file, load_wav_16k_mono_librosa, initialize_text_to_speech_model, load_label_mapping, predict_yamnet, classify, classify_realtime
4
+ from helpers import interface, interface_realtime, updateHistory, clearHistory, clear, format_dictionary, format_json
5
+ from helpers import generate_audio, TTS, TTS_ASR, TTS_chatbot, transcribe_speech, transcribe_speech_realtime, transcribe_realtime, translate_enpt
6
+ from helpers import chatbot_response, add_text
7
+
8
+ history = ""
9
+ last_answer = ""
10
+
11
+ examples_audio_classification = [
12
+ "content/crowd_laughing.mp3",
13
+ "content/nature-ambient-sound.mp3",
14
+ "content/talking-people.mp3",
15
+ "content/miaow_16k.wav",
16
+ ]
17
+
18
+ examples_speech_recognition_en = [
19
+ "content/speech1-en.wav",
20
+ "content/speech2-en.wav",
21
+ ]
22
+ examples_speech_recognition_ptbr = [
23
+ "content/speech1-ptbr.wav",
24
+ "content/speech2-ptbr.wav",
25
+ "content/speech3-ptbr.wav",
26
+ ]
27
+
28
+ examples_chatbot_en = [
29
+ ['How does SocialEar assist people with hearing disabilities?'],
30
+ ['Give me suggestions on how to use SocialEar'],
31
+ ['How does SocialEar work?'],
32
+ ['Are SocialEar results accurate?'],
33
+ ['What accessibility features does SocialEar offer?'],
34
+ ['Does SocialEar collect personal data?'],
35
+ ['Can I use SocialEar to identify songs and artists from recorded audio?'],
36
+ ]
37
+
38
+ examples_chatbot_ptbr = [
39
+ ['Como o SocialEar auxilia pessoas com deficiência auditiva?'],
40
+ ['Dê-me sugestões sobre como usar o SocialEar'],
41
+ ['Como funciona o SocialEar?'],
42
+ ['Os resultados do SocialEar são precisos?'],
43
+ ['Quais recursos de acessibilidade o SocialEar oferece?'],
44
+ ['O SocialEar coleta dados pessoais?'],
45
+ ['Posso usar o SocialEar para identificar músicas e artistas de áudio gravado?'],
46
+ ]
47
 
48
  def to_audioClassification():
49
  return {
 
52
  speech_recognition: gr.Row(visible=False),
53
  chatbot_qa: gr.Row(visible=False),
54
  }
55
+
56
  def to_realtimeAudioClassification():
57
  return {
58
  audio_classification: gr.Row(visible=False),
 
84
  language = gr.Radio(["en-us", "pt-br"], label="Language", info="Choose the language to display the classification result and audio", value='en-us', interactive=True)
85
 
86
  with gr.Row():
87
+ btn0 = gr.Button("Audio Classification", scale=1, icon='content/Audio Classification.png', size='lg')
88
+ btn1 = gr.Button("Realtime Audio Classification", scale=1, icon='content/Realtime Audio Classification.png', size='lg')
89
+ btn2 = gr.Button("Speech Recognition", scale=1, icon='content/Speech Recognition.png', size='lg')
90
+ btn3 = gr.Button("Help", scale=1, icon='content/Chatbot.png', size='lg')
91
 
92
  with gr.Row(visible=False) as audio_classification:
93
  with gr.Column(min_width=700):
 
102
  audioOutput = gr.Audio(label="Audio Output", interactive=False)
103
 
104
 
105
+ inputRecord.stop_recording(interface, [inputRecord, language], [output])
106
+ inputUpload.upload(interface, [inputUpload, language], [output])
107
+ btn.click(fn=TTS, inputs=[output, language], outputs=audioOutput)
108
+
109
+ examples = gr.Examples(fn=interface, examples=examples_audio_classification, inputs=[inputRecord], outputs=[output], run_on_click=True)
110
+
111
  with gr.Row(visible=False) as realtime_classification:
112
  with gr.Column(min_width=700):
113
  input = gr.Audio(label="Audio Input", source="microphone", type="filepath",streaming=True, every=10)
 
116
  with gr.Column(min_width=700):
117
  output = gr.Label(label="Audio Classification")
118
 
119
+ input.change(interface_realtime, [input, language], output)
120
+ input.change(updateHistory, None, historyOutput)
121
+ input.start_recording(clearHistory, None, historyOutput)
122
+
123
+
124
  with gr.Row(visible=False) as speech_recognition:
125
  with gr.Column(min_width=700):
126
  with gr.Accordion("Record an Audio", open=True):
 
131
  with gr.Column(min_width=700):
132
  output = gr.Label(label="Transcription")
133
 
134
+
135
+ inputRecord.stop_recording(transcribe_speech, [inputRecord, language], [output])
136
+ inputUpload.upload(transcribe_speech, [inputUpload, language], [output])
137
+
138
+ examplesSpeechEn = gr.Examples(fn=transcribe_speech, examples=examples_speech_recognition_en, inputs=[inputRecord], outputs=[output], run_on_click=True, label="English Examples")
139
+ # examplesSpeechPtbr = gr.Examples(fn=transcribe_speech, examples=examples_speech_recognition_ptbr, inputs=[inputRecord], outputs=[output], run_on_click=True, label="Portuguese Examples")
140
+
141
  with gr.Row(visible=False) as chatbot_qa:
142
  chatbot = gr.Chatbot(
143
  [],
144
  elem_id="chatbot",
145
  bubble_full_width=False,
146
+ avatar_images=(None, "content/avatar-socialear.png"),
147
  min_width=2000
148
  )
149
  with gr.Row(min_width=2000):
 
154
  container=False,
155
  min_width=1000
156
  )
157
+ submit = gr.Button(value="", size='sm', scale=1, icon='content/send-icon.png')
158
+
159
+
160
+ inputRecord = gr.Audio(label="Record a question", source="microphone", type="filepath", min_width=600)
161
+ btn = gr.Button(value="Listen the answer")
162
+ audioOutput = gr.Audio(interactive=False, min_width=600)
163
+
164
+ txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
165
+ chatbot_response, [chatbot, language], chatbot)
166
+ txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
167
+ submit.click(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
168
+ chatbot_response, [chatbot, language], chatbot).then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
169
+ inputRecord.stop_recording(transcribe_speech, [inputRecord, language], [txt])
170
+ btn.click(fn=TTS_chatbot, inputs=[language], outputs=audioOutput)
171
+
172
+ with gr.Row(min_width=2000):
173
+ examplesChatbotEn = gr.Examples(examples=examples_chatbot_en, inputs=[txt], label="English Examples")
174
+ examplesChatbotPtbr = gr.Examples(examples=examples_chatbot_ptbr, inputs=[txt], label="Portuguese Examples")
175
 
176
 
177
  btn0.click(fn=to_audioClassification, outputs=[audio_classification, realtime_classification, speech_recognition, chatbot_qa])