Update app.py
app.py
CHANGED
@@ -65,41 +65,11 @@ def transcribe(audio, state=""):
 
 # Create a combined function that calls both models
 def get_predictions(audio_input):
-    # Perform transcription to get the text
-    transcribed_text = transcribe(audio_input)
-
-    # Define the API key for DeepAI Text to Image API
-    api_key = 'dee3e3f2-d5cf-474c-8072-bd6bea47e865'
-
-    # Generate the image with the transcribed text using DeepAI Text to Image API
-    image = generate_image(api_key, transcribed_text)
-
-    # Get emotion prediction from audio
     emotion_prediction = predict_emotion_from_audio(audio_input)
+    transcribe_prediction = transcribe(audio_input)
+    return [emotion_prediction, transcribe_prediction]
 
-
-
-# Define a function to generate an image using DeepAI Text to Image API
-def generate_image(api_key, text):
-    url = "https://api.deepai.org/api/text2img"
-    headers = {'api-key': api_key}
-    response = requests.post(
-        url,
-        data={
-            'text': text,
-        },
-        headers=headers
-    )
-    response_data = response.json()
-    if 'output_url' in response_data:
-        image_url = response_data['output_url']
-        image_response = requests.get(image_url)
-        image = Image.open(BytesIO(image_response.content))
-        return image
-    else:
-        return None
-
-# Create the Gradio interface for acoustic and semantic predictions
+# Create the Gradio interface
 with gr.Blocks() as interface:
     gr.Markdown("Emotional Machines test: Load or Record an audio file to speech emotion analysis")
     with gr.Tabs():
@@ -107,13 +77,9 @@ with gr.Blocks() as interface:
             with gr.Row():
                 input_audio = gr.Audio(label="Input Audio", type="filepath")
                 submit_button = gr.Button("Submit")
-                output_labels = [gr.Label(num_top_classes=8), gr.Label(num_top_classes=4)
+                output_labels = [gr.Label(num_top_classes=8), gr.Label(num_top_classes=4)]
 
-    # Set the function to be called when the button is clicked
+    # Set the function to be called when the button is clicked
     submit_button.click(get_predictions, inputs=input_audio, outputs=output_labels)
 
-    # Display transcribed text as a label
-    transcribed_text_label = gr.Label(label="Transcribed Text")
-
-    # Launch the Gradio interface
 interface.launch()
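For reference, a minimal sketch of how this section of app.py reads after the commit, reconstructed from the diff. It assumes gradio is imported as gr and that predict_emotion_from_audio and transcribe are defined earlier in the file (only the transcribe signature is visible in the hunk header); the gr.TabItem wrapper and its title are guesses, since the line between the two hunks is not part of the diff.

# Sketch of new lines ~65-85; imports and model-loading code live earlier in app.py
import gradio as gr

# Create a combined function that calls both models
def get_predictions(audio_input):
    emotion_prediction = predict_emotion_from_audio(audio_input)  # acoustic model, defined earlier in app.py
    transcribe_prediction = transcribe(audio_input)               # semantic model, defined earlier in app.py
    return [emotion_prediction, transcribe_prediction]

# Create the Gradio interface
with gr.Blocks() as interface:
    gr.Markdown("Emotional Machines test: Load or Record an audio file to speech emotion analysis")
    with gr.Tabs():
        with gr.TabItem("Speech emotion recognition"):  # hypothetical tab title; this line is not shown in the diff
            with gr.Row():
                input_audio = gr.Audio(label="Input Audio", type="filepath")
                submit_button = gr.Button("Submit")
                output_labels = [gr.Label(num_top_classes=8), gr.Label(num_top_classes=4)]

    # Set the function to be called when the button is clicked:
    # the two returned values map positionally onto the two gr.Label outputs
    submit_button.click(get_predictions, inputs=input_audio, outputs=output_labels)

interface.launch()

Note that gr.Label accepts either a {class: confidence} dict or a plain string, so the transcription text renders in the second label even though it is not a classification output.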