import gradio as gr
import numpy as np
import librosa
import requests
from io import BytesIO
from PIL import Image
from tensorflow.keras.models import load_model
# Load the emotion prediction model
def load_emotion_model(model_path):
    try:
        model = load_model(model_path)
        return model
    except Exception as e:
        print("Error loading emotion prediction model:", e)
        return None

model_path = 'mymodel_SER_LSTM_RAVDESS.h5'
model = load_emotion_model(model_path)
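
# Sanity-check sketch (assumption: the .h5 file is a Keras model trained on
# 40-dimensional MFCC vectors, so it should accept input shaped (None, 40, 1)
# and emit one score per emotion class):
# if model is not None:
#     print(model.input_shape)   # expected: (None, 40, 1)
#     print(model.output_shape)  # expected: (None, 8)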
# Extract MFCC features from audio, averaged over time into a fixed-length vector
def extract_mfcc(wav_file_name):
    try:
        y, sr = librosa.load(wav_file_name)
        mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
        return mfccs
    except Exception as e:
        print("Error extracting MFCC features:", e)
        return None
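
# Usage sketch ('sample.wav' is a hypothetical local file; librosa.load
# resamples to 22,050 Hz by default):
# features = extract_mfcc('sample.wav')
# if features is not None:
#     print(features.shape)  # (40,) -- one mean value per MFCC coefficient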
# RAVDESS emotion codes (1-8) mapped to human-readable labels
emotions = {1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad', 5: 'angry', 6: 'fearful', 7: 'disgust', 8: 'surprised'}
# Predict an emotion label from an audio file
def predict_emotion_from_audio(wav_filepath):
    try:
        test_point = extract_mfcc(wav_filepath)
        if test_point is not None:
            # Reshape to (batch, timesteps, features) = (1, 40, 1) for the model
            test_point = np.reshape(test_point, newshape=(1, 40, 1))
            predictions = model.predict(test_point)
            # argmax is 0-based; the emotion codes above are 1-based
            predicted_emotion_label = np.argmax(predictions[0]) + 1
            return emotions[predicted_emotion_label]
        else:
            return "Error: Unable to extract features"
    except Exception as e:
        print("Error predicting emotion:", e)
        return None
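
# Usage sketch (hypothetical file path). Note the function returns an error
# string rather than None when feature extraction fails, so callers should
# not assume the result is always a valid emotion label:
# print(predict_emotion_from_audio('sample.wav'))  # e.g. 'happy'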
# Define the API key for DeepAI Text to Image API
api_key = 'dee3e3f2-d5cf-474c-8072-bd6bea47e865'
# Predict the emotion and generate a matching image for the Gradio interface
def get_predictions(audio_input):
    emotion_prediction = predict_emotion_from_audio(audio_input)
    # Use the predicted emotion as the text prompt for image generation
    image = generate_image(api_key, emotion_prediction)
    return emotion_prediction, image
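
# Because gr.Audio below is configured with type="filepath", Gradio hands
# get_predictions a path string, so it can also be exercised directly:
# label, img = get_predictions('sample.wav')  # 'sample.wav' is hypothetical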
###
# Generate an image from text using the DeepAI Text to Image API
def generate_image(api_key, text):
    url = "https://api.deepai.org/api/text2img"
    headers = {'api-key': api_key}
    response = requests.post(url, data={'text': text}, headers=headers)
    response_data = response.json()
    if 'output_url' in response_data:
        # Download the generated image and return it as a PIL Image
        image_url = response_data['output_url']
        image_response = requests.get(image_url)
        image = Image.open(BytesIO(image_response.content))
        return image
    else:
        return None
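
# Usage sketch (assumes a valid DeepAI key and network access; handling of
# non-JSON or failed responses is omitted here, as in the function above):
# img = generate_image(api_key, 'happy')
# if img is not None:
#     img.save('happy.png')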
####
# Create the Gradio interface
with gr.Blocks() as interface:
    gr.Markdown("Emotional Machines test: load or record an audio file for speech emotion analysis")
    with gr.Tabs():
        with gr.Tab("Acoustic and Semantic Predictions"):
            with gr.Row():
                input_audio = gr.Audio(label="Input Audio", type="filepath")
                submit_button = gr.Button("Submit")
            # Two outputs: the predicted emotion label and the generated image
            output_components = [gr.Label(label="Prediction"), gr.Image(type='pil')]
            # Run get_predictions when the button is clicked
            submit_button.click(get_predictions, inputs=input_audio, outputs=output_components)

interface.launch()
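
# Deployment note (sketch): launch() serves on localhost by default; Gradio
# can also create a temporary public link via interface.launch(share=True).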