import gradio as gr
import numpy as np
import librosa
import requests
from io import BytesIO
from PIL import Image  # needed for Image.open() when decoding the generated image
from tensorflow.keras.models import load_model

# Load the emotion prediction model
def load_emotion_model(model_path):
    try:
        model = load_model(model_path)
        return model
    except Exception as e:
        print("Error loading emotion prediction model:", e)
        return None

model_path = 'mymodel_SER_LSTM_RAVDESS.h5'
model = load_emotion_model(model_path)

# Function to extract MFCC features from audio
def extract_mfcc(wav_file_name):
    try:
        y, sr = librosa.load(wav_file_name)
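        # librosa.feature.mfcc returns an array of shape (n_mfcc, frames);
        # averaging over the time axis yields a fixed-length 40-dimensional
        # feature vector regardless of the clip's duration.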
        mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
        return mfccs
    except Exception as e:
        print("Error extracting MFCC features:", e)
        return None

# Emotion labels following the RAVDESS coding (1 = neutral ... 8 = surprised)
emotions = {1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad', 5: 'angry', 6: 'fearful', 7: 'disgust', 8: 'surprised'}

# Function to predict emotion from audio
def predict_emotion_from_audio(wav_filepath):
    try:
        test_point = extract_mfcc(wav_filepath)
        if test_point is not None:
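            # Reshape the 40 averaged MFCCs to (batch, timesteps, features) = (1, 40, 1),
            # which is the input shape the LSTM model presumably expects.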
            test_point = np.reshape(test_point, newshape=(1, 40, 1))
            predictions = model.predict(test_point)
            predicted_emotion_label = np.argmax(predictions[0]) + 1
            return emotions[predicted_emotion_label]
        else:
            return "Error: Unable to extract features"
    except Exception as e:
        print("Error predicting emotion:", e)
        return None


# Define the API key for DeepAI Text to Image API
api_key = 'dee3e3f2-d5cf-474c-8072-bd6bea47e865'

# Predict the emotion from audio and generate a matching image
def get_predictions(audio_input):
    emotion_prediction = predict_emotion_from_audio(audio_input)
    # Generate an image from the predicted emotion via the DeepAI API
    image = generate_image(api_key, emotion_prediction)
    return emotion_prediction, image

# Define a function to generate an image using DeepAI Text to Image API
def generate_image(api_key, text):
    url = "https://api.deepai.org/api/text2img"
    headers = {'api-key': api_key}
    response = requests.post(
        url,
        data={
            'text': text,
        },
        headers=headers
    )
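    # The text2img endpoint responds with JSON; on success it contains an
    # 'output_url' field pointing to the generated image, which is then
    # downloaded and decoded into a PIL image.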
    response_data = response.json()
    if 'output_url' in response_data:
        image_url = response_data['output_url']
        image_response = requests.get(image_url)
        image = Image.open(BytesIO(image_response.content))
        return image
    else:
        return None

# Create the Gradio interface
with gr.Blocks() as interface:
    gr.Markdown("Emotional Machines test: Load or Record an audio file to speech emotion analysis")
    with gr.Tabs():
        with gr.Tab("Acoustic and Semantic Predictions"):
            with gr.Row():
                input_audio = gr.Audio(label="Input Audio", type="filepath")
                submit_button = gr.Button("Submit")
            output_label = [gr.Label(label="Prediction"), gr.Image(type='pil')]  # Outputs: the predicted emotion and the generated image

    # Set the function to be called when the button is clicked
    submit_button.click(get_predictions, inputs=input_audio, outputs=output_label)

interface.launch()
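
# For local debugging one could instead call interface.launch(debug=True) or
# interface.launch(share=True); these are standard Gradio launch options and
# are suggested here only as an optional alternative.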