AVE2

Runtime error

App Files Community

jfforero committed on Apr 16, 2024

Commit

b3ece58

verified ·

1 Parent(s): 4193647

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -49

app.py CHANGED Viewed

@@ -1,32 +1,27 @@
 import gradio as gr
-import tensorflow as tf
 import numpy as np
 import librosa
 import time
 from transformers import pipeline
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
-import requests
-from PIL import Image
-from io import BytesIO
-#p = pipeline("automatic-speech-recognition")
 p = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-960h-lv60-self")
-from tensorflow.keras.models import load_model
 model = load_model('mymodel_SER_LSTM_RAVDESS.h5')
 def extract_mfcc(wav_file_name):
-    #This function extracts mfcc features and obtain the mean of each dimension
     y, sr = librosa.load(wav_file_name)
-    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T,axis=0)
     return mfccs
 emotions = {1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad', 5: 'angry', 6: 'fearful', 7: 'disgust', 8: 'surprised'}
 def predict_emotion_from_audio(wav_filepath):
     test_point = extract_mfcc(wav_filepath)
     test_point = np.reshape(test_point, newshape=(1, 40, 1))
@@ -34,55 +29,43 @@ def predict_emotion_from_audio(wav_filepath):
     predicted_emotion_label = np.argmax(predictions[0]) + 1
     return emotions[predicted_emotion_label]
-# Semantics
 def sentiment_vader(sentence):
-    # Create a SentimentIntensityAnalyzer object.
     sid_obj = SentimentIntensityAnalyzer()
     sentiment_dict = sid_obj.polarity_scores(sentence)
-    negative = sentiment_dict['neg']
-    neutral = sentiment_dict['neu']
-    positive = sentiment_dict['pos']
-    compound = sentiment_dict['compound']
-    if sentiment_dict['compound'] >= 0.05 :
         overall_sentiment = "Positive"
-    elif sentiment_dict['compound'] <= - 0.05 :
         overall_sentiment = "Negative"
-    else :
         overall_sentiment = "Neutral"
     return overall_sentiment
-def transcribe(audio, state=""):
-    time.sleep(3)
     text = p(audio)["text"]
-    text = sentiment_vader(text)
-    return text
-# Define functions for acoustic and semantic predictions (predict_emotion_from_audio and transcribe)
-# Create a combined function that calls both models
 def get_predictions(audio_input):
     emotion_prediction = predict_emotion_from_audio(audio_input)
-    transcribe_prediction = transcribe(audio_input)
-    return [emotion_prediction, transcribe_prediction]
 # Create the Gradio interface
-with gr.Blocks() as interface:
-    gr.Markdown("Emotional Machines test: Load or Record an audio file to speech emotion analysis")
-    with gr.Tabs():
-        with gr.Tab("Acoustic and Semantic Predictions"):
-            with gr.Row():
-                input_audio = gr.Audio(label="Input Audio", type="filepath")
-                submit_button = gr.Button("Submit")
-            output_labels = [gr.Label(num_top_classes=8), gr.Label(num_top_classes=4)]
-    # Set the function to be called when the button is clicked
-    submit_button.click(get_predictions, inputs=input_audio, outputs=output_labels)
 interface.launch()

 import gradio as gr
 import numpy as np
 import librosa
 import time
 from transformers import pipeline
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+from tensorflow.keras.models import load_model
+# Load the ASR pipeline
 p = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-960h-lv60-self")
+# Load the emotion prediction model
 model = load_model('mymodel_SER_LSTM_RAVDESS.h5')
+# Function to extract MFCC features from audio
 def extract_mfcc(wav_file_name):
     y, sr = librosa.load(wav_file_name)
+    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
     return mfccs
+# Emotions dictionary
 emotions = {1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad', 5: 'angry', 6: 'fearful', 7: 'disgust', 8: 'surprised'}
+# Function to predict emotion from audio
 def predict_emotion_from_audio(wav_filepath):
     test_point = extract_mfcc(wav_filepath)
     test_point = np.reshape(test_point, newshape=(1, 40, 1))
     predicted_emotion_label = np.argmax(predictions[0]) + 1
     return emotions[predicted_emotion_label]
+# Function for sentiment analysis using VADER
 def sentiment_vader(sentence):
     sid_obj = SentimentIntensityAnalyzer()
     sentiment_dict = sid_obj.polarity_scores(sentence)
+    if sentiment_dict['compound'] >= 0.05:
         overall_sentiment = "Positive"
+    elif sentiment_dict['compound'] <= -0.05:
         overall_sentiment = "Negative"
+    else:
         overall_sentiment = "Neutral"
     return overall_sentiment
+# Function to transcribe audio and perform sentiment analysis
+def transcribe(audio):
+    time.sleep(3)  # Simulate processing delay
     text = p(audio)["text"]
+    text_sentiment = sentiment_vader(text)
+    return text, text_sentiment
+# Function to get predictions for emotion and sentiment
 def get_predictions(audio_input):
     emotion_prediction = predict_emotion_from_audio(audio_input)
+    transcript, sentiment_prediction = transcribe(audio_input)
+    return emotion_prediction, transcript, sentiment_prediction
 # Create the Gradio interface
+interface = gr.Interface(
+    fn=get_predictions,
+    inputs=gr.Audio(label="Input Audio", type="file"),
+    outputs=[
+        gr.Label(label="Emotion Prediction"),
+        gr.Textbox(label="Transcript"),
+        gr.Label(label="Sentiment Prediction")
+    ],
+    title="Emotional Machines Test",
+    description="Load an audio file to analyze speech emotion and sentiment."
+)
+# Launch the interface
 interface.launch()