Spaces: jfforero (Space status: Runtime error)

jfforero committed · Commit b3ece58 · verified · Parent: 4193647

Update app.py

Files changed (1):
  1. app.py +32 -49
app.py CHANGED

@@ -1,32 +1,27 @@
 import gradio as gr
-import tensorflow as tf
 import numpy as np
 import librosa
 import time
 from transformers import pipeline
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
-import requests
-from PIL import Image
-from io import BytesIO
-
+from tensorflow.keras.models import load_model
 
-#p = pipeline("automatic-speech-recognition")
+# Load the ASR pipeline
 p = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-960h-lv60-self")
 
-
-
-from tensorflow.keras.models import load_model
-
+# Load the emotion prediction model
 model = load_model('mymodel_SER_LSTM_RAVDESS.h5')
 
+# Function to extract MFCC features from audio
 def extract_mfcc(wav_file_name):
-    #This function extracts mfcc features and obtain the mean of each dimension
     y, sr = librosa.load(wav_file_name)
-    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T,axis=0)
+    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
     return mfccs
 
+# Emotions dictionary
 emotions = {1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad', 5: 'angry', 6: 'fearful', 7: 'disgust', 8: 'surprised'}
 
+# Function to predict emotion from audio
 def predict_emotion_from_audio(wav_filepath):
     test_point = extract_mfcc(wav_filepath)
     test_point = np.reshape(test_point, newshape=(1, 40, 1))
@@ -34,55 +29,43 @@ def predict_emotion_from_audio(wav_filepath):
     predicted_emotion_label = np.argmax(predictions[0]) + 1
     return emotions[predicted_emotion_label]
 
-# Semantics
+# Function for sentiment analysis using VADER
 def sentiment_vader(sentence):
-
-    # Create a SentimentIntensityAnalyzer object.
     sid_obj = SentimentIntensityAnalyzer()
-
     sentiment_dict = sid_obj.polarity_scores(sentence)
-    negative = sentiment_dict['neg']
-    neutral = sentiment_dict['neu']
-    positive = sentiment_dict['pos']
-    compound = sentiment_dict['compound']
-
-    if sentiment_dict['compound'] >= 0.05 :
+    if sentiment_dict['compound'] >= 0.05:
         overall_sentiment = "Positive"
-
-    elif sentiment_dict['compound'] <= - 0.05 :
+    elif sentiment_dict['compound'] <= -0.05:
         overall_sentiment = "Negative"
-
-    else :
+    else:
         overall_sentiment = "Neutral"
-
     return overall_sentiment
 
-def transcribe(audio, state=""):
-    time.sleep(3)
+# Function to transcribe audio and perform sentiment analysis
+def transcribe(audio):
+    time.sleep(3)  # Simulate processing delay
     text = p(audio)["text"]
-    text = sentiment_vader(text)
-    return text
+    text_sentiment = sentiment_vader(text)
+    return text, text_sentiment
 
-
-# Define functions for acoustic and semantic predictions (predict_emotion_from_audio and transcribe)
-
-# Create a combined function that calls both models
+# Function to get predictions for emotion and sentiment
 def get_predictions(audio_input):
    emotion_prediction = predict_emotion_from_audio(audio_input)
-    transcribe_prediction = transcribe(audio_input)
-    return [emotion_prediction, transcribe_prediction]
+    transcript, sentiment_prediction = transcribe(audio_input)
+    return emotion_prediction, transcript, sentiment_prediction
 
 # Create the Gradio interface
-with gr.Blocks() as interface:
-    gr.Markdown("Emotional Machines test: Load or Record an audio file to speech emotion analysis")
-    with gr.Tabs():
-        with gr.Tab("Acoustic and Semantic Predictions"):
-            with gr.Row():
-                input_audio = gr.Audio(label="Input Audio", type="filepath")
-                submit_button = gr.Button("Submit")
-            output_labels = [gr.Label(num_top_classes=8), gr.Label(num_top_classes=4)]
-
-    # Set the function to be called when the button is clicked
-    submit_button.click(get_predictions, inputs=input_audio, outputs=output_labels)
-
+interface = gr.Interface(
+    fn=get_predictions,
+    inputs=gr.Audio(label="Input Audio", type="file"),
+    outputs=[
+        gr.Label(label="Emotion Prediction"),
+        gr.Textbox(label="Transcript"),
+        gr.Label(label="Sentiment Prediction")
+    ],
+    title="Emotional Machines Test",
+    description="Load an audio file to analyze speech emotion and sentiment."
+)
+
+# Launch the interface
 interface.launch()
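
Note: the Space status above shows "Runtime error". One plausible cause, assuming the Space runs a recent Gradio release: gr.Audio no longer accepts type="file" (current releases take type="filepath" or type="numpy"), so building the interface would fail at startup. A minimal sketch of the constructor with only that value changed:

interface = gr.Interface(
    fn=get_predictions,
    inputs=gr.Audio(label="Input Audio", type="filepath"),  # "filepath" instead of the removed "file"
    outputs=[
        gr.Label(label="Emotion Prediction"),
        gr.Textbox(label="Transcript"),
        gr.Label(label="Sentiment Prediction")
    ],
    title="Emotional Machines Test",
    description="Load an audio file to analyze speech emotion and sentiment."
)

With type="filepath" the component passes a path string to get_predictions, which is what extract_mfcc and the ASR pipeline expect; the three values returned by get_predictions map positionally onto the three output components.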
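
For reference on the feature shape: extract_mfcc averages the 40 MFCC coefficients over time, so each clip becomes a single 40-dimensional vector, which predict_emotion_from_audio then reshapes to (1, 40, 1), i.e. (batch, timesteps, features) for the LSTM. A hypothetical sanity check, where "example.wav" is a placeholder path and not part of the commit:

import numpy as np
import librosa

y, sr = librosa.load("example.wav")  # placeholder file
# librosa.feature.mfcc returns shape (n_mfcc, frames); transposing and
# averaging over frames leaves one mean value per coefficient
mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
print(mfccs.shape)                          # (40,)
print(np.reshape(mfccs, (1, 40, 1)).shape)  # (1, 40, 1)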
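
On the sentiment cutoffs: VADER's polarity_scores returns 'neg', 'neu', 'pos', and a normalized 'compound' score in [-1, 1]; the +/-0.05 boundaries used in sentiment_vader are the thresholds recommended in VADER's documentation. A quick usage sketch with an arbitrary example sentence:

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

sid = SentimentIntensityAnalyzer()
scores = sid.polarity_scores("I really enjoyed this recording")
print(scores)  # dict with 'neg', 'neu', 'pos', 'compound' keys
print("Positive" if scores['compound'] >= 0.05 else "Not positive")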