Spaces:

Boltz79
/

Sentiment-Analysis

Sleeping

App Files Community

Boltz79 committed on Feb 8

Commit

cd578af

verified ·

1 Parent(s): 88c3f37

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -19

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import gradio as gr
 import librosa
 import numpy as np
@@ -8,7 +9,7 @@ from speechbrain.inference.interfaces import foreign_class
 import io
 import matplotlib.pyplot as plt
 import librosa.display
-from PIL import Image  # Added for image conversion
 # Try to import noisereduce (if not available, noise reduction will be skipped)
 try:
@@ -43,7 +44,13 @@ classifier = foreign_class(
 )
 def preprocess_audio(audio_file, apply_noise_reduction=False):
-    """Load and preprocess the audio file: convert to 16kHz mono, optionally apply noise reduction, and normalize."""
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     if apply_noise_reduction and NOISEREDUCE_AVAILABLE:
         y = nr.reduce_noise(y=y, sr=sr)
@@ -55,7 +62,10 @@ def preprocess_audio(audio_file, apply_noise_reduction=False):
     return temp_file.name
 def ensemble_prediction(audio_file, apply_noise_reduction=False, segment_duration=3.0, overlap=1.0):
-    """Split longer audio files into overlapping segments, predict each segment, and return the majority-voted emotion label."""
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     total_duration = librosa.get_duration(y=y, sr=sr)
@@ -80,7 +90,7 @@ def ensemble_prediction(audio_file, apply_noise_reduction=False, segment_duratio
     for seg in segments:
         temp_file = preprocess_audio(seg, apply_noise_reduction)
         _, _, _, label = classifier.classify_file(temp_file)
-        predictions.append(label[0])  # Extract the predicted emotion
         os.remove(temp_file)
         os.remove(seg)
@@ -89,7 +99,9 @@ def ensemble_prediction(audio_file, apply_noise_reduction=False, segment_duratio
     return most_common
 def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False, segment_duration=3.0, overlap=1.0):
-    """Predict emotion from an audio file and return the emotion with an emoji."""
     try:
         if use_ensemble:
             label = ensemble_prediction(audio_file, apply_noise_reduction, segment_duration, overlap)
@@ -97,18 +109,18 @@ def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False,
             temp_file = preprocess_audio(audio_file, apply_noise_reduction)
             result = classifier.classify_file(temp_file)
             os.remove(temp_file)
             if isinstance(result, tuple) and len(result) > 3:
-                label = result[3][0]  # Extract the predicted emotion label
             else:
-                label = str(result)  # Convert to string if unexpected format
-        return add_emoji_to_label(label.lower())  # Format and add an emoji
     except Exception as e:
         return f"Error processing file: {str(e)}"
 def plot_waveform(audio_file):
-    """Generate and return a waveform plot image (as a PIL Image) for the given audio file."""
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     plt.figure(figsize=(10, 3))
     librosa.display.waveshow(y, sr=sr)
@@ -120,18 +132,19 @@ def plot_waveform(audio_file):
     return Image.open(buf)
 def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap):
-    """Run emotion prediction and generate a waveform plot."""
     emotion = predict_emotion(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap)
     waveform = plot_waveform(audio_file)
-    return emotion  # Ensure emoji is included here
-# Build the enhanced UI using Gradio Blocks
 with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: Arial;}") as demo:
-   gr.Markdown("<h1 style='text-align: center;'>Enhanced Emotion Recognition</h1>")
     gr.Markdown(
         "Upload an audio file, and the model will predict the emotion using a wav2vec2 model fine-tuned on IEMOCAP data. "
-        "The prediction is accompanied by an emoji, and you can also view the audio's waveform. "
         "Use the options below to adjust ensemble prediction and noise reduction settings."
     )
@@ -149,7 +162,7 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
             waveform_image = gr.Image(label="Audio Waveform", type="pil")
             predict_button.click(
-                predict_and_plot,
                 inputs=[audio_input, use_ensemble, apply_noise_reduction, segment_duration, overlap],
                 outputs=[result_text, waveform_image]
             )
@@ -163,7 +176,7 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
   - Ensemble Prediction for long audio files.
   - Optional Noise Reduction.
   - Visualization of the audio waveform.
-  - Emoji representation of the predicted emotion.
 **Credits:**
 - [SpeechBrain](https://speechbrain.github.io)

+# app.py
 import gradio as gr
 import librosa
 import numpy as np
 import io
 import matplotlib.pyplot as plt
 import librosa.display
+from PIL import Image  # For image conversion
 # Try to import noisereduce (if not available, noise reduction will be skipped)
 try:
 )
 def preprocess_audio(audio_file, apply_noise_reduction=False):
+    """
+    Load and preprocess the audio file:
+      - Convert to 16kHz mono.
+      - Optionally apply noise reduction.
+      - Normalize the audio.
+    Saves the processed audio to a temporary file and returns its path.
+    """
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     if apply_noise_reduction and NOISEREDUCE_AVAILABLE:
         y = nr.reduce_noise(y=y, sr=sr)
     return temp_file.name
 def ensemble_prediction(audio_file, apply_noise_reduction=False, segment_duration=3.0, overlap=1.0):
+    """
+    For longer audio files, split into overlapping segments, predict each segment,
+    and return the majority-voted emotion label.
+    """
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     total_duration = librosa.get_duration(y=y, sr=sr)
     for seg in segments:
         temp_file = preprocess_audio(seg, apply_noise_reduction)
         _, _, _, label = classifier.classify_file(temp_file)
+        predictions.append(label[0])
         os.remove(temp_file)
         os.remove(seg)
     return most_common
 def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False, segment_duration=3.0, overlap=1.0):
+    """
+    Predict emotion from an audio file and return the emotion with an emoji.
+    """
     try:
         if use_ensemble:
             label = ensemble_prediction(audio_file, apply_noise_reduction, segment_duration, overlap)
             temp_file = preprocess_audio(audio_file, apply_noise_reduction)
             result = classifier.classify_file(temp_file)
             os.remove(temp_file)
             if isinstance(result, tuple) and len(result) > 3:
+                label = result[3][0]  # Extract predicted emotion label from the tuple
             else:
+                label = str(result)
+        return add_emoji_to_label(label.lower())
     except Exception as e:
         return f"Error processing file: {str(e)}"
 def plot_waveform(audio_file):
+    """
+    Generate and return a waveform plot image (as a PIL Image) for the given audio file.
+    """
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     plt.figure(figsize=(10, 3))
     librosa.display.waveshow(y, sr=sr)
     return Image.open(buf)
 def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap):
+    """
+    Run emotion prediction and generate a waveform plot.
+    Returns a tuple: (emotion label with emoji, waveform image as a PIL Image).
+    """
     emotion = predict_emotion(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap)
     waveform = plot_waveform(audio_file)
+    return emotion, waveform
 with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: Arial;}") as demo:
+    gr.Markdown("<h1 style='text-align: center;'>Enhanced Emotion Recognition</h1>")
     gr.Markdown(
         "Upload an audio file, and the model will predict the emotion using a wav2vec2 model fine-tuned on IEMOCAP data. "
+        "The prediction is accompanied by an emoji in the output, and you can also view the audio's waveform. "
         "Use the options below to adjust ensemble prediction and noise reduction settings."
     )
             waveform_image = gr.Image(label="Audio Waveform", type="pil")
             predict_button.click(
+                predict_and_plot,
                 inputs=[audio_input, use_ensemble, apply_noise_reduction, segment_duration, overlap],
                 outputs=[result_text, waveform_image]
             )
   - Ensemble Prediction for long audio files.
   - Optional Noise Reduction.
   - Visualization of the audio waveform.
+  - Emoji representation of the predicted emotion in the output.
 **Credits:**
 - [SpeechBrain](https://speechbrain.github.io)