Spaces:

Boltz79
/

Sentiment-Analysis

Sleeping

App Files Files Community

Boltz79 committed on Feb 8

Commit

6f98b5f

verified ·

1 Parent(s): d250b36

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -25

app.py CHANGED Viewed

@@ -30,10 +30,11 @@ emotion_to_emoji = {
 }
 def add_emoji_to_label(label):
     """Return the capitalized label followed by a space and its emoji.

     The emoji is looked up in emotion_to_emoji by the lower-cased label;
     when there is no entry, an empty string is used in its place.
     """
     lookup_key = label.lower()
     symbol = emotion_to_emoji.get(lookup_key, "")
     display_name = label.capitalize()
     return f"{display_name} {symbol}"
-# Load the pre-trained SpeechBrain classifier (Emotion Recognition with wav2vec2 on IEMOCAP)
 classifier = foreign_class(
     source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
     pymodule_file="custom_interface.py",
@@ -47,16 +48,13 @@ def preprocess_audio(audio_file, apply_noise_reduction=False):
       - Convert to 16kHz mono.
       - Optionally apply noise reduction.
       - Normalize the audio.
-    The processed audio is saved to a temporary file and its path is returned.
     """
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     if apply_noise_reduction and NOISEREDUCE_AVAILABLE:
         y = nr.reduce_noise(y=y, sr=sr)
     if np.max(np.abs(y)) > 0:
         y = y / np.max(np.abs(y))
     temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
     import soundfile as sf
     sf.write(temp_file.name, y, sr)
@@ -64,18 +62,19 @@ def preprocess_audio(audio_file, apply_noise_reduction=False):
 def ensemble_prediction(audio_file, apply_noise_reduction=False, segment_duration=3.0, overlap=1.0):
     """
-    For long audio files, split the file into overlapping segments, predict the emotion for each segment,
-    and return the majority-voted label.
     """
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     total_duration = librosa.get_duration(y=y, sr=sr)
     if total_duration <= segment_duration:
         temp_file = preprocess_audio(audio_file, apply_noise_reduction)
         _, _, _, label = classifier.classify_file(temp_file)
         os.remove(temp_file)
         return label
     step = segment_duration - overlap
     segments = []
     for start in np.arange(0, total_duration - segment_duration + 0.001, step):
@@ -101,10 +100,10 @@ def ensemble_prediction(audio_file, apply_noise_reduction=False, segment_duratio
 def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False, segment_duration=3.0, overlap=1.0):
     """
-    Main prediction function.
       - Uses ensemble prediction if enabled.
       - Otherwise, processes the entire audio at once.
-      - Returns the predicted emotion with an emoji.
     """
     try:
         if use_ensemble:
@@ -119,7 +118,7 @@ def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False,
 def plot_waveform(audio_file):
     """
-    Generate a waveform plot for the given audio file and return the image bytes.
     """
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     plt.figure(figsize=(10, 3))
@@ -133,8 +132,8 @@ def plot_waveform(audio_file):
 def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap):
     """
-    Predict the emotion and also generate the waveform plot.
-    Returns a tuple: (emotion label with emoji, waveform image)
     """
     emotion = predict_emotion(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap)
     waveform = plot_waveform(audio_file)
@@ -152,7 +151,7 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
     with gr.Tabs():
         with gr.TabItem("Emotion Recognition"):
             with gr.Row():
-                # Removed the 'source' argument which caused the error.
                 audio_input = gr.Audio(type="filepath", label="Upload Audio")
             use_ensemble = gr.Checkbox(label="Use Ensemble Prediction (for long audio)", value=False)
             apply_noise_reduction = gr.Checkbox(label="Apply Noise Reduction", value=False)
@@ -171,18 +170,18 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
         with gr.TabItem("About"):
             gr.Markdown("""
-            **Enhanced Emotion Recognition App**
-            - **Model:** SpeechBrain's wav2vec2 model fine-tuned on IEMOCAP for emotion recognition.
-            - **Features:**
-              - Ensemble Prediction for long audio files.
-              - Optional Noise Reduction.
-              - Visualization of the audio waveform.
-              - Emoji representation of the predicted emotion.
-            **Credits:**
-            - [SpeechBrain](https://speechbrain.github.io)
-            - [Gradio](https://gradio.app)
             """)
 if __name__ == "__main__":

 }
 def add_emoji_to_label(label):
+    """Return the capitalized label plus its emoji, looked up by lower-cased label ("" if unmapped)."""
     emoji = emotion_to_emoji.get(label.lower(), "")
     return f"{label.capitalize()} {emoji}"
+# Load the pre-trained SpeechBrain classifier
 classifier = foreign_class(
     source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
     pymodule_file="custom_interface.py",
       - Convert to 16kHz mono.
       - Optionally apply noise reduction.
       - Normalize the audio.
+    Saves the processed audio to a temporary file and returns its path.
     """
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     if apply_noise_reduction and NOISEREDUCE_AVAILABLE:
         y = nr.reduce_noise(y=y, sr=sr)
     if np.max(np.abs(y)) > 0:
         y = y / np.max(np.abs(y))
     temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
     import soundfile as sf
     sf.write(temp_file.name, y, sr)
 def ensemble_prediction(audio_file, apply_noise_reduction=False, segment_duration=3.0, overlap=1.0):
     """
+    For longer audio files, split into overlapping segments, predict each segment,
+    and return the majority-voted emotion label.
     """
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     total_duration = librosa.get_duration(y=y, sr=sr)
+    # If the audio is short, process it directly
     if total_duration <= segment_duration:
         temp_file = preprocess_audio(audio_file, apply_noise_reduction)
         _, _, _, label = classifier.classify_file(temp_file)
         os.remove(temp_file)
         return label
     step = segment_duration - overlap
     segments = []
     for start in np.arange(0, total_duration - segment_duration + 0.001, step):
 def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False, segment_duration=3.0, overlap=1.0):
     """
+    Main prediction function:
       - Uses ensemble prediction if enabled.
       - Otherwise, processes the entire audio at once.
+      Returns the emotion label enhanced with an emoji.
     """
     try:
         if use_ensemble:
 def plot_waveform(audio_file):
     """
+    Generate and return a waveform plot image for the given audio file.
     """
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     plt.figure(figsize=(10, 3))
 def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap):
     """
+    Run emotion prediction and generate a waveform plot.
+    Returns a tuple: (emotion label with emoji, waveform image).
     """
     emotion = predict_emotion(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap)
     waveform = plot_waveform(audio_file)
     with gr.Tabs():
         with gr.TabItem("Emotion Recognition"):
             with gr.Row():
+                # 'source' argument removed to avoid errors
                 audio_input = gr.Audio(type="filepath", label="Upload Audio")
             use_ensemble = gr.Checkbox(label="Use Ensemble Prediction (for long audio)", value=False)
             apply_noise_reduction = gr.Checkbox(label="Apply Noise Reduction", value=False)
         with gr.TabItem("About"):
             gr.Markdown("""
+**Enhanced Emotion Recognition App**
+- **Model:** SpeechBrain's wav2vec2 model fine-tuned on IEMOCAP for emotion recognition.
+- **Features:**
+  - Ensemble Prediction for long audio files.
+  - Optional Noise Reduction.
+  - Visualization of the audio waveform.
+  - Emoji representation of the predicted emotion.
+**Credits:**
+- [SpeechBrain](https://speechbrain.github.io)
+- [Gradio](https://gradio.app)
             """)
 if __name__ == "__main__":