Spaces:

Boltz79
/

Sentiment-Analysis

Sleeping

App Files Files Community

Boltz79 committed on Jan 31

Commit

def04d4

verified ·

1 Parent(s): 786ea23

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -45

app.py CHANGED Viewed

@@ -1,53 +1,110 @@
 import gradio as gr
 from transformers import pipeline
-# Load Whisper for speech-to-text
-whisper = pipeline("automatic-speech-recognition", model="openai/whisper-medium")
-# Load a sentiment analysis model
-sentiment_analyzer = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
-# Function to process audio and analyze tone
-def analyze_call(audio_file):
-    try:
-        # Step 1: Transcribe audio to text using Whisper
-        transcription = whisper(audio_file)["text"]
-        # Step 2: Analyze sentiment of the transcription
-        sentiment_result = sentiment_analyzer(transcription)[0]
-        # Prepare the output
-        output = {
-            "transcription": transcription,
-            "sentiment": sentiment_result["label"],
-            "confidence": round(sentiment_result["score"], 4)
-        }
-        return output
-    except Exception as e:
-        return {"error": str(e)}
-# Gradio Interface
-def gradio_interface(audio):
-    if audio is None:
-        return "Please record or upload an audio file."
-    result = analyze_call(audio)
-    if "error" in result:
-        return f"Error: {result['error']}"
-    return (
-        f"**Transcription:** {result['transcription']}\n\n"
-        f"**Sentiment:** {result['sentiment']}\n\n"
-        f"**Confidence:** {result['confidence']}"
-    )
-# Create Gradio app
-interface = gr.Interface(
-    fn=gradio_interface,
-    inputs=gr.Audio(type="filepath", label="Record or Upload Audio"),
-    outputs=gr.Textbox(label="Analysis Result", lines=5),
-    title="Real-Time Call Analysis",
-    description="Record or upload audio to analyze tone and sentiment in real time.",
-    live=False  # Set to False to avoid constant re-runs
-)
-# Launch the app
-interface.launch()

 import gradio as gr
+import numpy as np
+import torch
 from transformers import pipeline
+import librosa
+class EmotionRecognizer:
+    """Classify the emotion in a short speech clip.
+    Wraps a Hugging Face ``audio-classification`` pipeline together with
+    the preprocessing applied before inference: mono mix-down, peak
+    scaling, resampling to ``self.sample_rate`` and clamping the clip
+    length to between 1 and 10 seconds.
+    """
+    def __init__(self):
+        """Load the classification pipeline and set the target sample rate."""
+        self.classifier = pipeline(
+            "audio-classification",
+            model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
+            # First CUDA device when available, otherwise CPU (-1).
+            device=0 if torch.cuda.is_available() else -1
+        )
+        # Every clip is resampled to this rate before classification.
+        self.sample_rate = 16000
+    def _prepare_waveform(self, sample_rate, audio_data):
+        """Return mono float32 samples at ``self.sample_rate``, 1-10 s long."""
+        # Average across axis 1 to fold multi-channel audio into mono.
+        if audio_data.ndim > 1:
+            audio_data = np.mean(audio_data, axis=1)
+        audio_data = audio_data.astype(np.float32)
+        # Scale by the peak only when samples fall outside [-1, 1];
+        # the peak is computed once and reused for the division.
+        peak = np.max(np.abs(audio_data))
+        if peak > 1.0:
+            audio_data = audio_data / peak
+        if sample_rate != self.sample_rate:
+            audio_data = librosa.resample(
+                y=audio_data,
+                orig_sr=sample_rate,
+                target_sr=self.sample_rate
+            )
+        min_samples = self.sample_rate
+        max_samples = 10 * self.sample_rate
+        if len(audio_data) < min_samples:
+            # Zero-pad clips shorter than one second.
+            audio_data = np.pad(audio_data, (0, min_samples - len(audio_data)))
+        elif len(audio_data) > max_samples:
+            # Keep only the first ten seconds of longer clips.
+            audio_data = audio_data[:max_samples]
+        return audio_data
+    def process_audio(self, audio_input):
+        """Classify a ``(sample_rate, samples)`` recording.
+        Args:
+            audio_input: Two-item sequence unpacked as the sample rate
+                followed by a NumPy array of samples.
+        Returns:
+            ``(text, plot_data)``. ``text`` holds one ``label: score%``
+            line per prediction and ``plot_data`` is a dict with parallel
+            ``"labels"`` and ``"values"`` lists. When the clip is empty or
+            processing fails, ``text`` is a message and ``plot_data`` is
+            ``None``.
+        """
+        try:
+            sample_rate, audio_data = audio_input
+            # An empty array would otherwise fail inside np.max with an
+            # opaque "zero-size array" reduction error.
+            if audio_data.size == 0:
+                return "No audio data received. Please record or upload a clip.", None
+            audio_data = self._prepare_waveform(sample_rate, audio_data)
+            result = self.classifier({"array": audio_data, "sampling_rate": self.sample_rate})
+            emotions_text = "\n".join(
+                f"{pred['label']}: {pred['score']*100:.2f}%"
+                for pred in result
+            )
+            plot_data = {
+                "labels": [pred['label'] for pred in result],
+                "values": [pred['score'] * 100 for pred in result]
+            }
+            return emotions_text, plot_data
+        except Exception as e:
+            # UI boundary: report the failure to the user instead of
+            # letting the exception escape the click handler.
+            print(f"Error details: {str(e)}")
+            return f"Error processing audio: {str(e)}", None
+def create_interface():
+    """Build the Gradio Blocks UI for audio emotion recognition.
+    Creates one ``EmotionRecognizer`` and wires an audio input and an
+    "Analyze Emotion" button to a text box and a bar plot.
+    Returns:
+        The ``gr.Blocks`` app; the caller is expected to launch it.
+    """
+    # Local import: only the plot conversion below needs pandas.
+    import pandas as pd
+    recognizer = EmotionRecognizer()
+    def process_audio_file(audio):
+        """Run the recognizer and shape its output for the UI components."""
+        if audio is None:
+            return "Please provide an audio input.", None
+        emotions_text, plot_data = recognizer.process_audio(audio)
+        # gr.BarPlot takes a DataFrame rather than a dict of lists; None
+        # (the error case) is passed through unchanged.
+        if plot_data is not None:
+            plot_data = pd.DataFrame(plot_data)
+        return emotions_text, plot_data
+    with gr.Blocks() as interface:
+        gr.Markdown("# Audio Emotion Recognition")
+        gr.Markdown("Record or upload audio to analyze the emotional content. The model works best with clear speech in English.")
+        with gr.Row():
+            with gr.Column():
+                audio_input = gr.Audio(
+                    label="Upload or Record Audio",
+                    type="numpy",
+                    sources=["microphone", "upload"],
+                )
+                analyze_btn = gr.Button("Analyze Emotion")
+                gr.Markdown("Note: Audio will be automatically converted to mono and resampled if needed.")
+            with gr.Column():
+                output_text = gr.Textbox(
+                    label="Results",
+                    lines=5
+                )
+                output_plot = gr.BarPlot(
+                    # Column names of the DataFrame built in process_audio_file.
+                    x="labels",
+                    y="values",
+                    title="Emotion Confidence Scores",
+                    x_title="Emotions",
+                    y_title="Confidence (%)"
+                )
+        analyze_btn.click(
+            fn=process_audio_file,
+            inputs=[audio_input],
+            outputs=[output_text, output_plot]
+        )
+    return interface
+# Build and serve the app only when this file is run directly, not when
+# it is imported as a module.
+if __name__ == "__main__":
+    demo = create_interface()
+    # NOTE(review): share=True presumably requests a public share link;
+    # confirm against the Gradio launch() docs for the hosting environment.
+    demo.launch(share=True)