import gradio as gr
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration, AutoModelForSequenceClassification, AutoTokenizer
import librosa
import numpy as np
import plotly.graph_objects as go

class ModelManager:
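    """Load the Whisper and emotion models, with their processors/tokenizers, onto CPU."""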
    def __init__(self):
        self.device = torch.device("cpu")
        self.models = {}
        self.tokenizers = {}
        self.processors = {}
        self.load_models()
        
    def load_models(self):
        try:
            print("Loading Whisper model...")
            self.processors['whisper'] = WhisperProcessor.from_pretrained(
                "openai/whisper-base"
            )
            self.models['whisper'] = WhisperForConditionalGeneration.from_pretrained(
                "openai/whisper-base"
            ).to(self.device)
            
            print("Loading emotion model...")
            self.tokenizers['emotion'] = AutoTokenizer.from_pretrained(
                "j-hartmann/emotion-english-distilroberta-base"
            )
            self.models['emotion'] = AutoModelForSequenceClassification.from_pretrained(
                "j-hartmann/emotion-english-distilroberta-base"
            ).to(self.device)
            
            print("Models loaded successfully")
        except Exception as e:
            print(f"Error loading models: {str(e)}")
            raise

class AudioProcessor:
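    """Load audio at the 16 kHz rate Whisper expects and extract basic acoustic features."""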
    def __init__(self):
        self.sample_rate = 16000
        self.n_mfcc = 13
        
    def process_audio(self, audio_path):
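        """Return the resampled waveform together with its MFCC/energy features."""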
        try:
            waveform, sr = librosa.load(audio_path, sr=self.sample_rate)
            return waveform, self._extract_features(waveform)
        except Exception as e:
            print(f"Error processing audio: {str(e)}")
            raise
        
    def _extract_features(self, waveform):
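        """Compute MFCCs and frame-level RMS energy.

        These features are returned to callers for potential acoustic analysis;
        the current analyze() pipeline does not consume them.
        """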
        try:
            return {
                'mfcc': librosa.feature.mfcc(y=waveform, sr=self.sample_rate, n_mfcc=self.n_mfcc),
                'energy': librosa.feature.rms(y=waveform)[0]
            }
        except Exception as e:
            print(f"Error extracting features: {str(e)}")
            raise

class Analyzer:
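    """End-to-end pipeline: audio file -> Whisper transcription -> emotion scores."""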
    def __init__(self):
        print("Initializing Analyzer...")
        try:
            self.model_manager = ModelManager()
            self.audio_processor = AudioProcessor()
            print("Analyzer initialization complete")
        except Exception as e:
            print(f"Error initializing Analyzer: {str(e)}")
            raise
        
    def analyze(self, audio_path):
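        """Transcribe the audio file, then classify emotions in the transcript.

        Returns a dict with 'transcription' (str) and 'emotions'
        (label -> probability) keys.
        """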
        try:
            print(f"Processing audio file: {audio_path}")
            waveform, features = self.audio_processor.process_audio(audio_path)
            
            print("Transcribing audio...")
            # Pass the sampling rate explicitly; recent transformers releases
            # warn when it is omitted and assume 16 kHz otherwise.
            inputs = self.model_manager.processors['whisper'](
                waveform,
                sampling_rate=self.audio_processor.sample_rate,
                return_tensors="pt"
            ).input_features.to(self.model_manager.device)
            
            with torch.no_grad():
                predicted_ids = self.model_manager.models['whisper'].generate(inputs)
            transcription = self.model_manager.processors['whisper'].batch_decode(
                predicted_ids, 
                skip_special_tokens=True
            )[0]
            
            print("Analyzing emotions...")
            inputs = self.model_manager.tokenizers['emotion'](
                transcription,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=512
            )
            inputs = {k: v.to(self.model_manager.device) for k, v in inputs.items()}
            
            with torch.no_grad():
                outputs = self.model_manager.models['emotion'](**inputs)
            emotions = torch.nn.functional.softmax(outputs.logits, dim=-1)
            
            # Map scores to labels via the model config so the label order and
            # count always match the classifier head (this checkpoint predicts
            # seven classes: anger, disgust, fear, joy, neutral, sadness,
            # surprise); a hardcoded list risks silently dropping a score.
            id2label = self.model_manager.models['emotion'].config.id2label
            emotion_scores = {
                id2label[i]: float(score)
                for i, score in enumerate(emotions[0].cpu())
            }
            
            return {
                'transcription': transcription,
                'emotions': emotion_scores
            }
        except Exception as e:
            print(f"Error in analysis: {str(e)}")
            raise

def create_emotion_plot(emotions):
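    """Render emotion scores as a Plotly bar chart and return embeddable HTML.

    include_plotlyjs=True inlines the plotly.js library, keeping the output
    self-contained at the cost of a larger payload per render.
    """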
    try:
        fig = go.Figure(data=[
            go.Bar(
                x=list(emotions.keys()), 
                y=list(emotions.values()),
                marker_color='rgb(55, 83, 109)'
            )
        ])
        
        fig.update_layout(
            title='Emotion Analysis',
            xaxis_title='Emotion',
            yaxis_title='Score',
            yaxis_range=[0, 1],
            template='plotly_white',
            height=400
        )
        
        return fig.to_html(include_plotlyjs=True)
    except Exception as e:
        print(f"Error creating plot: {str(e)}")
        return "Error creating visualization"

def process_audio(audio_file):
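    """Gradio callback: run the module-level `analyzer` on the uploaded file.

    Returns (transcription, emotion-plot HTML); errors come back as plain
    strings so the UI displays them instead of crashing.
    """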
    try:
        if audio_file is None:
            return "No audio file provided", "Please provide an audio file"
            
        print(f"Processing audio file: {audio_file}")
        results = analyzer.analyze(audio_file)
        
        return (
            results['transcription'],
            create_emotion_plot(results['emotions'])
        )
    except Exception as e:
        error_msg = f"Error processing audio: {str(e)}"
        print(error_msg)
        return error_msg, "Error in analysis"

if __name__ == "__main__":
    print("Initializing application...")
    try:
        analyzer = Analyzer()
        
        print("Creating Gradio interface...")
        interface = gr.Interface(
            fn=process_audio,
            inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
            outputs=[
                gr.Textbox(label="Transcription"),
                gr.HTML(label="Emotion Analysis")
            ],
            title="Vocal Biomarker Analysis",
            description="Analyze voice for emotional indicators",
            examples=[],
            cache_examples=False
        )
        
        print("Launching application...")
        interface.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False
        )
    except Exception as e:
        print(f"Fatal error during application startup: {str(e)}")
        raise