import gradio as gr
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration, AutoModelForSequenceClassification, AutoTokenizer
import librosa
import numpy as np
import plotly.graph_objects as go
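
# Pipeline overview (descriptive note, matches the code below):
#   1. AudioProcessor loads the recording at 16 kHz and extracts MFCC/energy features.
#   2. Whisper (openai/whisper-base) transcribes the speech to text.
#   3. A DistilRoBERTa classifier (j-hartmann/emotion-english-distilroberta-base)
#      scores the transcription across emotion categories.
#   4. The scores are rendered as a Plotly bar chart inside a Gradio interface.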

class ModelManager:
    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.models = {}
        self.tokenizers = {}
        self.processors = {}
        self.load_models()

    def load_models(self):
        print("Loading Whisper model...")
        self.processors['whisper'] = WhisperProcessor.from_pretrained("openai/whisper-base")
        self.models['whisper'] = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base").to(self.device)
        print("Loading emotion model...")
        self.tokenizers['emotion'] = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
        self.models['emotion'] = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base").to(self.device)
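
# Note: both checkpoints are fetched (or read from the local cache) once at startup
# and moved to the GPU when CUDA is available; on CPU-only machines the app still
# runs, just more slowly.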

class AudioProcessor:
    def __init__(self):
        self.sample_rate = 16000
        self.n_mfcc = 13

    def process_audio(self, audio_path):
        waveform, sr = librosa.load(audio_path, sr=self.sample_rate)
        return waveform, self._extract_features(waveform)

    def _extract_features(self, waveform):
        return {
            'mfcc': librosa.feature.mfcc(y=waveform, sr=self.sample_rate, n_mfcc=self.n_mfcc),
            'energy': librosa.feature.rms(y=waveform)[0]
        }
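
# For a clip resampled to 16 kHz, process_audio returns the raw waveform plus a
# feature dict: 'mfcc' has shape (13, frames) and 'energy' has shape (frames,).
# These features are computed here but not consumed elsewhere in this version.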

class Analyzer:
    def __init__(self):
        print("Initializing Analyzer...")
        self.model_manager = ModelManager()
        self.audio_processor = AudioProcessor()
        print("Analyzer initialization complete")

    def analyze(self, audio_path):
        print(f"Processing audio file: {audio_path}")
        # Process audio
        waveform, features = self.audio_processor.process_audio(audio_path)
        # Transcribe
        print("Transcribing audio...")
        inputs = self.model_manager.processors['whisper'](
            waveform,
            sampling_rate=self.audio_processor.sample_rate,
            return_tensors="pt"
        ).input_features.to(self.model_manager.device)
        predicted_ids = self.model_manager.models['whisper'].generate(inputs)
        transcription = self.model_manager.processors['whisper'].batch_decode(
            predicted_ids, skip_special_tokens=True
        )[0]
        # Analyze emotions (inputs must live on the same device as the model)
        print("Analyzing emotions...")
        inputs = self.model_manager.tokenizers['emotion'](
            transcription, return_tensors="pt", padding=True, truncation=True
        ).to(self.model_manager.device)
        with torch.no_grad():
            outputs = self.model_manager.models['emotion'](**inputs)
        emotions = torch.nn.functional.softmax(outputs.logits, dim=-1)
        # Use the model's own id2label mapping so labels always line up with the
        # classifier head (this checkpoint has 7 classes, including 'disgust').
        id2label = self.model_manager.models['emotion'].config.id2label
        emotion_scores = {
            id2label[i]: float(score)
            for i, score in enumerate(emotions[0])
        }
        return {
            'transcription': transcription,
            'emotions': emotion_scores
        }
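
# Illustrative standalone usage (the file name "sample.wav" is hypothetical and
# not part of this app):
#
#   analyzer = Analyzer()
#   result = analyzer.analyze("sample.wav")
#   print(result["transcription"])   # recognized text
#   print(result["emotions"])        # {label: probability}, summing to ~1.0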

def create_emotion_plot(emotions):
    fig = go.Figure(data=[
        go.Bar(x=list(emotions.keys()), y=list(emotions.values()))
    ])
    fig.update_layout(
        title='Emotion Analysis',
        yaxis_range=[0, 1]
    )
    return fig.to_html()
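
# create_emotion_plot serializes the Plotly bar chart to an HTML string, which is
# what the gr.HTML output component defined below expects.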
print("Initializing application...")
analyzer = Analyzer()

def process_audio(audio_file):
    try:
        print(f"Processing audio file: {audio_file}")
        results = analyzer.analyze(audio_file)
        return (
            results['transcription'],
            create_emotion_plot(results['emotions'])
        )
    except Exception as e:
        print(f"Error processing audio: {str(e)}")
        return str(e), "Error in analysis"
print("Creating Gradio interface...")
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.HTML(label="Emotion Analysis")
    ],
    title="Vocal Biomarker Analysis",
    description="Analyze voice for emotional indicators"
)
print("Launching application...")

if __name__ == "__main__":
    interface.launch()
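
# launch() serves the app on a local URL; passing share=True (interface.launch(share=True))
# can additionally create a temporary public Gradio link if needed.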