invincible-jha committed
Commit 0e04908
Parent(s): e10faf1
Upload app.py
app.py CHANGED
@@ -14,16 +14,102 @@ class ModelManager:
         self.load_models()
 
     def load_models(self):
-
+        print("Loading Whisper model...")
         self.processors['whisper'] = WhisperProcessor.from_pretrained("openai/whisper-base")
         self.models['whisper'] = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base").to(self.device)
 
-
+        print("Loading emotion model...")
         self.tokenizers['emotion'] = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
         self.models['emotion'] = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base").to(self.device)
+
+class AudioProcessor:
+    def __init__(self):
+        self.sample_rate = 16000
+        self.n_mfcc = 13
+
+    def process_audio(self, audio_path):
+        waveform, sr = librosa.load(audio_path, sr=self.sample_rate)
+        return waveform, self._extract_features(waveform)
+
+    def _extract_features(self, waveform):
+        return {
+            'mfcc': librosa.feature.mfcc(y=waveform, sr=self.sample_rate, n_mfcc=self.n_mfcc),
+            'energy': librosa.feature.rms(y=waveform)[0]
+        }
+
+class Analyzer:
+    def __init__(self):
+        print("Initializing Analyzer...")
+        self.model_manager = ModelManager()
+        self.audio_processor = AudioProcessor()
+        print("Analyzer initialization complete")
+
+    def analyze(self, audio_path):
+        print(f"Processing audio file: {audio_path}")
+        # Process audio
+        waveform, features = self.audio_processor.process_audio(audio_path)
+
+        # Transcribe
+        print("Transcribing audio...")
+        inputs = self.model_manager.processors['whisper'](waveform, return_tensors="pt").input_features.to(self.model_manager.device)
+        predicted_ids = self.model_manager.models['whisper'].generate(inputs)
+        transcription = self.model_manager.processors['whisper'].batch_decode(predicted_ids, skip_special_tokens=True)[0]
 
-        #
-
-        self.
+        # Analyze emotions
+        print("Analyzing emotions...")
+        inputs = self.model_manager.tokenizers['emotion'](transcription, return_tensors="pt", padding=True, truncation=True)
+        outputs = self.model_manager.models['emotion'](**inputs)
+        emotions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+
+        emotion_labels = ['anger', 'fear', 'joy', 'neutral', 'sadness', 'surprise']
+        emotion_scores = {
+            label: float(score)
+            for label, score in zip(emotion_labels, emotions[0])
+        }
+
+        return {
+            'transcription': transcription,
+            'emotions': emotion_scores
+        }
+
+def create_emotion_plot(emotions):
+    fig = go.Figure(data=[
+        go.Bar(x=list(emotions.keys()), y=list(emotions.values()))
+    ])
+    fig.update_layout(
+        title='Emotion Analysis',
+        yaxis_range=[0, 1]
+    )
+    return fig.to_html()
+
+print("Initializing application...")
+analyzer = Analyzer()
+
+def process_audio(audio_file):
+    try:
+        print(f"Processing audio file: {audio_file}")
+        results = analyzer.analyze(audio_file)
+
+        return (
+            results['transcription'],
+            create_emotion_plot(results['emotions'])
+        )
+    except Exception as e:
+        print(f"Error processing audio: {str(e)}")
+        return str(e), "Error in analysis"
+
+print("Creating Gradio interface...")
+interface = gr.Interface(
+    fn=process_audio,
+    inputs=gr.Audio(source="microphone", type="filepath"),
+    outputs=[
+        gr.Textbox(label="Transcription"),
+        gr.HTML(label="Emotion Analysis")
+    ],
+    title="Vocal Biomarker Analysis",
+    description="Analyze voice for emotional indicators"
+)
 
-
+print("Launching application...")
+if __name__ == "__main__":
+    interface.launch()
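
Note: the emotion_labels list added above hard-codes the label order for j-hartmann/emotion-english-distilroberta-base. A minimal sketch, not part of this commit and assuming the same checkpoint, that reads the order from the model config instead so the labels always line up with the softmax scores:

    from transformers import AutoModelForSequenceClassification

    # Assumption: same checkpoint as in load_models() above.
    model = AutoModelForSequenceClassification.from_pretrained(
        "j-hartmann/emotion-english-distilroberta-base"
    )

    # id2label maps logit indices to label names, so this list stays in the
    # same order as the scores produced by softmax over the model's logits.
    emotion_labels = [model.config.id2label[i] for i in range(model.config.num_labels)]
    print(emotion_labels)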