invincible-jha committed
Commit: 1cd7ce8
Parent(s): 784383b
Upload app.py

app.py CHANGED
@@ -14,24 +14,27 @@ class ModelManager:
         self.load_models()

     def load_models(self):
-        [old implementation, 18 lines, not captured in the page extraction]
+        try:
+            print("Loading Whisper model...")
+            self.processors['whisper'] = WhisperProcessor.from_pretrained(
+                "openai/whisper-base"  # Removed device_map parameter
+            )
+            self.models['whisper'] = WhisperForConditionalGeneration.from_pretrained(
+                "openai/whisper-base"  # Removed device_map parameter
+            ).to(self.device)
+
+            print("Loading emotion model...")
+            self.tokenizers['emotion'] = AutoTokenizer.from_pretrained(
+                "j-hartmann/emotion-english-distilroberta-base"
+            )
+            self.models['emotion'] = AutoModelForSequenceClassification.from_pretrained(
+                "j-hartmann/emotion-english-distilroberta-base"  # Removed device_map parameter
+            ).to(self.device)
+
+            print("Models loaded successfully")
+        except Exception as e:
+            print(f"Error loading models: {str(e)}")
+            raise

 class AudioProcessor:
     def __init__(self):
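The hunk above drops device_map in favor of explicit .to(device) calls. As a standalone reference, here is a minimal sketch of that loading pattern; it assumes torch and transformers are installed, and the device selection line is an assumption, since the app sets self.device outside this hunk:

import torch
from transformers import (
    WhisperProcessor,
    WhisperForConditionalGeneration,
    AutoTokenizer,
    AutoModelForSequenceClassification,
)

# Pick a device explicitly instead of passing device_map to from_pretrained.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Whisper: the processor stays on CPU; only the model moves to the device.
whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-base")
whisper_model = WhisperForConditionalGeneration.from_pretrained(
    "openai/whisper-base"
).to(device)

# Emotion classifier: same pattern with a tokenizer plus sequence classifier.
emotion_tokenizer = AutoTokenizer.from_pretrained(
    "j-hartmann/emotion-english-distilroberta-base"
)
emotion_model = AutoModelForSequenceClassification.from_pretrained(
    "j-hartmann/emotion-english-distilroberta-base"
).to(device)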
@@ -59,9 +62,13 @@ class AudioProcessor:
 class Analyzer:
     def __init__(self):
         print("Initializing Analyzer...")
-        [old initialization, 3 lines, not captured in the page extraction]
+        try:
+            self.model_manager = ModelManager()
+            self.audio_processor = AudioProcessor()
+            print("Analyzer initialization complete")
+        except Exception as e:
+            print(f"Error initializing Analyzer: {str(e)}")
+            raise

     def analyze(self, audio_path):
         try:
@@ -72,9 +79,10 @@ class Analyzer:
             inputs = self.model_manager.processors['whisper'](
                 waveform,
                 return_tensors="pt"
-            ).input_features
+            ).input_features.to(self.model_manager.device)

-            [removed line not captured in the page extraction]
+            with torch.no_grad():
+                predicted_ids = self.model_manager.models['whisper'].generate(inputs)
             transcription = self.model_manager.processors['whisper'].batch_decode(
                 predicted_ids,
                 skip_special_tokens=True
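The new transcription path (explicit device placement plus torch.no_grad() around generate) can be reproduced outside the app. A sketch continuing from the loader above; librosa and the "sample.wav" path are assumptions for illustration, since the app's own AudioProcessor is not shown in this hunk:

import librosa
import torch

# Load audio as a 16 kHz mono float array (Whisper's expected sample rate).
waveform, _ = librosa.load("sample.wav", sr=16000)

# Features are built on CPU, then moved to the same device as the model.
inputs = whisper_processor(
    waveform,
    sampling_rate=16000,
    return_tensors="pt",
).input_features.to(device)

# Inference only: no_grad avoids building an autograd graph.
with torch.no_grad():
    predicted_ids = whisper_model.generate(inputs)

transcription = whisper_processor.batch_decode(
    predicted_ids,
    skip_special_tokens=True,
)[0]
print(transcription)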
@@ -88,14 +96,16 @@ class Analyzer:
                 truncation=True,
                 max_length=512
             )
+            inputs = {k: v.to(self.model_manager.device) for k, v in inputs.items()}

-            [removed line not captured in the page extraction]
+            with torch.no_grad():
+                outputs = self.model_manager.models['emotion'](**inputs)
             emotions = torch.nn.functional.softmax(outputs.logits, dim=-1)

             emotion_labels = ['anger', 'fear', 'joy', 'neutral', 'sadness', 'surprise']
             emotion_scores = {
                 label: float(score)
-                for label, score in zip(emotion_labels, emotions[0])
+                for label, score in zip(emotion_labels, emotions[0].cpu())
             }

             return {
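The emotion-scoring side follows the same pattern: move the tokenized batch to the device, run the forward pass under torch.no_grad(), and bring the probabilities back to CPU. Continuing the sketch above; note that the diff hard-codes a six-entry label list, whereas reading the labels from the checkpoint's config is the safer option:

import torch

# Tokenize the transcription and move every tensor in the batch to the device.
inputs = emotion_tokenizer(
    transcription,
    return_tensors="pt",
    truncation=True,
    max_length=512,
)
inputs = {k: v.to(device) for k, v in inputs.items()}

# Forward pass without autograd bookkeeping, then softmax over the logits.
with torch.no_grad():
    outputs = emotion_model(**inputs)
emotions = torch.nn.functional.softmax(outputs.logits, dim=-1)

# Take the label order from the model config rather than a hand-written list.
labels = [emotion_model.config.id2label[i] for i in range(emotions.shape[-1])]
emotion_scores = {
    label: float(score)
    for label, score in zip(labels, emotions[0].cpu())
}
print(emotion_scores)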
@@ -130,9 +140,6 @@ def create_emotion_plot(emotions):
         print(f"Error creating plot: {str(e)}")
         return "Error creating visualization"

-print("Initializing application...")
-analyzer = Analyzer()
-
 def process_audio(audio_file):
     try:
         if audio_file is None:
@@ -150,24 +157,31 @@ def process_audio(audio_file):
         print(error_msg)
         return error_msg, "Error in analysis"

-print("Creating Gradio interface...")
-interface = gr.Interface(
-    fn=process_audio,
-    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
-    outputs=[
-        gr.Textbox(label="Transcription"),
-        gr.HTML(label="Emotion Analysis")
-    ],
-    title="Vocal Biomarker Analysis",
-    description="Analyze voice for emotional indicators",
-    examples=[],
-    cache_examples=False
-)
-
 if __name__ == "__main__":
-    print("
-    [five further removed lines not captured in the page extraction]
+    print("Initializing application...")
+    try:
+        analyzer = Analyzer()
+
+        print("Creating Gradio interface...")
+        interface = gr.Interface(
+            fn=process_audio,
+            inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
+            outputs=[
+                gr.Textbox(label="Transcription"),
+                gr.HTML(label="Emotion Analysis")
+            ],
+            title="Vocal Biomarker Analysis",
+            description="Analyze voice for emotional indicators",
+            examples=[],
+            cache_examples=False
+        )
+
+        print("Launching application...")
+        interface.launch(
+            server_name="0.0.0.0",
+            server_port=7860,
+            share=False
+        )
+    except Exception as e:
+        print(f"Fatal error during application startup: {str(e)}")
+        raise
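With launch now under the main guard (server_name "0.0.0.0", port 7860), the running app can be smoke-tested without the browser. A hedged sketch using the separate gradio_client package; "/predict" is the default endpoint name for a single-function gr.Interface, handle_file requires a reasonably recent client version, and "sample.wav" is a stand-in path:

from gradio_client import Client, handle_file

# Connect to the locally launched interface (port 7860, as in the diff).
client = Client("http://127.0.0.1:7860")

# The interface returns its two outputs in order:
# the transcription textbox and the emotion-analysis HTML.
transcription, emotion_html = client.predict(
    handle_file("sample.wav"),
    api_name="/predict",
)
print(transcription)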