Commit d13902f by invincible-jha
1 parent: 3f4b577
Update app.py

app.py CHANGED
@@ -15,6 +15,10 @@ from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
 
+# Get API tokens
+ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY', 'your_anthropic_api_key')
+HUGGINGFACE_TOKEN = os.getenv('HUGGINGFACE_TOKEN', 'your_huggingface_api_token')
+
 # Suppress warnings
 warnings.filterwarnings('ignore')
 
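Note on the new token block: the os.getenv defaults ('your_anthropic_api_key', 'your_huggingface_api_token') are placeholder strings, not working credentials, so the app still boots without configuration but the API-dependent paths fail later. A minimal sketch of an explicit guard one could add; hypothetical, not part of this commit:

    import os

    # Hypothetical check: warn when the placeholder default is still active,
    # since Anthropic calls will then fall back to the offline analysis path.
    ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY', 'your_anthropic_api_key')
    if ANTHROPIC_API_KEY == 'your_anthropic_api_key':
        print("Warning: ANTHROPIC_API_KEY is not configured")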
@@ -30,17 +34,26 @@ def load_models():
     global processor, whisper_model, emotion_tokenizer, emotion_model
 
     try:
-        # Load Whisper model
         print("Loading Whisper model...")
-        processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
-        whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
+        processor = WhisperProcessor.from_pretrained(
+            "openai/whisper-tiny",
+            use_auth_token=HUGGINGFACE_TOKEN
+        )
+        whisper_model = WhisperForConditionalGeneration.from_pretrained(
+            "openai/whisper-tiny",
+            use_auth_token=HUGGINGFACE_TOKEN
+        )
 
-        # Load emotion model
         print("Loading emotion model...")
-        emotion_tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
-        emotion_model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
+        emotion_tokenizer = AutoTokenizer.from_pretrained(
+            "j-hartmann/emotion-english-distilroberta-base",
+            use_auth_token=HUGGINGFACE_TOKEN
+        )
+        emotion_model = AutoModelForSequenceClassification.from_pretrained(
+            "j-hartmann/emotion-english-distilroberta-base",
+            use_auth_token=HUGGINGFACE_TOKEN
+        )
 
-        # Set device
         device = "cpu"
         whisper_model.to(device)
         emotion_model.to(device)
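Compatibility note on use_auth_token: recent transformers releases deprecate it in favor of token, and both checkpoints named here (openai/whisper-tiny, j-hartmann/emotion-english-distilroberta-base) are public on the Hub, so the token matters more for rate limits than for access. A version-tolerant loader sketch; the TypeError fallback is an assumption about how an older version rejects the newer kwarg:

    from transformers import WhisperProcessor

    def load_whisper_processor(hf_token, model_id="openai/whisper-tiny"):
        # Prefer the newer `token` kwarg; fall back to the deprecated
        # `use_auth_token` on older transformers versions.
        try:
            return WhisperProcessor.from_pretrained(model_id, token=hf_token)
        except TypeError:
            return WhisperProcessor.from_pretrained(model_id, use_auth_token=hf_token)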
@@ -136,75 +149,147 @@ def extract_prosodic_features(waveform, sr):
         return None
 
 class ClinicalVoiceAnalyzer:
-    """
+    """Analyze voice characteristics for psychological indicators."""
 
     def __init__(self):
         """Initialize analyzer with API and reference ranges."""
-
-
-
-
-
-
-
-
+        try:
+            if not ANTHROPIC_API_KEY:
+                raise ValueError("ANTHROPIC_API_KEY not found in environment variables")
+
+            self.anthropic = Anthropic(api_key=ANTHROPIC_API_KEY)
+            self.model = "claude-3-opus-20240229"
+
+            self.reference_ranges = {
+                'pitch': {'min': 150, 'max': 400},
+                'tempo': {'min': 90, 'max': 130},
+                'energy': {'min': 0.01, 'max': 0.05}
+            }
+            print("Clinical analyzer ready")
+        except Exception as e:
+            print(f"Error initializing clinical analyzer: {e}")
+            self.anthropic = None
 
     def analyze_voice_metrics(self, features, emotions, transcription):
-        """
+        """Generate clinical insights from voice and emotion data."""
         try:
+            if not self.anthropic:
+                return self._generate_backup_analysis(features, emotions)
+
             prompt = self._create_clinical_prompt(features, emotions, transcription)
+            print("Sending analysis request to Anthropic API...")
+
             response = self.anthropic.messages.create(
                 model=self.model,
                 max_tokens=1000,
-                messages=[{"role": "user", "content": prompt}]
+                messages=[{
+                    "role": "user",
+                    "content": prompt
+                }],
+                temperature=0.7
             )
-            return self._format_analysis(response.content)
+
+            if response and hasattr(response, 'content'):
+                print("Received response from Anthropic API")
+                return self._format_analysis(response.content)
+            else:
+                print("No valid response from API")
+                return self._generate_backup_analysis(features, emotions)
+
         except Exception as e:
             print(f"Clinical analysis error: {e}")
             return self._generate_backup_analysis(features, emotions)
 
     def _create_clinical_prompt(self, features, emotions, transcription):
-        """Create clinical analysis
-
+        """Create detailed prompt for clinical analysis."""
+        prompt = f"""As a clinical voice analysis expert, provide a detailed psychological assessment based on the following data:
 
-Voice
-- Pitch: {features['pitch_mean']:.2f} Hz (Normal: {self.reference_ranges['pitch']['min']}-{self.reference_ranges['pitch']['max']} Hz)
+Voice Characteristics Analysis:
+- Pitch: {features['pitch_mean']:.2f} Hz (Normal range: {self.reference_ranges['pitch']['min']}-{self.reference_ranges['pitch']['max']} Hz)
 - Pitch Variation: {features['pitch_std']:.2f} Hz
-- Speech Rate: {features['tempo']:.2f} BPM (Normal: {self.reference_ranges['tempo']['min']}-{self.reference_ranges['tempo']['max']} BPM)
-- Voice Energy: {features['energy_mean']:.4f}
+- Speech Rate: {features['tempo']:.2f} BPM (Normal range: {self.reference_ranges['tempo']['min']}-{self.reference_ranges['tempo']['max']} BPM)
+- Voice Energy Level: {features['energy_mean']:.4f} (Normal range: {self.reference_ranges['energy']['min']}-{self.reference_ranges['energy']['max']})
 
-
+Emotional Analysis:
 {', '.join(f'{emotion}: {score:.1%}' for emotion, score in emotions.items())}
 
 Speech Content:
 "{transcription}"
 
-
-1.
-2.
-3.
-4.
-5.
+Please provide a comprehensive assessment including:
+1. Detailed voice characteristic analysis and what it indicates about mental state
+2. Assessment of emotional state based on both voice features and detected emotions
+3. Potential indicators of anxiety, depression, or other mental health concerns
+4. Evaluation of stress levels and emotional stability
+5. Specific recommendations for mental health professionals or further assessment if needed
+
+Base your analysis on established clinical research connecting voice biomarkers to psychological states."""
+
+        print(f"Generated prompt length: {len(prompt)} characters")
+        return prompt
 
     def _format_analysis(self, analysis):
-        """Format clinical analysis output."""
+        """Format the clinical analysis output."""
         return f"\nClinical Assessment:\n{analysis}"
 
     def _generate_backup_analysis(self, features, emotions):
-        """Generate
-
-
-
-
-
-
-
-
-
+        """Generate basic analysis when API is unavailable."""
+        try:
+            dominant_emotion = max(emotions.items(), key=lambda x: x[1])
+            pitch_status = (
+                "elevated" if features['pitch_mean'] > self.reference_ranges['pitch']['max']
+                else "reduced" if features['pitch_mean'] < self.reference_ranges['pitch']['min']
+                else "normal"
+            )
+
+            tempo_status = (
+                "rapid" if features['tempo'] > self.reference_ranges['tempo']['max']
+                else "slow" if features['tempo'] < self.reference_ranges['tempo']['min']
+                else "normal"
+            )
+
+            energy_status = (
+                "high" if features['energy_mean'] > self.reference_ranges['energy']['max']
+                else "low" if features['energy_mean'] < self.reference_ranges['energy']['min']
+                else "normal"
+            )
+
+            return f"""
+Detailed Voice Analysis:
 - Pitch Status: {pitch_status} ({features['pitch_mean']:.2f} Hz)
-- Speech Rate: {features['tempo']:.2f} BPM
-- Voice Energy Level: {features['energy_mean']:.4f}
-- Primary Emotion: {dominant_emotion[0]} ({dominant_emotion[1]:.1%} confidence)
+- Speech Rate: {features['tempo']:.2f} BPM ({tempo_status})
+- Voice Energy Level: {features['energy_mean']:.4f} ({energy_status})
+- Primary Emotion: {dominant_emotion[0]} ({dominant_emotion[1]:.1%} confidence)
+
+Potential Indicators:
+- Pitch: {self._interpret_pitch(features['pitch_mean'], pitch_status)}
+- Rate: {self._interpret_tempo(features['tempo'], tempo_status)}
+- Energy: {self._interpret_energy(features['energy_mean'], energy_status)}
+"""
+        except Exception as e:
+            print(f"Error in backup analysis: {e}")
+            return "Error generating analysis. Please try again."
+
+    def _interpret_pitch(self, pitch, status):
+        if status == "elevated":
+            return "May indicate heightened stress or anxiety"
+        elif status == "reduced":
+            return "Could suggest low energy or depressed mood"
+        return "Within normal range, suggesting stable emotional state"
+
+    def _interpret_tempo(self, tempo, status):
+        if status == "rapid":
+            return "May indicate anxiety or agitation"
+        elif status == "slow":
+            return "Could suggest fatigue or low mood"
+        return "Normal pacing indicates balanced emotional state"
+
+    def _interpret_energy(self, energy, status):
+        if status == "high":
+            return "May indicate heightened emotional state or agitation"
+        elif status == "low":
+            return "Could suggest reduced emotional expression or fatigue"
+        return "Appropriate energy level suggests emotional stability"
 
 def create_feature_plots(features):
     """Create visualizations for voice features."""
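One caveat on the new response handling: in the Anthropic Messages API, response.content is a list of content blocks rather than a plain string, so passing it straight to _format_analysis interpolates a list repr into the output. A small sketch of extracting the text first (assuming the anthropic SDK's Message object shape):

    def extract_text(response):
        # Join the text blocks of an Anthropic Messages response.
        return "".join(
            block.text for block in response.content
            if getattr(block, "type", None) == "text"
        )

    # e.g. self._format_analysis(extract_text(response))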
@@ -334,10 +419,12 @@ def analyze_audio(audio_input):
         global clinical_analyzer
         if clinical_analyzer is None:
             clinical_analyzer = ClinicalVoiceAnalyzer()
-
+
+        print("Initiating clinical analysis...")  # Debug log
         clinical_analysis = clinical_analyzer.analyze_voice_metrics(
            features, emotion_scores, transcription
         )
+        print("Clinical analysis completed")  # Debug log
 
         # Create summary with fixed string formatting
         summary = f"""Voice Analysis Summary:
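For reference, analyze_voice_metrics only reads four feature keys (pitch_mean, pitch_std, tempo, energy_mean) plus the emotion mapping, which makes this call easy to smoke-test in isolation. A hypothetical check with fabricated sample values; it assumes app.py is importable as app, and note that the module's startup block loads the models at import time:

    from app import ClinicalVoiceAnalyzer

    # Fabricated sample values; keys match what _create_clinical_prompt
    # and _generate_backup_analysis actually read.
    features = {'pitch_mean': 210.0, 'pitch_std': 35.0, 'tempo': 118.0, 'energy_mean': 0.021}
    emotions = {'joy': 0.61, 'neutral': 0.27, 'sadness': 0.12}

    analyzer = ClinicalVoiceAnalyzer()  # falls back to offline analysis without an API key
    print(analyzer.analyze_voice_metrics(features, emotions, "sample transcript"))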
@@ -369,9 +456,11 @@ Recording Duration: {duration:.2f} seconds
 try:
     print("===== Application Startup =====")
 
+    # Load required models with authentication
     if not load_models():
         raise RuntimeError("Model loading failed")
 
+    # Initialize clinical analyzer with authentication
     clinical_analyzer = ClinicalVoiceAnalyzer()
     print("Clinical analyzer initialized")
 
@@ -428,10 +517,10 @@ Upload an audio file or record directly through your microphone."""
 
     if __name__ == "__main__":
         demo.launch(
-            server_name="0.0.0.0",
-            server_port=7860,
-            share=False,
-            debug=False
+            server_name="0.0.0.0",  # Allow external access
+            server_port=7860,       # Default Gradio port
+            share=False,            # Disable public URL generation
+            debug=False             # Disable debug mode in production
         )
 
 except Exception as e:
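The commented launch arguments mostly pin down defaults: binding to 0.0.0.0 on port 7860 is what Hugging Face Spaces expects from a Gradio app. The same configuration can also come from Gradio's environment variables, which keeps app.py unchanged across environments; a sketch:

    # Equivalent setup via environment variables instead of kwargs:
    #   GRADIO_SERVER_NAME=0.0.0.0 GRADIO_SERVER_PORT=7860 python app.py
    demo.launch(share=False, debug=False)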