invincible-jha committed
Commit: 978a6ce
Parent: e666e44

Upload 2 files

Files changed (2)
  1. app.py +79 -257
  2. requirements.txt +12 -1
app.py CHANGED
@@ -1,239 +1,82 @@
-import gradio as gr
-import torch
-from transformers import WhisperProcessor, WhisperForConditionalGeneration, AutoModelForSequenceClassification, AutoTokenizer
-import librosa
-import numpy as np
-import plotly.graph_objects as go
-import warnings
 import os
-from scipy.stats import kurtosis, skew
-warnings.filterwarnings('ignore')
-
-def extract_prosodic_features(waveform, sr):
-    """Extract prosodic features from audio"""
-    try:
-        features = {}
-
-        # 1. Pitch (F0) Features
-        pitches, magnitudes = librosa.piptrack(y=waveform, sr=sr)
-        f0_contour = []
-        for t in range(pitches.shape[1]):
-            pitches_at_t = pitches[:, t]
-            mags = magnitudes[:, t]
-            pitch_index = mags.argmax()
-            f0_contour.append(pitches[pitch_index, t])
-        f0_contour = np.array(f0_contour)
-        f0_contour = f0_contour[f0_contour > 0]  # Remove zero pitches
-
-        if len(f0_contour) > 0:
-            features['pitch_mean'] = np.mean(f0_contour)
-            features['pitch_std'] = np.std(f0_contour)
-            features['pitch_range'] = np.ptp(f0_contour)
-        else:
-            features['pitch_mean'] = 0
-            features['pitch_std'] = 0
-            features['pitch_range'] = 0
-
-        # 2. Energy/Intensity Features
-        rms = librosa.feature.rms(y=waveform)[0]
-        features['energy_mean'] = np.mean(rms)
-        features['energy_std'] = np.std(rms)
-        features['energy_range'] = np.ptp(rms)
-
-        # 3. Rhythm Features
-        onset_env = librosa.onset.onset_strength(y=waveform, sr=sr)
-        tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr)
-        features['tempo'] = tempo[0]
-
-        # 4. Voice Quality Features
-        spectral_centroids = librosa.feature.spectral_centroid(y=waveform, sr=sr)[0]
-        features['spectral_centroid_mean'] = np.mean(spectral_centroids)
-
-        spectral_rolloff = librosa.feature.spectral_rolloff(y=waveform, sr=sr)[0]
-        features['spectral_rolloff_mean'] = np.mean(spectral_rolloff)
-
-        # 5. MFCC Features
-        mfccs = librosa.feature.mfcc(y=waveform, sr=sr, n_mfcc=13)
-        for i in range(13):
-            features[f'mfcc_{i}_mean'] = np.mean(mfccs[i])
-            features[f'mfcc_{i}_std'] = np.std(mfccs[i])
-
-        return features
-
-    except Exception as e:
-        print(f"Error in extract_prosodic_features: {str(e)}")
-        return None
+from anthropic import Anthropic
+import gradio as gr
+# ... (your existing imports)

-def create_feature_plots(features):
-    """Create visualizations for audio features"""
-    try:
-        # Create main figure with subplots
-        fig = go.Figure()
-
-        # 1. Pitch Features
-        pitch_data = {
-            'Mean': features['pitch_mean'],
-            'Std Dev': features['pitch_std'],
-            'Range': features['pitch_range']
-        }
-
-        fig.add_trace(go.Bar(
-            name='Pitch Features',
-            x=list(pitch_data.keys()),
-            y=list(pitch_data.values()),
-            marker_color='blue'
-        ))
-
-        # 2. Energy Features
-        energy_data = {
-            'Mean': features['energy_mean'],
-            'Std Dev': features['energy_std'],
-            'Range': features['energy_range']
+class ClinicalVoiceAnalyzer:
+    def __init__(self):
+        # Initialize without the API key first
+        self.anthropic = None
+        self.model = "claude-3-opus-20240229"
+        self.api_key = os.getenv('ANTHROPIC_API_KEY')
+
+        # Reference ranges remain the same
+        self.reference_ranges = {
+            'pitch': {'min': 150, 'max': 400},
+            'tempo': {'min': 90, 'max': 130},
+            'energy': {'min': 0.01, 'max': 0.05}
         }

-        fig.add_trace(go.Bar(
-            name='Energy Features',
-            x=[f"Energy {k}" for k in energy_data.keys()],
-            y=list(energy_data.values()),
-            marker_color='red'
-        ))
-
-        # 3. MFCC Plot
-        mfcc_means = [features[f'mfcc_{i}_mean'] for i in range(13)]
-        fig.add_trace(go.Scatter(
-            name='MFCC Coefficients',
-            y=mfcc_means,
-            mode='lines+markers',
-            marker_color='green'
-        ))
-
-        # Update layout
-        fig.update_layout(
-            title='Voice Feature Analysis',
-            showlegend=True,
-            height=600,
-            barmode='group'
-        )
-
-        return fig.to_html(include_plotlyjs=True)
-
-    except Exception as e:
-        print(f"Error in create_feature_plots: {str(e)}")
-        return None
-
-def load_models():
-    """Initialize and load all required models"""
-    global processor, whisper_model, emotion_tokenizer, emotion_model
+        # Initialize Anthropic client if API key is available
+        self._initialize_anthropic()

-    try:
-        print("Loading Whisper model...")
-        processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
-        whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
-
-        print("Loading emotion model...")
-        emotion_tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
-        emotion_model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
-
-        whisper_model.to("cpu")
-        emotion_model.to("cpu")
-
-        print("Models loaded successfully!")
-        return True
-    except Exception as e:
-        print(f"Error loading models: {str(e)}")
-        return False
+    def _initialize_anthropic(self):
+        """Safely initialize the Anthropic client"""
+        try:
+            if self.api_key:
+                self.anthropic = Anthropic(api_key=self.api_key)
+                print("Anthropic client initialized successfully")
+            else:
+                print("Warning: ANTHROPIC_API_KEY not found in environment variables")
+        except Exception as e:
+            print(f"Error initializing Anthropic client: {str(e)}")
+            self.anthropic = None

-def create_emotion_plot(emotions):
-    """Create emotion analysis visualization"""
-    try:
-        fig = go.Figure(data=[
-            go.Bar(
-                x=list(emotions.keys()),
-                y=list(emotions.values()),
-                marker_color='rgb(55, 83, 109)'
+    def generate_clinical_analysis(self, voice_features):
+        """Generate clinical analysis with fallback behavior"""
+        if not self.anthropic:
+            return self._generate_fallback_analysis(voice_features), {}
+
+        try:
+            prompt = self._construct_analysis_prompt(voice_features)
+            response = self.anthropic.messages.create(
+                model=self.model,
+                max_tokens=1000,
+                messages=[{
+                    "role": "user",
+                    "content": prompt
+                }]
             )
-        ])
+            return response.content, self._parse_clinical_response(response.content)
+        except Exception as e:
+            print(f"Error in clinical analysis: {str(e)}")
+            return self._generate_fallback_analysis(voice_features), {}
+
+    def _generate_fallback_analysis(self, features):
+        """Generate basic analysis when Anthropic API is unavailable"""
+        pitch_status = "elevated" if features['pitch_mean'] > self.reference_ranges['pitch']['max'] else "normal"
+        tempo_status = "elevated" if features['tempo'] > self.reference_ranges['tempo']['max'] else "normal"

-        fig.update_layout(
-            title='Emotion Analysis',
-            xaxis_title='Emotion',
-            yaxis_title='Score',
-            yaxis_range=[0, 1],
-            template='plotly_white',
-            height=400
-        )
+        return f"""Basic Voice Analysis:

-        return fig.to_html(include_plotlyjs=True)
-    except Exception as e:
-        print(f"Error creating emotion plot: {str(e)}")
-        return None
+Pitch Analysis: {pitch_status} ({features['pitch_mean']:.2f} Hz)
+Speech Rate: {tempo_status} ({features['tempo']:.2f} BPM)
+Energy Level: {features['energy_mean']:.4f}
+
+Note: This is a basic analysis. For detailed clinical interpretation, please ensure the Anthropic API key is configured."""

+# ... (rest of your ClinicalVoiceAnalyzer methods remain the same)
+
+# Modified analyze_audio function
 def analyze_audio(audio_input):
-    """Main function to analyze audio input"""
     try:
-        if audio_input is None:
-            return "Please provide an audio input", None, None
-
-        print(f"Processing audio input: {type(audio_input)}")
-
-        # Handle audio input
-        if isinstance(audio_input, tuple):
-            audio_path = audio_input[0]  # Get file path from tuple
-        else:
-            audio_path = audio_input
-
-        print(f"Loading audio from path: {audio_path}")
+        # Your existing audio processing code...

-        # Load audio
-        waveform, sr = librosa.load(audio_path, sr=16000)
-        print(f"Audio loaded: {waveform.shape}, SR: {sr}")
+        # Initialize clinical analyzer with graceful fallback
+        clinical_analyzer = ClinicalVoiceAnalyzer()
+        clinical_analysis, clinical_insights = clinical_analyzer.generate_clinical_analysis(features)

-        # Extract voice features
-        print("Extracting voice features...")
-        features = extract_prosodic_features(waveform, sr)
-        if features is None:
-            return "Error extracting voice features", None, None
-
-        # Create feature plots
-        print("Creating feature visualizations...")
-        feature_viz = create_feature_plots(features)
-        if feature_viz is None:
-            return "Error creating feature visualizations", None, None
-
-        # Transcribe audio
-        print("Transcribing audio...")
-        inputs = processor(waveform, sampling_rate=sr, return_tensors="pt").input_features
-
-        with torch.no_grad():
-            predicted_ids = whisper_model.generate(inputs)
-        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
-
-        # Analyze emotions
-        print("Analyzing emotions...")
-        emotion_inputs = emotion_tokenizer(
-            transcription,
-            return_tensors="pt",
-            padding=True,
-            truncation=True,
-            max_length=512
-        )
-
-        with torch.no_grad():
-            emotion_outputs = emotion_model(**emotion_inputs)
-            emotions = torch.nn.functional.softmax(emotion_outputs.logits, dim=-1)
-
-        emotion_labels = ['anger', 'fear', 'joy', 'neutral', 'sadness', 'surprise']
-        emotion_scores = {
-            label: float(score)
-            for label, score in zip(emotion_labels, emotions[0].cpu().numpy())
-        }
-
-        # Create emotion visualization
-        emotion_viz = create_emotion_plot(emotion_scores)
-        if emotion_viz is None:
-            return "Error creating emotion visualization", None, None
-
-        # Create analysis summary
+        # Create enhanced summary
         summary = f"""Voice Analysis Summary:

 Speech Content:
@@ -246,21 +89,20 @@ Voice Characteristics:
 - Voice Energy: {features['energy_mean']:.4f}

 Dominant Emotion: {max(emotion_scores.items(), key=lambda x: x[1])[0]}
+
+Clinical Analysis:
+{clinical_analysis}
 """
-
-        return summary, emotion_viz, feature_viz
+        return summary, emotion_viz, feature_viz, clinical_insights

     except Exception as e:
         error_msg = f"Error in audio analysis: {str(e)}"
         print(error_msg)
-        return error_msg, None, None
+        return error_msg, None, None, None

-# Load models at startup
-print("Initializing application...")
-if not load_models():
-    raise RuntimeError("Failed to load required models")
+# ... (rest of your existing code)

-# Create Gradio interface
+# Modified Gradio interface
 demo = gr.Interface(
     fn=analyze_audio,
     inputs=gr.Audio(
@@ -269,32 +111,12 @@ demo = gr.Interface(
         label="Audio Input"
     ),
     outputs=[
-        gr.Textbox(label="Analysis Summary", lines=10),
+        gr.Textbox(label="Analysis Summary", lines=15),
         gr.HTML(label="Emotion Analysis"),
-        gr.HTML(label="Voice Feature Analysis")
+        gr.HTML(label="Voice Feature Analysis"),
+        gr.JSON(label="Clinical Insights")
     ],
-    title="Voice Analysis System",
-    description="""
-    This application analyzes voice recordings to extract various characteristics:
-
-    1. Voice Features:
-       - Pitch analysis
-       - Energy patterns
-       - Speech rate
-       - Voice quality
-
-    2. Emotional Content:
-       - Emotion detection
-       - Emotional intensity
-
-    3. Speech Content:
-       - Text transcription
-
-    Upload an audio file or record directly through your microphone.
-    """,
-    examples=None,
-    cache_examples=False
-)
-
-if __name__ == "__main__":
-    demo.launch(share=True)
+    title="Advanced Voice Analysis System",
+    description="""This system provides comprehensive voice analysis with clinical interpretation.
+    Upload an audio file or record directly through your microphone."""
+)
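
Note: the hunk above calls two helpers whose bodies the commit leaves unchanged and does not display, _construct_analysis_prompt and _parse_clinical_response (see the "rest of your ClinicalVoiceAnalyzer methods remain the same" marker). For orientation only, here is a minimal sketch of what such methods could look like, indented as they would sit inside ClinicalVoiceAnalyzer; the prompt wording, the returned keys, and the keyword checks are illustrative assumptions, not the committed implementation.

    def _construct_analysis_prompt(self, features):
        """Illustrative sketch: turn the extracted voice features into a text prompt."""
        return (
            "Summarize these voice measurements against the reference ranges "
            "in plain, non-diagnostic language.\n"
            f"Pitch: {features['pitch_mean']:.2f} Hz "
            f"(reference {self.reference_ranges['pitch']['min']}-{self.reference_ranges['pitch']['max']} Hz)\n"
            f"Tempo: {features['tempo']:.2f} BPM "
            f"(reference {self.reference_ranges['tempo']['min']}-{self.reference_ranges['tempo']['max']} BPM)\n"
            f"Energy: {features['energy_mean']:.4f} "
            f"(reference {self.reference_ranges['energy']['min']}-{self.reference_ranges['energy']['max']})"
        )

    def _parse_clinical_response(self, response_content):
        """Illustrative sketch: reduce the model reply to a small dict for the gr.JSON output."""
        text = str(response_content)
        return {
            "summary": text[:500],
            "mentions_pitch": "pitch" in text.lower(),
            "mentions_tempo": "tempo" in text.lower() or "rate" in text.lower(),
            "mentions_energy": "energy" in text.lower(),
        }

Whatever _parse_clinical_response returns is what the new fourth output displays: analyze_audio passes it back as clinical_insights, which the interface renders with gr.JSON(label="Clinical Insights"). When no API key is configured, generate_clinical_analysis skips both helpers and returns the fallback text with an empty dict.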
requirements.txt CHANGED
@@ -1,3 +1,4 @@
+# Core dependencies with existing versions
 gradio==3.50.2
 torch==2.1.0
 transformers==4.35.2
@@ -5,4 +6,14 @@ librosa==0.10.1
 numpy==1.24.3
 plotly==5.18.0
 soundfile==0.12.1
-scipy==1.11.3
+scipy==1.11.3
+
+# New dependencies for Anthropic integration
+anthropic==0.3.11
+python-dotenv==1.0.0
+requests>=2.31.0
+
+# Additional utilities that enhance stability
+tqdm>=4.66.1
+regex>=2023.8.8
+tenacity>=8.2.3
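
Note: the new python-dotenv dependency pairs with the os.getenv('ANTHROPIC_API_KEY') lookup in ClinicalVoiceAnalyzer.__init__, but the diff does not show where (or whether) load_dotenv() is called, so the snippet below is a sketch of one common local setup rather than the committed wiring. On a Hugging Face Space the same variable would typically be supplied as a repository secret rather than a .env file.

# .env (kept out of version control)
# ANTHROPIC_API_KEY=your-key-here

# Near the top of app.py, before ClinicalVoiceAnalyzer() is first constructed:
import os
from dotenv import load_dotenv

load_dotenv()  # merge variables from a local .env file into os.environ, if present

if not os.getenv("ANTHROPIC_API_KEY"):
    # The analyzer still runs without a key; it falls back to _generate_fallback_analysis
    print("ANTHROPIC_API_KEY not set; clinical analysis will use the basic fallback")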