invincible-jha committed on
Commit
d13902f
1 Parent(s): 3f4b577

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -51
app.py CHANGED
@@ -15,6 +15,10 @@ from dotenv import load_dotenv
15
  # Load environment variables
16
  load_dotenv()
17
 
 
 
 
 
18
  # Suppress warnings
19
  warnings.filterwarnings('ignore')
20
 
@@ -30,17 +34,26 @@ def load_models():
30
  global processor, whisper_model, emotion_tokenizer, emotion_model
31
 
32
  try:
33
- # Load Whisper model
34
  print("Loading Whisper model...")
35
- processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
36
- whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
 
 
 
 
 
 
37
 
38
- # Load emotion model
39
  print("Loading emotion model...")
40
- emotion_tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
41
- emotion_model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
 
 
 
 
 
 
42
 
43
- # Set device
44
  device = "cpu"
45
  whisper_model.to(device)
46
  emotion_model.to(device)
@@ -136,75 +149,147 @@ def extract_prosodic_features(waveform, sr):
136
  return None
137
 
138
  class ClinicalVoiceAnalyzer:
139
- """Clinical voice analysis and interpretation."""
140
 
141
  def __init__(self):
142
  """Initialize analyzer with API and reference ranges."""
143
- self.anthropic = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
144
- self.model = "claude-3-opus-20240229"
145
- self.reference_ranges = {
146
- 'pitch': {'min': 150, 'max': 400},
147
- 'tempo': {'min': 90, 'max': 130},
148
- 'energy': {'min': 0.01, 'max': 0.05}
149
- }
150
- print("Clinical analyzer ready")
 
 
 
 
 
 
 
 
151
 
152
  def analyze_voice_metrics(self, features, emotions, transcription):
153
- """Analyze voice metrics and generate clinical insights."""
154
  try:
 
 
 
155
  prompt = self._create_clinical_prompt(features, emotions, transcription)
 
 
156
  response = self.anthropic.messages.create(
157
  model=self.model,
158
  max_tokens=1000,
159
- messages=[{"role": "user", "content": prompt}]
 
 
 
 
160
  )
161
- return self._format_analysis(response.content)
 
 
 
 
 
 
 
162
  except Exception as e:
163
  print(f"Clinical analysis error: {e}")
164
  return self._generate_backup_analysis(features, emotions)
165
 
166
  def _create_clinical_prompt(self, features, emotions, transcription):
167
- """Create clinical analysis prompt."""
168
- return f"""As a clinical voice analysis expert, provide a psychological assessment of:
169
 
170
- Voice Metrics:
171
- - Pitch: {features['pitch_mean']:.2f} Hz (Normal: {self.reference_ranges['pitch']['min']}-{self.reference_ranges['pitch']['max']} Hz)
172
  - Pitch Variation: {features['pitch_std']:.2f} Hz
173
- - Speech Rate: {features['tempo']:.2f} BPM (Normal: {self.reference_ranges['tempo']['min']}-{self.reference_ranges['tempo']['max']} BPM)
174
- - Voice Energy: {features['energy_mean']:.4f}
175
 
176
- Emotions Detected:
177
  {', '.join(f'{emotion}: {score:.1%}' for emotion, score in emotions.items())}
178
 
179
  Speech Content:
180
  "{transcription}"
181
 
182
- Provide:
183
- 1. Voice characteristic analysis
184
- 2. Emotional state assessment
185
- 3. Anxiety/depression indicators
186
- 4. Stress level evaluation
187
- 5. Clinical recommendations"""
 
 
 
 
 
188
 
189
  def _format_analysis(self, analysis):
190
- """Format clinical analysis output."""
191
  return f"\nClinical Assessment:\n{analysis}"
192
 
193
  def _generate_backup_analysis(self, features, emotions):
194
- """Generate backup analysis when API fails."""
195
- dominant_emotion = max(emotions.items(), key=lambda x: x[1])
196
- pitch_status = (
197
- "elevated" if features['pitch_mean'] > self.reference_ranges['pitch']['max']
198
- else "reduced" if features['pitch_mean'] < self.reference_ranges['pitch']['min']
199
- else "normal"
200
- )
201
-
202
- return f"""
203
- Basic Voice Analysis (API Unavailable):
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  - Pitch Status: {pitch_status} ({features['pitch_mean']:.2f} Hz)
205
- - Speech Rate: {features['tempo']:.2f} BPM
206
- - Voice Energy Level: {features['energy_mean']:.4f}
207
- - Primary Emotion: {dominant_emotion[0]} ({dominant_emotion[1]:.1%} confidence)"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209
  def create_feature_plots(features):
210
  """Create visualizations for voice features."""
@@ -334,10 +419,12 @@ def analyze_audio(audio_input):
334
  global clinical_analyzer
335
  if clinical_analyzer is None:
336
  clinical_analyzer = ClinicalVoiceAnalyzer()
337
-
 
338
  clinical_analysis = clinical_analyzer.analyze_voice_metrics(
339
  features, emotion_scores, transcription
340
  )
 
341
 
342
  # Create summary with fixed string formatting
343
  summary = f"""Voice Analysis Summary:
@@ -369,9 +456,11 @@ Recording Duration: {duration:.2f} seconds
369
  try:
370
  print("===== Application Startup =====")
371
 
 
372
  if not load_models():
373
  raise RuntimeError("Model loading failed")
374
 
 
375
  clinical_analyzer = ClinicalVoiceAnalyzer()
376
  print("Clinical analyzer initialized")
377
 
@@ -428,10 +517,10 @@ Upload an audio file or record directly through your microphone."""
428
 
429
  if __name__ == "__main__":
430
  demo.launch(
431
- server_name="0.0.0.0",
432
- server_port=7860,
433
- share=False,
434
- debug=False
435
  )
436
 
437
  except Exception as e:
 
15
  # Load environment variables
16
  load_dotenv()
17
 
18
+ # Get API tokens
19
+ ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY', 'your_anthropic_api_key')
20
+ HUGGINGFACE_TOKEN = os.getenv('HUGGINGFACE_TOKEN', 'your_huggingface_api_token')
21
+
22
  # Suppress warnings
23
  warnings.filterwarnings('ignore')
24
 
 
34
  global processor, whisper_model, emotion_tokenizer, emotion_model
35
 
36
  try:
 
37
  print("Loading Whisper model...")
38
+ processor = WhisperProcessor.from_pretrained(
39
+ "openai/whisper-tiny",
40
+ use_auth_token=HUGGINGFACE_TOKEN
41
+ )
42
+ whisper_model = WhisperForConditionalGeneration.from_pretrained(
43
+ "openai/whisper-tiny",
44
+ use_auth_token=HUGGINGFACE_TOKEN
45
+ )
46
 
 
47
  print("Loading emotion model...")
48
+ emotion_tokenizer = AutoTokenizer.from_pretrained(
49
+ "j-hartmann/emotion-english-distilroberta-base",
50
+ use_auth_token=HUGGINGFACE_TOKEN
51
+ )
52
+ emotion_model = AutoModelForSequenceClassification.from_pretrained(
53
+ "j-hartmann/emotion-english-distilroberta-base",
54
+ use_auth_token=HUGGINGFACE_TOKEN
55
+ )
56
 
 
57
  device = "cpu"
58
  whisper_model.to(device)
59
  emotion_model.to(device)
 
149
  return None
150
 
151
  class ClinicalVoiceAnalyzer:
152
+ """Analyze voice characteristics for psychological indicators."""
153
 
154
  def __init__(self):
155
  """Initialize analyzer with API and reference ranges."""
156
+ try:
157
+ if not ANTHROPIC_API_KEY:
158
+ raise ValueError("ANTHROPIC_API_KEY not found in environment variables")
159
+
160
+ self.anthropic = Anthropic(api_key=ANTHROPIC_API_KEY)
161
+ self.model = "claude-3-opus-20240229"
162
+
163
+ self.reference_ranges = {
164
+ 'pitch': {'min': 150, 'max': 400},
165
+ 'tempo': {'min': 90, 'max': 130},
166
+ 'energy': {'min': 0.01, 'max': 0.05}
167
+ }
168
+ print("Clinical analyzer ready")
169
+ except Exception as e:
170
+ print(f"Error initializing clinical analyzer: {e}")
171
+ self.anthropic = None
172
 
173
  def analyze_voice_metrics(self, features, emotions, transcription):
174
+ """Generate clinical insights from voice and emotion data."""
175
  try:
176
+ if not self.anthropic:
177
+ return self._generate_backup_analysis(features, emotions)
178
+
179
  prompt = self._create_clinical_prompt(features, emotions, transcription)
180
+ print("Sending analysis request to Anthropic API...")
181
+
182
  response = self.anthropic.messages.create(
183
  model=self.model,
184
  max_tokens=1000,
185
+ messages=[{
186
+ "role": "user",
187
+ "content": prompt
188
+ }],
189
+ temperature=0.7
190
  )
191
+
192
+ if response and hasattr(response, 'content'):
193
+ print("Received response from Anthropic API")
194
+ return self._format_analysis(response.content)
195
+ else:
196
+ print("No valid response from API")
197
+ return self._generate_backup_analysis(features, emotions)
198
+
199
  except Exception as e:
200
  print(f"Clinical analysis error: {e}")
201
  return self._generate_backup_analysis(features, emotions)
202
 
203
  def _create_clinical_prompt(self, features, emotions, transcription):
204
+ """Create detailed prompt for clinical analysis."""
205
+ prompt = f"""As a clinical voice analysis expert, provide a detailed psychological assessment based on the following data:
206
 
207
+ Voice Characteristics Analysis:
208
+ - Pitch: {features['pitch_mean']:.2f} Hz (Normal range: {self.reference_ranges['pitch']['min']}-{self.reference_ranges['pitch']['max']} Hz)
209
  - Pitch Variation: {features['pitch_std']:.2f} Hz
210
+ - Speech Rate: {features['tempo']:.2f} BPM (Normal range: {self.reference_ranges['tempo']['min']}-{self.reference_ranges['tempo']['max']} BPM)
211
+ - Voice Energy Level: {features['energy_mean']:.4f} (Normal range: {self.reference_ranges['energy']['min']}-{self.reference_ranges['energy']['max']})
212
 
213
+ Emotional Analysis:
214
  {', '.join(f'{emotion}: {score:.1%}' for emotion, score in emotions.items())}
215
 
216
  Speech Content:
217
  "{transcription}"
218
 
219
+ Please provide a comprehensive assessment including:
220
+ 1. Detailed voice characteristic analysis and what it indicates about mental state
221
+ 2. Assessment of emotional state based on both voice features and detected emotions
222
+ 3. Potential indicators of anxiety, depression, or other mental health concerns
223
+ 4. Evaluation of stress levels and emotional stability
224
+ 5. Specific recommendations for mental health professionals or further assessment if needed
225
+
226
+ Base your analysis on established clinical research connecting voice biomarkers to psychological states."""
227
+
228
+ print(f"Generated prompt length: {len(prompt)} characters")
229
+ return prompt
230
 
231
  def _format_analysis(self, analysis):
232
+ """Format the clinical analysis output."""
233
  return f"\nClinical Assessment:\n{analysis}"
234
 
235
  def _generate_backup_analysis(self, features, emotions):
236
+ """Generate basic analysis when API is unavailable."""
237
+ try:
238
+ dominant_emotion = max(emotions.items(), key=lambda x: x[1])
239
+ pitch_status = (
240
+ "elevated" if features['pitch_mean'] > self.reference_ranges['pitch']['max']
241
+ else "reduced" if features['pitch_mean'] < self.reference_ranges['pitch']['min']
242
+ else "normal"
243
+ )
244
+
245
+ tempo_status = (
246
+ "rapid" if features['tempo'] > self.reference_ranges['tempo']['max']
247
+ else "slow" if features['tempo'] < self.reference_ranges['tempo']['min']
248
+ else "normal"
249
+ )
250
+
251
+ energy_status = (
252
+ "high" if features['energy_mean'] > self.reference_ranges['energy']['max']
253
+ else "low" if features['energy_mean'] < self.reference_ranges['energy']['min']
254
+ else "normal"
255
+ )
256
+
257
+ return f"""
258
+ Detailed Voice Analysis:
259
  - Pitch Status: {pitch_status} ({features['pitch_mean']:.2f} Hz)
260
+ - Speech Rate: {features['tempo']:.2f} BPM ({tempo_status})
261
+ - Voice Energy Level: {features['energy_mean']:.4f} ({energy_status})
262
+ - Primary Emotion: {dominant_emotion[0]} ({dominant_emotion[1]:.1%} confidence)
263
+
264
+ Potential Indicators:
265
+ - Pitch: {self._interpret_pitch(features['pitch_mean'], pitch_status)}
266
+ - Rate: {self._interpret_tempo(features['tempo'], tempo_status)}
267
+ - Energy: {self._interpret_energy(features['energy_mean'], energy_status)}
268
+ """
269
+ except Exception as e:
270
+ print(f"Error in backup analysis: {e}")
271
+ return "Error generating analysis. Please try again."
272
+
273
+ def _interpret_pitch(self, pitch, status):
274
+ if status == "elevated":
275
+ return "May indicate heightened stress or anxiety"
276
+ elif status == "reduced":
277
+ return "Could suggest low energy or depressed mood"
278
+ return "Within normal range, suggesting stable emotional state"
279
+
280
+ def _interpret_tempo(self, tempo, status):
281
+ if status == "rapid":
282
+ return "May indicate anxiety or agitation"
283
+ elif status == "slow":
284
+ return "Could suggest fatigue or low mood"
285
+ return "Normal pacing indicates balanced emotional state"
286
+
287
+ def _interpret_energy(self, energy, status):
288
+ if status == "high":
289
+ return "May indicate heightened emotional state or agitation"
290
+ elif status == "low":
291
+ return "Could suggest reduced emotional expression or fatigue"
292
+ return "Appropriate energy level suggests emotional stability"
293
 
294
  def create_feature_plots(features):
295
  """Create visualizations for voice features."""
 
419
  global clinical_analyzer
420
  if clinical_analyzer is None:
421
  clinical_analyzer = ClinicalVoiceAnalyzer()
422
+
423
+ print("Initiating clinical analysis...") # Debug log
424
  clinical_analysis = clinical_analyzer.analyze_voice_metrics(
425
  features, emotion_scores, transcription
426
  )
427
+ print("Clinical analysis completed") # Debug log
428
 
429
  # Create summary with fixed string formatting
430
  summary = f"""Voice Analysis Summary:
 
456
  try:
457
  print("===== Application Startup =====")
458
 
459
+ # Load required models with authentication
460
  if not load_models():
461
  raise RuntimeError("Model loading failed")
462
 
463
+ # Initialize clinical analyzer with authentication
464
  clinical_analyzer = ClinicalVoiceAnalyzer()
465
  print("Clinical analyzer initialized")
466
 
 
517
 
518
  if __name__ == "__main__":
519
  demo.launch(
520
+ server_name="0.0.0.0", # Allow external access
521
+ server_port=7860, # Default Gradio port
522
+ share=False, # Disable public URL generation
523
+ debug=False # Disable debug mode in production
524
  )
525
 
526
  except Exception as e: