invincible-jha committed on
Commit 40d9220 · verified · Parent: d894230

Delete src

src/models/__init__.py DELETED
@@ -1,5 +0,0 @@
-from .model_manager import ModelManager
-from .audio_processor import AudioProcessor
-from .analyzer import Analyzer
-
-__all__ = ['ModelManager', 'AudioProcessor', 'Analyzer']
src/models/analyzer.py DELETED
@@ -1,61 +0,0 @@
-from .model_manager import ModelManager
-from .audio_processor import AudioProcessor
-from typing import Dict
-
-class Analyzer:
-    def __init__(self, model_manager: ModelManager, audio_processor: AudioProcessor):
-        self.model_manager = model_manager
-        self.audio_processor = audio_processor
-        self.model_manager.load_models()
-
-    def analyze(self, audio_path: str) -> Dict:
-        # Process audio
-        waveform, features = self.audio_processor.process_audio(audio_path)
-
-        # Get transcription
-        transcription = self.model_manager.transcribe(waveform)
-
-        # Analyze emotions
-        emotions = self.model_manager.analyze_emotions(transcription)
-
-        # Analyze mental health indicators
-        mental_health = self.model_manager.analyze_mental_health(transcription)
-
-        # Combine analysis with audio features
-        mental_health = self._combine_analysis(mental_health, features)
-
-        return {
-            'transcription': transcription,
-            'emotions': {
-                'scores': emotions,
-                'dominant_emotion': max(emotions.items(), key=lambda x: x[1])[0]
-            },
-            'mental_health_indicators': mental_health,
-            'audio_features': features
-        }
-
-    def _combine_analysis(self, mental_health: Dict, features: Dict) -> Dict:
-        """Combine mental health analysis with audio features"""
-        # Adjust risk scores based on audio features
-        energy_level = features['energy']['mean']
-        pitch_variability = features['pitch']['std']
-
-        # Simple risk score adjustment based on audio features
-        mental_health['depression_risk'] = (
-            mental_health['depression_risk'] * 0.7 +
-            (1 - energy_level) * 0.3  # Lower energy may indicate depression
-        )
-
-        mental_health['anxiety_risk'] = (
-            mental_health['anxiety_risk'] * 0.7 +
-            pitch_variability * 0.3  # Higher pitch variability may indicate anxiety
-        )
-
-        # Add confidence scores (hard-coded placeholder values)
-        mental_health['confidence'] = {
-            'depression': 0.8,
-            'anxiety': 0.8,
-            'stress': 0.7
-        }
-
-        return mental_health
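For reference, a minimal usage sketch of the deleted pipeline. Note that src/models/__init__.py imports .model_manager and .audio_processor, while the files on disk were named model-manager.py and audio-processor.py, so the package could not have imported as written; the sketch assumes snake_case filenames and a hypothetical sample.wav:

    from src.models import ModelManager, AudioProcessor, Analyzer

    analyzer = Analyzer(ModelManager(), AudioProcessor())  # loads all models up front
    result = analyzer.analyze("sample.wav")
    print(result['transcription'])
    print(result['emotions']['dominant_emotion'])
    print(result['mental_health_indicators'])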
src/models/audio-processor.py DELETED
@@ -1,56 +0,0 @@
-import librosa
-import numpy as np
-from typing import Dict, Tuple
-
-class AudioProcessor:
-    def __init__(self):
-        self.sample_rate = 16000
-        self.n_mfcc = 13
-        self.n_mels = 128
-
-    def process_audio(self, audio_path: str) -> Tuple[np.ndarray, Dict]:
-        # Load and resample audio to 16 kHz mono
-        waveform, sr = librosa.load(audio_path, sr=self.sample_rate)
-
-        # Extract features
-        features = {
-            'mfcc': self._extract_mfcc(waveform),
-            'pitch': self._extract_pitch(waveform),
-            'energy': self._extract_energy(waveform)
-        }
-
-        return waveform, features
-
-    def _extract_mfcc(self, waveform: np.ndarray) -> np.ndarray:
-        mfccs = librosa.feature.mfcc(
-            y=waveform,
-            sr=self.sample_rate,
-            n_mfcc=self.n_mfcc
-        )
-        return mfccs.mean(axis=1)
-
-    def _extract_pitch(self, waveform: np.ndarray) -> Dict:
-        f0, voiced_flag, voiced_probs = librosa.pyin(
-            waveform,
-            fmin=librosa.note_to_hz('C2'),
-            fmax=librosa.note_to_hz('C7'),
-            sr=self.sample_rate
-        )
-
-        # f0 is NaN on unvoiced frames, so use NaN-aware statistics
-        return {
-            'mean': float(np.nanmean(f0)),
-            'std': float(np.nanstd(f0)),
-            'max': float(np.nanmax(f0)),
-            'min': float(np.nanmin(f0))
-        }
-
-    def _extract_energy(self, waveform: np.ndarray) -> Dict:
-        rms = librosa.feature.rms(y=waveform)[0]
-
-        return {
-            'mean': float(np.mean(rms)),
-            'std': float(np.std(rms)),
-            'max': float(np.max(rms)),
-            'min': float(np.min(rms))
-        }
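A quick sanity check of the feature extractor, again assuming a local sample.wav:

    processor = AudioProcessor()
    waveform, features = processor.process_audio("sample.wav")
    assert features['mfcc'].shape == (13,)  # one mean value per MFCC coefficient
    print(features['pitch']['mean'], features['energy']['mean'])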
src/models/model-manager.py DELETED
@@ -1,89 +0,0 @@
-from transformers import (
-    WhisperProcessor, WhisperForConditionalGeneration,
-    AutoModelForSequenceClassification, AutoTokenizer
-)
-import torch
-
-class ModelManager:
-    def __init__(self):
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.models = {}
-        self.tokenizers = {}
-        self.processors = {}
-
-    def load_models(self):
-        # Load Whisper for speech recognition
-        self.processors['whisper'] = WhisperProcessor.from_pretrained("openai/whisper-base")
-        self.models['whisper'] = WhisperForConditionalGeneration.from_pretrained(
-            "openai/whisper-base"
-        ).to(self.device)
-
-        # Load EmoRoBERTa for emotion detection
-        self.tokenizers['emotion'] = AutoTokenizer.from_pretrained("arpanghoshal/EmoRoBERTa")
-        self.models['emotion'] = AutoModelForSequenceClassification.from_pretrained(
-            "arpanghoshal/EmoRoBERTa"
-        ).to(self.device)
-
-        # Load ClinicalBERT for risk analysis
-        # NOTE: Bio_ClinicalBERT ships without a sequence-classification head;
-        # the 3-way head created here is randomly initialized and must be
-        # fine-tuned before its risk scores are meaningful.
-        self.tokenizers['clinical'] = AutoTokenizer.from_pretrained(
-            "emilyalsentzer/Bio_ClinicalBERT"
-        )
-        self.models['clinical'] = AutoModelForSequenceClassification.from_pretrained(
-            "emilyalsentzer/Bio_ClinicalBERT", num_labels=3
-        ).to(self.device)
-
-    def transcribe(self, audio_input):
-        # Whisper models are trained on 16 kHz audio; pass the rate explicitly
-        inputs = self.processors['whisper'](
-            audio_input,
-            sampling_rate=16000,
-            return_tensors="pt"
-        ).input_features.to(self.device)
-
-        generated_ids = self.models['whisper'].generate(inputs)
-        transcription = self.processors['whisper'].batch_decode(
-            generated_ids,
-            skip_special_tokens=True
-        )[0]
-        return transcription
-
-    def analyze_emotions(self, text):
-        inputs = self.tokenizers['emotion'](
-            text,
-            return_tensors="pt",
-            padding=True,
-            truncation=True,
-            max_length=512
-        ).to(self.device)
-
-        with torch.no_grad():
-            outputs = self.models['emotion'](**inputs)
-        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
-
-        # Take label names from the model config; EmoRoBERTa predicts the
-        # 28-label GoEmotions taxonomy, so a hard-coded six-emotion list
-        # would silently drop most of the outputs.
-        id2label = self.models['emotion'].config.id2label
-        return {id2label[i]: float(p) for i, p in enumerate(probs[0])}
-
-    def analyze_mental_health(self, text):
-        inputs = self.tokenizers['clinical'](
-            text,
-            return_tensors="pt",
-            padding=True,
-            truncation=True,
-            max_length=512
-        ).to(self.device)
-
-        with torch.no_grad():
-            outputs = self.models['clinical'](**inputs)
-        scores = torch.sigmoid(outputs.logits)
-
-        return {
-            'depression_risk': float(scores[0][0]),
-            'anxiety_risk': float(scores[0][1]),
-            'stress_level': float(scores[0][2])
-        }
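A minimal smoke test of the manager on its own, assuming a mono sample.wav; the 16 kHz load rate matches both Whisper's expected input and AudioProcessor.sample_rate:

    import librosa

    manager = ModelManager()
    manager.load_models()
    waveform, _ = librosa.load("sample.wav", sr=16000)
    text = manager.transcribe(waveform)
    emotions = manager.analyze_emotions(text)
    print(text, max(emotions, key=emotions.get))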
src/utils/__init__.py DELETED
@@ -1,11 +0,0 @@
-from .gpu_optimizer import GPUOptimizer
-from .model_cache import ModelCache
-from .visualizer import create_emotion_plot, create_pitch_plot, create_energy_plot
-
-__all__ = [
-    'GPUOptimizer',
-    'ModelCache',
-    'create_emotion_plot',
-    'create_pitch_plot',
-    'create_energy_plot'
-]
src/utils/gpu-optimizer.py DELETED
@@ -1,31 +0,0 @@
-import torch
-import gc
-
-class GPUOptimizer:
-    def __init__(self):
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-    def optimize(self):
-        if torch.cuda.is_available():
-            # Release cached allocator blocks and collect garbage
-            torch.cuda.empty_cache()
-            gc.collect()
-
-            # Cap this process at 90% of the GPU's memory
-            torch.cuda.set_per_process_memory_fraction(0.9)
-
-            # Enable TF32 for better throughput on Ampere+ GPUs
-            torch.backends.cuda.matmul.allow_tf32 = True
-            torch.backends.cudnn.allow_tf32 = True
-
-            # Mixed precision: torch.cuda.amp.autocast is a context manager
-            # and only takes effect when wrapping a forward pass; calling it
-            # as a bare statement here would be a no-op.
-
-    def get_memory_usage(self):
-        if torch.cuda.is_available():
-            return {
-                'allocated': torch.cuda.memory_allocated() / 1024**2,  # MB
-                'reserved': torch.cuda.memory_reserved() / 1024**2  # MB
-            }
-        return {'allocated': 0, 'reserved': 0}
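Since autocast only applies inside a with block, here is a sketch of how the optimizer was presumably meant to be combined with mixed-precision inference; the model and input are placeholders:

    import torch

    opt = GPUOptimizer()
    opt.optimize()

    model = torch.nn.Linear(16, 4).to(opt.device)
    x = torch.randn(8, 16, device=opt.device)

    # Mixed precision takes effect only around the forward pass itself
    with torch.autocast(device_type=opt.device.type, enabled=opt.device.type == "cuda"):
        y = model(x)
    print(opt.get_memory_usage())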
src/utils/model-cache.py DELETED
@@ -1,27 +0,0 @@
-from collections import OrderedDict
-import hashlib
-
-class ModelCache:
-    def __init__(self, cache_size=128):
-        self.cache_size = cache_size
-        self._cache = OrderedDict()
-
-    def get(self, key):
-        # Return a cached result, marking it most recently used
-        if key in self._cache:
-            self._cache.move_to_end(key)
-            return self._cache[key]
-        return None
-
-    def set(self, key, result):
-        # Store a result, evicting the least recently used entry when full
-        self._cache[key] = result
-        if len(self._cache) > self.cache_size:
-            self._cache.popitem(last=False)
-
-    def get_cache_key(self, audio_data):
-        # Hash the raw audio bytes to build a stable cache key
-        return hashlib.md5(audio_data).hexdigest()
-
-    def clear_cache(self):
-        self._cache.clear()
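A sketch of how the cache would slot into the pipeline, keying results on the raw bytes of the input file; analyzer is the hypothetical instance from the earlier sketch:

    cache = ModelCache(cache_size=64)

    with open("sample.wav", "rb") as f:
        key = cache.get_cache_key(f.read())

    result = cache.get(key)
    if result is None:
        result = analyzer.analyze("sample.wav")  # expensive path
        cache.set(key, result)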
src/utils/visualizer.py DELETED
@@ -1,74 +0,0 @@
-import plotly.graph_objects as go
-from typing import Dict
-
-def create_emotion_plot(emotions: Dict[str, float]) -> str:
-    """Create emotion distribution plot"""
-    fig = go.Figure()
-
-    # Add bar plot
-    fig.add_trace(go.Bar(
-        x=list(emotions.keys()),
-        y=list(emotions.values()),
-        marker_color='rgb(55, 83, 109)'
-    ))
-
-    # Update layout
-    fig.update_layout(
-        title='Emotion Distribution',
-        xaxis_title='Emotion',
-        yaxis_title='Score',
-        yaxis_range=[0, 1],
-        template='plotly_white',
-        height=400
-    )
-
-    return fig.to_html(include_plotlyjs=True)
-
-def create_pitch_plot(pitch_data: Dict) -> str:
-    """Create pitch analysis plot"""
-    fig = go.Figure()
-
-    # Add box plot
-    fig.add_trace(go.Box(
-        y=[pitch_data['min'], pitch_data['mean'], pitch_data['max']],
-        name='Pitch Distribution',
-        boxpoints='all'
-    ))
-
-    # Update layout
-    fig.update_layout(
-        title='Pitch Analysis',
-        yaxis_title='Frequency (Hz)',
-        template='plotly_white',
-        height=400
-    )
-
-    return fig.to_html(include_plotlyjs=True)
-
-def create_energy_plot(energy_data: Dict) -> str:
-    """Create energy analysis plot"""
-    fig = go.Figure()
-
-    # Add indicator
-    fig.add_trace(go.Indicator(
-        mode='gauge+number',
-        value=energy_data['mean'],
-        title={'text': 'Voice Energy Level'},
-        gauge={
-            'axis': {'range': [0, 1]},
-            'bar': {'color': 'darkblue'},
-            'steps': [
-                {'range': [0, 0.3], 'color': 'lightgray'},
-                {'range': [0.3, 0.7], 'color': 'gray'},
-                {'range': [0.7, 1], 'color': 'darkgray'}
-            ]
-        }
-    ))
-
-    # Update layout
-    fig.update_layout(
-        height=300,
-        template='plotly_white'
-    )
-
-    return fig.to_html(include_plotlyjs=True)
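Each helper returns a standalone HTML fragment with plotly.js embedded (include_plotlyjs=True, so the bundle is duplicated when several fragments share a page). A quick check with made-up scores:

    html = create_emotion_plot({
        'anger': 0.05, 'fear': 0.10, 'joy': 0.55,
        'love': 0.10, 'sadness': 0.15, 'surprise': 0.05
    })
    with open("emotions.html", "w") as f:
        f.write(html)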