invincible-jha committed
Commit 0e04908 (1 parent: e10faf1)

Upload app.py

Files changed (1): app.py (+92 -6)
app.py CHANGED
@@ -14,16 +14,102 @@ class ModelManager:
         self.load_models()
 
     def load_models(self):
-        # Load Whisper
+        print("Loading Whisper model...")
         self.processors['whisper'] = WhisperProcessor.from_pretrained("openai/whisper-base")
         self.models['whisper'] = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base").to(self.device)
 
-        # Load emotion model (using open-source model)
+        print("Loading emotion model...")
         self.tokenizers['emotion'] = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
         self.models['emotion'] = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base").to(self.device)
+
+class AudioProcessor:
+    def __init__(self):
+        self.sample_rate = 16000
+        self.n_mfcc = 13
+
+    def process_audio(self, audio_path):
+        waveform, sr = librosa.load(audio_path, sr=self.sample_rate)
+        return waveform, self._extract_features(waveform)
+
+    def _extract_features(self, waveform):
+        return {
+            'mfcc': librosa.feature.mfcc(y=waveform, sr=self.sample_rate, n_mfcc=self.n_mfcc),
+            'energy': librosa.feature.rms(y=waveform)[0]
+        }
+
+class Analyzer:
+    def __init__(self):
+        print("Initializing Analyzer...")
+        self.model_manager = ModelManager()
+        self.audio_processor = AudioProcessor()
+        print("Analyzer initialization complete")
+
+    def analyze(self, audio_path):
+        print(f"Processing audio file: {audio_path}")
+        # Process audio
+        waveform, features = self.audio_processor.process_audio(audio_path)
+
+        # Transcribe
+        print("Transcribing audio...")
+        inputs = self.model_manager.processors['whisper'](waveform, return_tensors="pt").input_features.to(self.model_manager.device)
+        predicted_ids = self.model_manager.models['whisper'].generate(inputs)
+        transcription = self.model_manager.processors['whisper'].batch_decode(predicted_ids, skip_special_tokens=True)[0]
 
-        # Load clinical model (using open-source model)
-        self.tokenizers['clinical'] = AutoTokenizer.from_pretrained("medicalai/ClinicalBERT")
-        self.models['clinical'] = AutoModelForSequenceClassification.from_pretrained("medicalai/ClinicalBERT").to(self.device)
+        # Analyze emotions
+        print("Analyzing emotions...")
+        inputs = self.model_manager.tokenizers['emotion'](transcription, return_tensors="pt", padding=True, truncation=True)
+        outputs = self.model_manager.models['emotion'](**inputs)
+        emotions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+
+        emotion_labels = ['anger', 'fear', 'joy', 'neutral', 'sadness', 'surprise']
+        emotion_scores = {
+            label: float(score)
+            for label, score in zip(emotion_labels, emotions[0])
+        }
+
+        return {
+            'transcription': transcription,
+            'emotions': emotion_scores
+        }
+
+def create_emotion_plot(emotions):
+    fig = go.Figure(data=[
+        go.Bar(x=list(emotions.keys()), y=list(emotions.values()))
+    ])
+    fig.update_layout(
+        title='Emotion Analysis',
+        yaxis_range=[0, 1]
+    )
+    return fig.to_html()
+
+print("Initializing application...")
+analyzer = Analyzer()
+
+def process_audio(audio_file):
+    try:
+        print(f"Processing audio file: {audio_file}")
+        results = analyzer.analyze(audio_file)
+
+        return (
+            results['transcription'],
+            create_emotion_plot(results['emotions'])
+        )
+    except Exception as e:
+        print(f"Error processing audio: {str(e)}")
+        return str(e), "Error in analysis"
+
+print("Creating Gradio interface...")
+interface = gr.Interface(
+    fn=process_audio,
+    inputs=gr.Audio(source="microphone", type="filepath"),
+    outputs=[
+        gr.Textbox(label="Transcription"),
+        gr.HTML(label="Emotion Analysis")
+    ],
+    title="Vocal Biomarker Analysis",
+    description="Analyze voice for emotional indicators"
+)
 
-        # Rest of the code remains the same...
+print("Launching application...")
+if __name__ == "__main__":
+    interface.launch()
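
Note: the hunk starts at line 14, so the import block at the top of app.py is not part of this diff. Judging from the names the new code uses (WhisperProcessor, AutoTokenizer, librosa, torch, plotly's go, Gradio's gr), it presumably looks roughly like the sketch below; this is inferred from the identifiers, not shown in the commit.

# Assumed imports for app.py, inferred from the identifiers used in the diff above.
import torch
import librosa
import gradio as gr
import plotly.graph_objects as go
from transformers import (
    WhisperProcessor,
    WhisperForConditionalGeneration,
    AutoTokenizer,
    AutoModelForSequenceClassification,
)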
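
Note: analyze() hard-codes six emotion labels, but sequence-classification checkpoints carry their own label order in config.id2label, and the j-hartmann/emotion-english-distilroberta-base card lists seven classes (it also includes 'disgust'). A minimal, self-contained sketch of reading the labels from the loaded model instead of a hard-coded list, assuming only transformers and torch are installed:

# Sketch: map probabilities to labels via the checkpoint's own id2label ordering.
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_id = "j-hartmann/emotion-english-distilroberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)

inputs = tokenizer("I am feeling great today", return_tensors="pt", padding=True, truncation=True)
with torch.no_grad():
    probs = torch.nn.functional.softmax(model(**inputs).logits, dim=-1)[0]

# model.config.id2label is keyed by integer class index.
emotion_scores = {model.config.id2label[i]: float(p) for i, p in enumerate(probs)}
print(emotion_scores)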
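
Note: gr.Audio(source="microphone", type="filepath") matches the Gradio 3.x signature; in Gradio 4.x the parameter was renamed to sources and takes a list. If the Space runs a newer Gradio, the input would presumably need the form below (an assumption about the runtime version, not something this commit shows):

# Gradio 4.x form of the microphone input (assumes the Space runs Gradio >= 4).
import gradio as gr

audio_input = gr.Audio(sources=["microphone"], type="filepath")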