Boltz79 committed on
Commit
7ff24b4
·
verified ·
1 Parent(s): 935540d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -27
app.py CHANGED
@@ -5,82 +5,103 @@ import torch
5
def load_models():
    """Build the speech-recognition and sentiment pipelines.

    Returns:
        tuple: (transcriber, sentiment) pipelines on success, or
        (None, None) when any model fails to load.
    """
    try:
        # Prefer the GPU when one is visible to torch.
        if torch.cuda.is_available():
            device = "cuda"
        else:
            device = "cpu"
        print(f"Using device: {device}")

        # Small Whisper checkpoint keeps memory needs modest.
        transcriber = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",
            device=device,
        )

        # Lightweight DistilBERT sentiment head (binary POS/NEG).
        sentiment = pipeline(
            "sentiment-analysis",
            model="distilbert-base-uncased-finetuned-sst-2-english",
            device=device,
        )
    except Exception as e:
        # Report the failure and signal it to the caller via (None, None).
        print(f"Error loading models: {str(e)}")
        return None, None
    return transcriber, sentiment
30
 
31
def analyze_audio(audio_path):
    """Transcribe an audio clip and classify the tone of the speech.

    Args:
        audio_path: Filesystem path to the audio clip, or None when no
            audio was provided.

    Returns:
        tuple: (tone label, confidence percentage) as strings; on any
        failure the pair carries an error message and a short
        failure description instead.
    """
    if audio_path is None:
        return "Please provide an audio input", "No audio detected"

    try:
        # Models are (re)built per call; a (None, None) result means
        # loading failed and we report it rather than raising.
        transcriber, sentiment = load_models()
        if transcriber is None or sentiment is None:
            return "Error loading models", "Model initialization failed"

        # Speech -> text
        try:
            transcript = transcriber(audio_path)["text"]
            if not transcript.strip():
                return "No speech detected", "Empty transcription"
        except Exception as e:
            return f"Transcription error: {str(e)}", "Failed to process audio"

        # Text -> tone label plus confidence score
        try:
            top = sentiment(transcript)[0]
            return top["label"], f"{top['score']:.2%}"
        except Exception as e:
            return f"Sentiment analysis error: {str(e)}", "Analysis failed"

    except Exception as e:
        # Catch-all boundary so the UI always gets a readable message.
        return f"Unexpected error: {str(e)}", "Process failed"
64
 
65
- # Create interface with simplified components
66
  interface = gr.Interface(
67
  fn=analyze_audio,
68
  inputs=gr.Audio(
69
  sources=["microphone", "upload"],
70
  type="filepath",
 
71
  ),
72
  outputs=[
73
- gr.Textbox(label="Tone"),
74
- gr.Textbox(label="Confidence Level")
75
  ],
76
- title="Simple Speech Tone Analyzer",
77
- description="Record or upload audio to analyze its tone. The analysis may take a few moments.",
78
- examples=None,
79
- cache_examples=False,
80
- theme=gr.themes.Base(),
 
 
 
 
 
 
 
81
  )
82
 
83
- # Launch with specific parameters for better stability
84
  if __name__ == "__main__":
85
  interface.launch(
86
  debug=True,
 
5
def load_models():
    """Load and verify models with error checking.

    Returns:
        tuple: (transcriber, emotion_analyzer) pipelines on success, or
        (None, None) when loading failed.
    """
    # Cache the pipelines on the function object: analyze_audio calls
    # this on every request, and without a cache both models were
    # re-downloaded/re-initialized per request.
    cached = getattr(load_models, "_cache", None)
    if cached is not None:
        return cached

    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {device}")

        # Load Whisper for speech recognition
        transcriber = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",
            device=device,
        )

        # Load emotion recognition model
        emotion_analyzer = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            device=device,
        )

        # Only cache a fully successful load; failures stay retryable.
        load_models._cache = (transcriber, emotion_analyzer)
        return transcriber, emotion_analyzer
    except Exception as e:
        print(f"Error loading models: {str(e)}")
        return None, None
29
 
30
def analyze_audio(audio_path):
    """
    Analyze audio for emotional content with detailed output.

    Args:
        audio_path: Filesystem path to the recorded/uploaded audio clip,
            or None when no audio was provided.

    Returns:
        tuple: (detected emotion or error message, confidence percentage
        or failure description), both strings for the two output boxes.
    """
    if audio_path is None:
        return "Please provide audio", "No audio detected"

    try:
        # Load models; (None, None) means initialization failed and we
        # report it instead of raising.
        transcriber, emotion_analyzer = load_models()
        if transcriber is None or emotion_analyzer is None:
            return "Error loading models", "Model initialization failed"

        # Transcribe speech
        try:
            result = transcriber(audio_path)
            text = result["text"]
            if not text.strip():
                return "No speech detected", "Empty transcription"
            print(f"Transcribed text: {text}")  # Debug output
        except Exception as e:
            return f"Transcription error: {str(e)}", "Failed to process audio"

        # Analyze emotion
        try:
            emotion_result = emotion_analyzer(text)[0]
            emotion = emotion_result["label"].title()  # Capitalize emotion
            confidence = f"{emotion_result['score']:.2%}"

            # Map technical emotion labels to more natural descriptions.
            # The j-hartmann model emits seven labels: anger, disgust,
            # fear, joy, neutral, sadness, surprise. "Disgust" was
            # previously missing (it fell through as the raw label);
            # "Love" is kept for backward compatibility even though this
            # model does not produce it.
            emotion_mapping = {
                "Joy": "Happy/Joyful",
                "Sadness": "Sad/Melancholic",
                "Anger": "Angry/Frustrated",
                "Fear": "Anxious/Fearful",
                "Surprise": "Surprised/Astonished",
                "Disgust": "Disgusted/Repulsed",
                "Love": "Warm/Affectionate",
                "Neutral": "Neutral/Calm"
            }

            # Unknown labels pass through unchanged.
            display_emotion = emotion_mapping.get(emotion, emotion)
            return display_emotion, confidence

        except Exception as e:
            return f"Emotion analysis error: {str(e)}", "Analysis failed"

    except Exception as e:
        # Catch-all boundary so the UI always gets a readable message.
        return f"Unexpected error: {str(e)}", "Process failed"
78
 
79
+ # Create interface with better labeling
80
  interface = gr.Interface(
81
  fn=analyze_audio,
82
  inputs=gr.Audio(
83
  sources=["microphone", "upload"],
84
  type="filepath",
85
+ label="Record or Upload Audio"
86
  ),
87
  outputs=[
88
+ gr.Textbox(label="Detected Emotion"),
89
+ gr.Textbox(label="Confidence Score")
90
  ],
91
+ title="Speech Emotion Analyzer",
92
+ description="""
93
+ This tool analyzes the emotional tone of speech, detecting emotions like:
94
+ - Happy/Joyful
95
+ - Sad/Melancholic
96
+ - Angry/Frustrated
97
+ - Anxious/Fearful
98
+ - Surprised/Astonished
99
+ - Warm/Affectionate
100
+ - Neutral/Calm
101
+ """,
102
+ theme=gr.themes.Base()
103
  )
104
 
 
105
  if __name__ == "__main__":
106
  interface.launch(
107
  debug=True,