Navsatitagain committed
Commit 204bcef · verified · 1 Parent(s): cab3375

Update app.py

Files changed (1)
  1. app.py +60 -6
app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 from transformers import pipeline
 from PIL import Image
+import imageio
 
 TEXT_MODEL = "j-hartmann/emotion-english-distilroberta-base"
 IMAGE_MODEL = "trpakov/vit-face-expression"
@@ -11,10 +12,11 @@ image_pipe = pipeline("image-classification", model=IMAGE_MODEL, top_k=None)
 audio_pipe = pipeline("audio-classification", model=AUDIO_MODEL, top_k=None)
 
 def _as_label_dict(preds):
-
+    """Convert HF predictions to {label: score} sorted desc."""
     preds_sorted = sorted(preds, key=lambda p: p["score"], reverse=True)
     return {p["label"]: float(round(p["score"], 4)) for p in preds_sorted}
 
+# ---------- Text ----------
 def analyze_text(text: str):
     if not text or not text.strip():
         return {"(enter some text)": 1.0}
@@ -34,15 +36,56 @@ def analyze_face(img):
 def analyze_voice(audio_path):
     if audio_path is None:
         return {"(no audio)": 1.0}
-    preds = audio_pipe(audio_path)
+    preds = audio_pipe(audio_path)
     return _as_label_dict(preds)
 
+def analyze_video(video_path, sample_fps=2, max_frames=120):
+    """
+    Read the video, sample ~sample_fps frames/second (up to max_frames),
+    run face-expression model on each, and return the average scores.
+    """
+    if video_path is None:
+        return {"(no video)": 1.0}, "No file provided."
+
+    try:
+        reader = imageio.get_reader(video_path)
+        meta = reader.get_meta_data()
+        fps = int(meta.get("fps", 25))
+        step = max(int(round(fps / max(1, sample_fps))), 1)
+
+        totals = {}
+        used = 0
+
+        for i, frame in enumerate(reader):
+            if i % step != 0:
+                continue
+            if used >= max_frames:
+                break
+            pil = Image.fromarray(frame)
+            preds = image_pipe(pil)
+            for p in preds:
+                label = p["label"]
+                totals[label] = totals.get(label, 0.0) + float(p["score"])
+            used += 1
+
+        if used == 0:
+            return {"(no frames sampled)": 1.0}, "Could not sample frames; try a shorter/different video."
+
+        avg = {k: round(v / used, 4) for k, v in totals.items()}
+        avg_sorted = dict(sorted(avg.items(), key=lambda x: x[1], reverse=True))
+        info = f"Frames analyzed: {used} • Sampling ≈{sample_fps} fps • Max frames: {max_frames}"
+        return avg_sorted, info
+
+    except Exception as e:
+        return {"(error)": 1.0}, f"Video read error: {e}"
+
 with gr.Blocks(title="Empath AI — Multimodal Emotion Detection") as demo:
     gr.Markdown(
         """
-        # Empath AI — Emotion Detection (Text • Face • Voice)
-        Grant permission when the browser asks for **camera/microphone**.
-        Nothing is stored; analysis happens in memory and the scores are shown back to you.
+        # Empath AI — Emotion Detection (Text • Face • Voice • Video)
+        - Allow **camera** and **microphone** permissions when prompted.
+        - Keep videos **short (≤15s)** for faster results.
+        - No data is stored; analysis happens in memory and results are shown back to you.
        """
     )
 
@@ -65,4 +108,15 @@ with gr.Blocks(title="Empath AI — Multimodal Emotion Detection") as demo:
         a_out = gr.Label(num_top_classes=3)
         a_btn.click(analyze_voice, inputs=a_in, outputs=a_out)
 
-demo.launch()
+    with gr.Tab("Video (Record or Upload)"):
+        # Gradio will show a camera-record button and an upload option.
+        v_in = gr.Video(sources=["webcam", "upload"], label="Record or upload a short video (≤15s)", height=280)
+        with gr.Row():
+            fps = gr.Slider(1, 5, value=2, step=1, label="Sampling FPS (frames analyzed per second)")
+            maxf = gr.Slider(30, 240, value=120, step=10, label="Max Frames to Analyze")
+        v_btn = gr.Button("Analyze Video", variant="primary")
+        v_out = gr.Label(num_top_classes=3, label="Average Emotion (video)")
+        v_info = gr.Markdown()
+        v_btn.click(analyze_video, inputs=[v_in, fps, maxf], outputs=[v_out, v_info])
+
+demo.launch()
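
Note on the new video path: `analyze_video` opens the clip with `imageio.get_reader`, which for webcam/mp4 recordings normally needs imageio's ffmpeg backend (the separate `imageio-ffmpeg` package). This commit only touches `app.py`, so whether that dependency is already listed in the Space's requirements is an assumption. The sampling rule is `step = max(int(round(fps / max(1, sample_fps))), 1)`: a 25 fps clip with the default `sample_fps=2` gives `step = 12`, i.e. roughly two analyzed frames per second, capped at `max_frames`. A minimal sketch for checking both points locally, assuming a hypothetical test file `short_clip.mp4`:

```python
# Sketch only (not part of this commit): confirm imageio can decode a clip in
# this environment and preview the frame-sampling step used by analyze_video.
# Assumes imageio and imageio-ffmpeg are installed and "short_clip.mp4" exists.
import imageio

reader = imageio.get_reader("short_clip.mp4")     # raises if the ffmpeg backend is missing
fps = int(reader.get_meta_data().get("fps", 25))  # same default fallback as analyze_video

sample_fps = 2  # default value of the "Sampling FPS" slider
step = max(int(round(fps / max(1, sample_fps))), 1)  # e.g. 25 fps -> step of 12
print(f"fps={fps}, step={step} (~{fps // step} frames analyzed per second)")
```

If decoding fails on the Space (missing backend, unreadable file), the exception is caught inside `analyze_video` and surfaces in the UI as the `(error)` label with a "Video read error: ..." message, so the failure is visible rather than silent.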