Shreneek committed on
Commit
5ffbf24
·
verified ·
1 Parent(s): 10f2aa3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +398 -0
app.py ADDED
@@ -0,0 +1,398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ from sklearn.linear_model import LogisticRegression
7
+ import pickle
8
+ import os
9
+
10
# Global plotting defaults for every chart this app renders.
# Order matters: the seaborn context is layered on top of the ggplot style,
# and the default figure size is applied last.
plt.style.use('ggplot')
sns.set_context("talk")
plt.rcParams.update({'figure.figsize': (12, 8)})
14
+
15
+ # Function to generate synthetic meeting data
16
def generate_meeting_data(n_meetings=500, seed=42):
    """Generate a reproducible synthetic data set of meetings.

    Args:
        n_meetings: Number of meeting rows to generate.
        seed: Seed for the private random generator. The same seed always
            yields the same data.

    Returns:
        A DataFrame with one row per meeting containing the raw meeting
        features, a ``meeting_type`` label, an ``email_score`` rescaled to
        0-100 (one decimal place) and a binary ``could_be_email`` flag that
        is 1 when the score is above 65.
    """
    # A private generator keeps the data reproducible without reseeding the
    # process-wide NumPy RNG, which every other np.random caller shares.
    rng = np.random.RandomState(seed)

    # NOTE: the order of the draws below determines the generated values;
    # keep it stable so a given seed keeps producing the same data set.
    data = {
        'meeting_id': range(1, n_meetings + 1),
        'duration_minutes': rng.choice(
            [15, 30, 45, 60, 90, 120],
            size=n_meetings,
            p=[0.1, 0.25, 0.2, 0.3, 0.1, 0.05],
        ),
        'n_participants': rng.randint(2, 15, size=n_meetings),
        'presenter_talk_percent': rng.uniform(30, 95, size=n_meetings),
        'questions_asked': rng.randint(0, 12, size=n_meetings),
        'actionable_items': rng.randint(0, 8, size=n_meetings),
        'silence_percent': rng.uniform(0, 40, size=n_meetings),
        'topic_changes': rng.randint(1, 10, size=n_meetings),
        'slides_count': rng.randint(0, 40, size=n_meetings),
    }

    meeting_topics = [
        "Weekly Status Update", "Quarterly Planning", "Project Kickoff",
        "Brainstorming Session", "Customer Feedback Review", "Budget Review",
        "Team Building", "Product Demo", "Strategic Alignment", "Post-Mortem",
        "OKR Review", "All-Hands", "Happy Hour Planning",
    ]
    data['meeting_type'] = rng.choice(meeting_topics, size=n_meetings)

    df = pd.DataFrame(data)

    # Hand-written linear score: positive weights push a meeting towards
    # "could have been an email", negative weights towards "needed a meeting".
    raw_score = (
        -0.2 * df['duration_minutes']        # longer meetings: less email-able
        - 0.5 * df['n_participants']         # more participants: less email-able
        + 0.3 * df['presenter_talk_percent']  # one-way talk: more email-able
        - 3 * df['questions_asked']          # questions signal real discussion
        - 5 * df['actionable_items']         # action items signal real outcomes
        + 0.5 * df['silence_percent']        # silence is wasted time
        - 2 * df['topic_changes']            # many topics: less email-able
        + 0.2 * df['slides_count']           # slide dumps can be emailed
        + rng.normal(0, 15, size=n_meetings)  # random noise
    )

    # Min-max rescale to 0-100. When every raw score is identical (for
    # example a single meeting) the range is zero and the division would
    # produce NaN, so fall back to a flat 0.0 score instead.
    lowest = raw_score.min()
    span = raw_score.max() - lowest
    if span > 0:
        df['email_score'] = ((raw_score - lowest) / span * 100).round(1)
    else:
        df['email_score'] = 0.0

    # Binary label the classifier is trained on.
    df['could_be_email'] = (df['email_score'] > 65).astype(int)

    return df
81
+
82
+ # Function to train the model
83
def train_model(df):
    """Fit the email-ability classifier on a meetings DataFrame.

    Args:
        df: DataFrame holding the eight numeric feature columns listed below
            plus the binary ``could_be_email`` target column.

    Returns:
        A ``(model, features)`` tuple: the fitted LogisticRegression and the
        list of feature column names, in the order used for training.
    """
    feature_columns = [
        'duration_minutes', 'n_participants', 'presenter_talk_percent',
        'questions_asked', 'actionable_items', 'silence_percent',
        'topic_changes', 'slides_count'
    ]

    # Fixed random_state so repeated start-ups fit the same model.
    classifier = LogisticRegression(random_state=42)
    classifier.fit(df[feature_columns], df['could_be_email'])

    return classifier, feature_columns
99
+
100
+ # Function to predict whether a meeting could be an email
101
def _draw_email_gauge(ax, probability, is_email):
    """Draw the semicircular email-ability gauge for one prediction on *ax*."""
    from matplotlib.colors import LinearSegmentedColormap

    radius = 1.0

    # Green -> yellow -> red: the more email-able, the redder the wedge.
    cmap = LinearSegmentedColormap.from_list(
        'email_cmap',
        [(0.0, 0.7, 0.0), (1.0, 1.0, 0.0), (1.0, 0.0, 0.0)],
        N=100,
    )
    gauge_color = cmap(probability / 100)

    # Outline of the dial.
    dial = np.linspace(0, np.pi, 100)
    ax.plot(radius * np.cos(dial), radius * np.sin(dial), color='gray', linewidth=3)

    # The "Meeting" label sits on the left (angle pi) and "Email" on the right
    # (angle 0), so 0 % must map to pi and 100 % to 0. Mapping the probability
    # straight onto the angle would point the needle at the wrong label.
    needle_theta = np.pi * (1 - probability / 100)
    ax.plot(
        [0, radius * np.cos(needle_theta)],
        [0, radius * np.sin(needle_theta)],
        color='black', linewidth=4,
    )

    # Coloured wedge swept from the "Meeting" end up to the needle.
    swept = np.linspace(np.pi, needle_theta, 100)
    ax.fill_between(
        radius * np.cos(swept), 0, radius * np.sin(swept),
        color=gauge_color, alpha=0.7,
    )

    ax.text(0, -0.2, f"{probability:.1f}% Email-able", ha='center',
            fontsize=24, fontweight='bold')
    ax.text(-1, 0.1, "Meeting", fontsize=16)
    ax.text(1, 0.1, "Email", fontsize=16)

    if is_email:
        decision_text = "VERDICT: This could have been an email!"
    else:
        decision_text = "VERDICT: This meeting seems necessary."
    ax.text(0, -0.4, decision_text, ha='center', fontsize=20,
            fontweight='bold', color='red' if is_email else 'green')

    ax.set_xlim(-1.2, 1.2)
    ax.set_ylim(-0.5, 1.2)
    ax.axis('off')
    ax.set_title("Meeting Email-ability Meter", fontsize=18)


def _draw_time_impact(ax, wasted_minutes):
    """Draw the per-meeting vs. yearly (weekly recurrence) wasted-time bars on *ax*."""
    labels = ['This Meeting', 'Annual Impact\n(if weekly)']
    values = [wasted_minutes, wasted_minutes * 52]  # 52 weekly occurrences

    ax.bar(labels, values, color=['#ff9999', '#ff5555'])

    # Value labels on top of the bars: minutes for the single meeting,
    # hours and 8-hour workdays for the yearly total.
    single, yearly = values
    ax.text(0, single + 5, f"{single:.0f} person-minutes", ha='center', fontsize=14)
    hours = yearly / 60
    days = hours / 8
    ax.text(1, yearly + 5, f"{hours:.0f} hours\n({days:.1f} workdays)",
            ha='center', fontsize=14)

    ax.set_title("Time Impact Analysis", fontsize=18)
    ax.set_ylabel("Wasted Time (person-minutes)", fontsize=14)


def predict_meeting(
    duration, participants, presenter_talk, questions,
    action_items, silence, topic_changes, slides
):
    """Score one meeting with the module-level ``model`` and chart the result.

    Args:
        duration: Meeting length in minutes.
        participants: Number of attendees.
        presenter_talk: Share of the time the presenter talks, in percent.
        questions: Number of questions asked.
        action_items: Number of actionable items.
        silence: Share of the time spent in silence, in percent.
        topic_changes: Number of topic changes.
        slides: Number of slides.

    Returns:
        A 5-tuple ``(fig, probability, is_email, wasted_minutes,
        wasted_workdays)``: the matplotlib figure, the predicted email
        probability in percent, the model's yes/no verdict, the wasted
        person-minutes and the same amount expressed in 8-hour workdays.
    """
    # Built without pyplot: a pyplot figure stays registered until it is
    # explicitly closed, so creating one per click would accumulate figures
    # for the lifetime of the server process.
    from matplotlib.figure import Figure

    # Column names must match the ones the model was trained on.
    input_data = pd.DataFrame({
        'duration_minutes': [duration],
        'n_participants': [participants],
        'presenter_talk_percent': [presenter_talk],
        'questions_asked': [questions],
        'actionable_items': [action_items],
        'silence_percent': [silence],
        'topic_changes': [topic_changes],
        'slides_count': [slides]
    })

    # Convert NumPy scalars to plain Python values before they are handed to
    # the UI components.
    probability = float(model.predict_proba(input_data)[0][1] * 100)
    is_email = bool(model.predict(input_data)[0])

    # An email-able meeting counts as fully wasted; a necessary one is still
    # charged 20 % of its person-minutes.
    person_minutes = duration * participants
    wasted_minutes = person_minutes if is_email else person_minutes * 0.2
    wasted_workdays = wasted_minutes / (8 * 60)  # assuming 8-hour workday

    fig = Figure(figsize=(16, 8))
    ax1, ax2 = fig.subplots(1, 2)
    _draw_email_gauge(ax1, probability, is_email)
    _draw_time_impact(ax2, wasted_minutes)
    fig.tight_layout()

    return fig, probability, is_email, wasted_minutes, wasted_workdays
195
+
196
+ # Create a personalized report
197
def generate_report(
    meeting_type, duration, participants, presenter_talk, questions,
    action_items, silence, topic_changes, slides, is_email, probability,
    wasted_minutes, wasted_workdays
):
    """Build the HTML report shown below the charts.

    Only ``meeting_type``, ``duration``, ``participants``, ``is_email``,
    ``probability``, ``wasted_minutes`` and ``wasted_workdays`` appear in the
    report; the remaining parameters are accepted so the signature lines up
    with the full list of UI inputs but are not used.

    Args:
        meeting_type: Free-text meeting label; HTML-escaped before display.
        duration: Meeting length in minutes.
        participants: Number of attendees.
        is_email: Truthy when the verdict is "could have been an email".
        probability: Email-ability score in percent.
        wasted_minutes: Wasted person-minutes for this meeting.
        wasted_workdays: The same amount in workdays.

    Returns:
        The report as a single HTML string.
    """
    from html import escape

    if is_email:
        title = "📧 THIS MEETING COULD HAVE BEEN AN EMAIL 📧"
        color = "red"
    else:
        title = "✅ This meeting appears to be necessary"
        color = "green"

    # meeting_type is the only free-text value interpolated into the markup;
    # escape it so it can never be interpreted as HTML.
    safe_meeting_type = escape(str(meeting_type))

    report = f"""
    <div style="font-family: Arial, sans-serif; padding: 20px; max-width: 800px; margin: 0 auto;">
    <h1 style="color: {color}; text-align: center;">{title}</h1>

    <div style="background-color: #f5f5f5; border-radius: 10px; padding: 20px; margin-top: 20px;">
    <h2>Meeting Analysis</h2>
    <p><strong>Meeting Type:</strong> {safe_meeting_type}</p>
    <p><strong>Duration:</strong> {duration} minutes</p>
    <p><strong>Participants:</strong> {participants} people</p>
    <p><strong>Email-ability Score:</strong> <span style="font-size: 1.2em; font-weight: bold;">{probability:.1f}%</span></p>
    </div>

    <div style="background-color: #fff3f3; border-radius: 10px; padding: 20px; margin-top: 20px;">
    <h2>Economic Impact</h2>
    <p><strong>Time Wasted in This Meeting:</strong> {wasted_minutes:.0f} person-minutes</p>
    <p><strong>Equivalent Workdays:</strong> {wasted_workdays:.2f} days</p>
    <p><strong>Annual Impact (if held weekly):</strong> {wasted_workdays * 52:.1f} workdays</p>
    <p><strong>Estimated Annual Cost:</strong> ${wasted_minutes * 52 * 0.5:.0f}</p>
    </div>
    """

    # Recommendations depend on the verdict.
    report += """
    <div style="background-color: #f0f8ff; border-radius: 10px; padding: 20px; margin-top: 20px;">
    <h2>Recommendations</h2>
    """

    if is_email:
        report += """
    <ul>
    <li>Convert this meeting to an async email or Slack thread</li>
    <li>If a meeting is necessary, reduce the participant count by 50%</li>
    <li>Consider recording a 5-minute video update instead</li>
    <li>Create a shared document for status updates</li>
    </ul>
    """
    else:
        report += """
    <ul>
    <li>This meeting seems justified, but consider reducing duration</li>
    <li>Send an agenda in advance to increase focus</li>
    <li>Use a timer to keep discussions on track</li>
    <li>End with clear action items and owners</li>
    </ul>
    """

    # Close the recommendations box, add the footer, close the outer wrapper.
    report += """
    </div>

    <div style="text-align: center; font-style: italic; margin-top: 30px; color: #666;">
    <p>Analysis generated by the Meeting-That-Could-Have-Been-An-Email Detector</p>
    <p>Results are for entertainment purposes. Actual productivity may vary.</p>
    </div>
    </div>
    """

    return report
266
+
267
# Generate the synthetic data set and fit the classifier once at start-up;
# predict_meeting reads the resulting module-level `model`.
print("Generating synthetic data and training model...")
df = generate_meeting_data()
model, features = train_model(df)

# Gradio interface: inputs in two columns, then the analyse button, then the
# charts, headline numbers and HTML report.
with gr.Blocks(title="Meeting Email Detector") as demo:
    gr.Markdown(
        """
        # 📧 The Meeting-That-Could-Have-Been-An-Email Detector

        Have you ever sat through a meeting thinking "this could have been an email"?
        Now you can scientifically prove it! Enter your meeting details below to analyze
        whether your meeting is necessary or could be replaced with an email.

        *Note: This is a humor project using synthetic data. Results are meant to be entertaining, not prescriptive.*
        """
    )

    with gr.Row():
        with gr.Column():
            meeting_type = gr.Dropdown(
                choices=[
                    "Weekly Status Update", "Quarterly Planning", "Project Kickoff",
                    "Brainstorming Session", "Customer Feedback Review", "Budget Review",
                    "Team Building", "Product Demo", "Strategic Alignment", "Post-Mortem",
                    "OKR Review", "All-Hands", "Happy Hour Planning"
                ],
                label="Meeting Type",
                value="Weekly Status Update"
            )

            duration = gr.Slider(
                minimum=15, maximum=120, value=60, step=15,
                label="Duration (minutes)"
            )

            participants = gr.Slider(
                minimum=2, maximum=20, value=6, step=1,
                label="Number of Participants"
            )

            presenter_talk = gr.Slider(
                minimum=10, maximum=100, value=70, step=5,
                label="Presenter Talk Percentage (%)"
            )

            questions = gr.Slider(
                minimum=0, maximum=15, value=4, step=1,
                label="Expected Questions from Audience"
            )

        with gr.Column():
            action_items = gr.Slider(
                minimum=0, maximum=10, value=3, step=1,
                label="Actionable Items Expected"
            )

            silence = gr.Slider(
                minimum=0, maximum=50, value=15, step=5,
                label="Expected Silence/Awkward Pauses (%)"
            )

            topic_changes = gr.Slider(
                minimum=1, maximum=15, value=4, step=1,
                label="Number of Distinct Topics"
            )

            slides = gr.Slider(
                minimum=0, maximum=50, value=10, step=1,
                label="Number of Slides/Visual Aids"
            )

    analyze_btn = gr.Button("Analyze This Meeting", variant="primary")

    with gr.Row():
        with gr.Column():
            result_plot = gr.Plot(label="Analysis Results")

        with gr.Column():
            with gr.Row():
                email_score = gr.Number(label="Email-ability Score (%)")
                is_email = gr.Checkbox(label="Could Be An Email?")

            with gr.Row():
                wasted_time = gr.Number(label="Time Wasted (person-minutes)")
                wasted_days = gr.Number(label="Equivalent Workdays")

    report_html = gr.HTML(label="Detailed Report")

    # Step 1 fills the chart and the four headline values; step 2 then builds
    # the HTML report from the inputs plus those freshly written values.
    # predict_meeting is wired in directly because it already returns exactly
    # one value per output component. Wrapping it to append an extra value
    # would hand six results to five outputs.
    analyze_btn.click(
        fn=predict_meeting,
        inputs=[
            duration, participants, presenter_talk, questions,
            action_items, silence, topic_changes, slides
        ],
        outputs=[result_plot, email_score, is_email, wasted_time, wasted_days]
    ).then(
        fn=generate_report,
        inputs=[
            meeting_type, duration, participants, presenter_talk, questions,
            action_items, silence, topic_changes, slides, is_email, email_score,
            wasted_time, wasted_days
        ],
        outputs=report_html
    )

    gr.Markdown(
        """
        ## How It Works

        This tool uses a machine learning model trained on synthetic data representing hundreds of meetings.
        The model analyzes meeting characteristics to determine whether the meeting could be replaced with asynchronous communication.

        Key factors that make a meeting "email-able":
        - High presenter talk percentage (one-way information flow)
        - Few questions from participants
        - Few actionable outcomes
        - Long stretches of silence and slide-heavy information dumps

        ## About This Project

        This is a humor project that pokes fun at corporate meeting culture. While the analysis uses real data science techniques,
        the underlying data is synthetic. The tool is meant to be entertaining while making us think about how we use our time at work.

        Created as a data science portfolio project to demonstrate data visualization, interactive web apps, and a bit of workplace humor.
        """
    )

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()