Kuberwastaken commited on
Commit
2cadc3d
·
1 Parent(s): 00de4e8

Initial Commit

Browse files
Files changed (6) hide show
  1. .gitignore +1 -0
  2. README.md +7 -10
  3. gradio_app.py +586 -0
  4. model/analyzer.py +224 -0
  5. requirements.txt +10 -0
  6. script_search_api.py +279 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ treat-r1
README.md CHANGED
@@ -1,13 +1,10 @@
1
  ---
2
- title: TREAT R1
3
- emoji: 👀
4
- colorFrom: green
5
- colorTo: yellow
6
  sdk: gradio
7
- sdk_version: 5.13.1
8
- app_file: app.py
9
- pinned: false
10
- short_description: 'A DeepSeek R1 version of TREAT: an AI web application to Ana'
11
  ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: TREAT-R1
3
+ emoji: 🍩
4
+ colorFrom: black
5
+ colorTo: gray
6
  sdk: gradio
7
+ sdk_version: "5.11.0" # Replace with the correct version if different
8
+ app_file: gradio_app.py
9
+ pinned: true
 
10
  ---
 
 
gradio_app.py ADDED
@@ -0,0 +1,586 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from model.analyzer import analyze_content
3
+ import asyncio
4
+ import time
5
+ import httpx
6
+ import subprocess
7
+ import atexit
8
+
9
# Start the API server
def start_api_server():
    """Launch the FastAPI script-search backend as a child process.

    Returns the subprocess.Popen handle so the caller can stop it later.
    NOTE(review): ``--reload`` is a development flag; consider dropping it
    in production deployments.
    """
    # Start uvicorn in a subprocess
    process = subprocess.Popen(["uvicorn", "script_search_api:app", "--reload"])
    return process


# Stop the API server
def stop_api_server(process):
    """Terminate the backend process and reap it.

    Bug fix: the original called terminate() without wait(), which can leave
    a zombie process; we now wait briefly and escalate to kill() if needed.
    """
    process.terminate()
    try:
        process.wait(timeout=5)
    except subprocess.TimeoutExpired:
        # Process ignored SIGTERM — force it down and reap.
        process.kill()
        process.wait()


# Start the backend once at import time and make sure it is shut down
# when this process exits.
api_process = start_api_server()
atexit.register(stop_api_server, api_process)
22
+
23
+
24
# Custom CSS for the whole Gradio app: dark animated background, glowing
# "TREAT" header, styled tabs/inputs/buttons, and an animated footer.
# This is raw CSS passed verbatim to gr.Blocks(css=...); keep it unchanged
# unless the visual design is being revised.
custom_css = """
* {
    font-family: 'Inter', system-ui, sans-serif;
    transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
}

.gradio-container {
    background: #0a0a0f !important;
    color: #fff !important;
    min-height: 100vh;
    position: relative;
    overflow: hidden;
}

/* Animated Background */
.gradio-container::before {
    content: '';
    position: fixed;
    top: 0;
    left: 0;
    right: 0;
    bottom: 0;
    background:
        linear-gradient(125deg,
            #0a0a0f 0%,
            rgba(99, 102, 241, 0.05) 30%,
            rgba(99, 102, 241, 0.1) 50%,
            rgba(99, 102, 241, 0.05) 70%,
            #0a0a0f 100%);
    animation: gradientMove 15s ease infinite;
    background-size: 400% 400%;
    z-index: 0;
}

/* Floating Particles */
.gradio-container::after {
    content: '';
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background: radial-gradient(circle at center, transparent 0%, #0a0a0f 70%),
                url("data:image/svg+xml,%3Csvg width='100' height='100' viewBox='0 0 100 100' xmlns='http://www.w3.org/2000/svg'%3E%3Ccircle cx='50' cy='50' r='1' fill='rgba(99, 102, 241, 0.15)'/%3E%3C/svg%3E");
    opacity: 0.5;
    animation: floatingParticles 20s linear infinite;
    z-index: 1;
}

/* Futuristic Header */
.treat-title {
    text-align: center;
    padding: 3rem 1rem;
    position: relative;
    overflow: hidden;
    z-index: 2;
    background: linear-gradient(180deg,
        rgba(99, 102, 241, 0.1),
        transparent 70%);
}

.treat-title::before {
    content: '';
    position: absolute;
    top: 0;
    left: 50%;
    width: 80%;
    height: 1px;
    background: linear-gradient(90deg,
        transparent,
        rgba(99, 102, 241, 0.5),
        transparent);
    transform: translateX(-50%);
    animation: scanline 3s ease-in-out infinite;
}

.treat-title h1 {
    font-size: 4.5rem;
    font-weight: 800;
    background: linear-gradient(135deg,
        #2a2b55 0%,
        #6366f1 50%,
        #2a2b55 100%);
    background-size: 200% auto;
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    margin-bottom: 0.5rem;
    letter-spacing: -0.05em;
    animation: gradientFlow 8s ease infinite;
    position: relative;
}

.treat-title h1::after {
    content: attr(data-text);
    position: absolute;
    left: 0;
    top: 0;
    width: 100%;
    height: 100%;
    background: linear-gradient(135deg,
        transparent 0%,
        rgba(99, 102, 241, 0.4) 50%,
        transparent 100%);
    background-size: 200% auto;
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    opacity: 0.5;
    animation: textGlow 4s ease-in-out infinite;
}

.treat-title p {
    font-size: 1.1rem;
    color: rgba(255, 255, 255, 0.7);
    max-width: 600px;
    margin: 0 auto;
    position: relative;
    animation: fadeInUp 1s ease-out;
}

/* Tabs Styling */
.tabs {
    background: rgba(17, 17, 27, 0.7);
    border: 1px solid rgba(99, 102, 241, 0.2);
    border-radius: 16px;
    padding: 1rem;
    margin: 0 1rem 2rem 1rem;
    position: relative;
    z-index: 2;
    backdrop-filter: blur(10px);
    box-shadow: 0 0 30px rgba(99, 102, 241, 0.1);
    animation: floatIn 1s ease-out;
}

.tabs::before {
    content: '';
    position: absolute;
    top: -1px;
    left: -1px;
    right: -1px;
    bottom: -1px;
    background: linear-gradient(45deg,
        rgba(99, 102, 241, 0.1),
        transparent,
        rgba(99, 102, 241, 0.1));
    border-radius: 16px;
    z-index: -1;
    animation: borderGlow 4s ease-in-out infinite;
}

/* Content Area */
.content-area {
    background: rgba(17, 17, 27, 0.7) !important;
    border: 1px solid rgba(99, 102, 241, 0.2) !important;
    border-radius: 12px !important;
    padding: 1.5rem !important;
    backdrop-filter: blur(10px);
    position: relative;
    overflow: hidden;
    animation: fadeScale 0.5s ease-out;
}

.content-area::before {
    content: '';
    position: absolute;
    top: -50%;
    left: -50%;
    width: 200%;
    height: 200%;
    background: radial-gradient(circle at center,
        rgba(99, 102, 241, 0.1) 0%,
        transparent 70%);
    animation: rotateGradient 10s linear infinite;
}

/* Input Fields */
.gradio-textbox textarea {
    background: rgba(17, 17, 27, 0.6) !important;
    border: 1px solid rgba(99, 102, 241, 0.3) !important;
    border-radius: 8px !important;
    color: rgba(255, 255, 255, 0.9) !important;
    font-size: 0.95rem !important;
    line-height: 1.6 !important;
    padding: 1rem !important;
    transition: all 0.3s ease;
    position: relative;
    z-index: 2;
}

.gradio-textbox textarea:focus {
    border-color: #6366f1 !important;
    box-shadow: 0 0 20px rgba(99, 102, 241, 0.2) !important;
    background: rgba(17, 17, 27, 0.8) !important;
    transform: translateY(-2px);
}

/* Buttons */
.gradio-button {
    background: linear-gradient(45deg,
        #6366f1,
        #818cf8,
        #6366f1) !important;
    background-size: 200% auto !important;
    border: none !important;
    border-radius: 8px !important;
    color: white !important;
    font-weight: 600 !important;
    font-size: 0.95rem !important;
    padding: 0.75rem 1.5rem !important;
    letter-spacing: 0.025em !important;
    position: relative;
    overflow: hidden;
    transition: all 0.3s ease !important;
    animation: gradientFlow 3s ease infinite;
}

.gradio-button::before {
    content: '';
    position: absolute;
    top: -50%;
    left: -50%;
    width: 200%;
    height: 200%;
    background: radial-gradient(circle at center,
        rgba(255, 255, 255, 0.2) 0%,
        transparent 70%);
    transform: scale(0);
    transition: transform 0.5s ease;
}

.gradio-button:hover {
    transform: translateY(-2px);
    box-shadow: 0 5px 20px rgba(99, 102, 241, 0.4) !important;
}

.gradio-button:hover::before {
    transform: scale(1);
}

/* Results Area */
.results-area {
    background: rgba(17, 17, 27, 0.7) !important;
    border: 1px solid rgba(99, 102, 241, 0.2) !important;
    border-radius: 12px !important;
    margin-top: 2rem !important;
    backdrop-filter: blur(10px);
    animation: slideUp 0.5s ease-out;
    position: relative;
    overflow: hidden;
}

.footer {
    text-align: center;
    padding: 2rem 0;
    margin-top: 3rem;
    font-size: 1.0rem;
    position: relative;
    z-index: 2;
}

.footer p {
    color: rgba(255, 255, 255, 0.8);
    display: flex;
    align-items: center;
    justify-content: center;
    gap: 0.5rem;
}

.footer .heart {
    color: #6366f1;
    display: inline-block;
    position: relative;
    font-size: 1.0rem;
    transform-origin: center;
    animation: heartbeat 1.5s ease infinite;
}

.footer .heart::before,
.footer .heart::after {
    content: '✦';
    position: absolute;
    opacity: 0;
    font-size: 0.6rem;
    animation: sparkle 1.5s ease infinite;
}

.footer .heart::before {
    top: -8px;
    left: -8px;
    animation-delay: 0.2s;
}

.footer .heart::after {
    top: -8px;
    right: -8px;
    animation-delay: 0.4s;
}

.footer .name {
    color: #6366f1;
    text-decoration: none;
    position: relative;
    transition: all 0.3s ease;
    padding: 0 4px;
}

.footer .name:hover {
    color: #818cf8;
}

footer {
    visibility: hidden;
}

/* Animations */
@keyframes gradientMove {
    0% { background-position: 0% 50%; }
    50% { background-position: 100% 50%; }
    100% { background-position: 0% 50%; }
}

@keyframes floatingParticles {
    0% { transform: translateY(0); }
    100% { transform: translateY(-100%); }
}

@keyframes scanline {
    0% { transform: translateX(-150%) scaleX(0.5); opacity: 0; }
    50% { transform: translateX(-50%) scaleX(1); opacity: 1; }
    100% { transform: translateX(50%) scaleX(0.5); opacity: 0; }
}

@keyframes gradientFlow {
    0% { background-position: 0% 50%; }
    50% { background-position: 100% 50%; }
    100% { background-position: 0% 50%; }
}

@keyframes textGlow {
    0% { opacity: 0.3; transform: scale(1); }
    50% { opacity: 0.5; transform: scale(1.02); }
    100% { opacity: 0.3; transform: scale(1); }
}

@keyframes borderGlow {
    0% { opacity: 0.5; }
    50% { opacity: 1; }
    100% { opacity: 0.5; }
}

@keyframes rotateGradient {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}

@keyframes fadeScale {
    0% { opacity: 0; transform: scale(0.95); }
    100% { opacity: 1; transform: scale(1); }
}

@keyframes slideUp {
    0% { opacity: 0; transform: translateY(20px); }
    100% { opacity: 1; transform: translateY(0); }
}

@keyframes floatIn {
    0% { opacity: 0; transform: translateY(20px); }
    100% { opacity: 1; transform: translateY(0); }
}

@keyframes fadeInUp {
    0% { opacity: 0; transform: translateY(10px); }
    100% { opacity: 1; transform: translateY(0); }
}

@keyframes heartbeat {
    0% { transform: scale(1); }
    10% { transform: scale(1.2); }
    20% { transform: scale(0.9); }
    30% { transform: scale(1.1); }
    40% { transform: scale(0.95); }
    50% { transform: scale(1); }
    100% { transform: scale(1); }
}

@keyframes sparkle {
    0% { transform: scale(0); opacity: 0; }
    50% { transform: scale(1.2); opacity: 1; }
    100% { transform: scale(0); opacity: 0; }
}
"""
414
# Bug fix: this span previously contained byte-for-byte duplicates of
# start_api_server()/stop_api_server() AND a second call to
# start_api_server() plus a second atexit.register(...). That spawned a
# second uvicorn backend process on import and registered a redundant exit
# handler. The definitions and registration near the top of the file are
# the single source of truth; the duplicates are removed.
427
+
428
async def analyze_with_progress(movie_name, progress=gr.Progress()):
    """Handle movie-script analysis via the local FastAPI backend.

    Starts an analysis task for ``movie_name`` on the backend, then polls
    the progress endpoint every 0.5 s, forwarding progress to Gradio.

    Returns a user-facing string: a trigger list, a "no triggers" message,
    or an "Error: ..." message. Never returns None.
    """
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            # Start the analysis and obtain a task id to poll.
            response = await client.get(
                "http://localhost:8000/api/start_analysis",
                params={"movie_name": movie_name}
            )
            response.raise_for_status()
            task_id = response.json()["task_id"]

            # Poll for progress until the backend reports completion.
            while True:
                progress_response = await client.get(
                    f"http://localhost:8000/api/progress/{task_id}"
                )
                progress_response.raise_for_status()
                status = progress_response.json()

                # Mirror the backend's progress in the Gradio UI.
                progress(status["progress"], desc=status["status"])

                if status["is_complete"]:
                    if status["error"]:
                        return f"Error: {status['error']}"
                    if status["result"]:
                        triggers = status["result"].get("detected_triggers", [])
                        if not triggers or triggers == ["None"]:
                            return "✓ No triggers detected in the content."
                        trigger_list = "\n".join([f"• {trigger}" for trigger in triggers])
                        return f"⚠ Triggers Detected:\n{trigger_list}"
                    # Bug fix: previously this case broke out of the loop and
                    # the function returned None, leaving the output box blank.
                    return "Error: analysis completed but no result was returned."

                await asyncio.sleep(0.5)

    except Exception as e:
        # Surface any network/JSON failure as a readable message.
        return f"Error: {str(e)}"
467
+
468
def analyze_with_loading(text, progress=gr.Progress()):
    """
    Synchronous wrapper for the async analyze_content function with smooth
    progress updates.

    The sleeps below are purely cosmetic: they animate the progress bar so
    the UI does not jump straight from 0 to done.

    Returns a user-facing string (trigger list, no-trigger message, or an
    error message).
    """
    # Initialize progress
    progress(0, desc="Starting analysis...")

    # Initial setup phase - smoother progression
    for i in range(25):
        time.sleep(0.04)  # Slightly longer sleep for smoother animation
        progress((i + 1) / 100, desc="Initializing analysis...")

    # Pre-processing phase
    for i in range(25, 45):
        time.sleep(0.03)
        progress((i + 1) / 100, desc="Pre-processing content...")

    # Perform analysis
    progress(0.45, desc="Analyzing content...")
    try:
        result = asyncio.run(analyze_content(text))

        # Analysis progress simulation
        for i in range(45, 75):
            time.sleep(0.03)
            progress((i + 1) / 100, desc="Processing results...")

    except Exception as e:
        return f"Error during analysis: {str(e)}"

    # Final processing with smooth progression
    for i in range(75, 100):
        time.sleep(0.02)
        progress((i + 1) / 100, desc="Finalizing results...")

    # Format the results.
    # Bug fix: use .get() (a malformed result dict previously raised
    # KeyError) and treat an empty list like "no triggers", matching the
    # behavior of analyze_with_progress.
    triggers = result.get("detected_triggers", [])
    if not triggers or triggers == ["None"]:
        return "✓ No triggers detected in the content."
    trigger_list = "\n".join([f"• {trigger}" for trigger in triggers])
    return f"⚠ Triggers Detected:\n{trigger_list}"
510
+
511
# Update the Gradio interface with fixed button handling.
# (Cleanup: this span previously re-imported gradio, model.analyzer.analyze_content,
# asyncio, time, httpx, subprocess and atexit mid-file — all of which are already
# imported at the top of the file — plus leftover scaffolding comments. The
# redundant imports were harmless no-ops and have been removed.)
524
# Build the Gradio UI using the custom CSS defined above.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
    # Title section (the data-text attribute feeds the CSS glow effect).
    gr.HTML("""
        <div class="treat-title">
            <h1 data-text="TREAT">TREAT</h1>
            <p>Trigger Recognition for Enjoyable and Appropriate Television</p>
        </div>
    """)

    with gr.Tabs() as tabs:
        with gr.Tab("Content Analysis"):  # Changed from TabItem to Tab
            with gr.Column():
                # Free-text input analyzed directly by the local model.
                input_text = gr.Textbox(
                    label="ANALYZE CONTENT",
                    placeholder="Enter the content you want to analyze...",
                    lines=8
                )
                analyze_btn = gr.Button("✨ Analyze")

        with gr.Tab("Movie Search"):  # Changed from TabItem to Tab
            with gr.Column():
                # Movie title handed off to the FastAPI backend for lookup.
                search_query = gr.Textbox(
                    label="SEARCH MOVIES",
                    placeholder="Type a movie title to search...",
                    lines=1
                )
                search_button = gr.Button("🔍 Search")

    # Shared, read-only output box used by both tabs.
    output_text = gr.Textbox(
        label="ANALYSIS RESULTS",
        lines=5,
        interactive=False
    )

    # NOTE(review): status_text is created but never updated by any handler —
    # presumably reserved for future status messages.
    status_text = gr.Markdown(
        value=""
    )

    # Define click events
    analyze_btn.click(
        fn=analyze_with_loading,
        inputs=input_text,
        outputs=output_text
    )

    search_button.click(
        fn=analyze_with_progress,
        inputs=search_query,
        outputs=output_text
    )

    gr.HTML("""
        <div class="footer">
            <p>Made with <span class="heart">💖</span> by <a href="https://www.linkedin.com/in/kubermehta/" target="_blank">Kuber Mehta</a></p>
        </div>
    """)

if __name__ == "__main__":
    # Local launch; debug/show_error surface tracebacks in the browser UI.
    iface.launch(
        share=False,
        debug=True,
        show_error=True
    )
model/analyzer.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ import torch
4
+ from datetime import datetime
5
+ import gradio as gr
6
+ from typing import Dict, List, Union, Optional
7
+ import logging
8
+ import traceback
9
+
10
# Configure logging
# Module-level logger at INFO so model-loading and analysis milestones are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
13
+
14
class ContentAnalyzer:
    """Content-sensitivity analyzer backed by a local DeepSeek R1 distilled model.

    The model is loaded lazily on the first call to :meth:`analyze_script`.
    Text is split into overlapping character chunks, each chunk is analyzed
    with a single prompt, and the detected trigger categories are unioned.
    """

    # Single source of truth for the HF model id (was duplicated inline).
    MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

    def __init__(self):
        # Prefer GPU when available; model/tokenizer stay None until loaded.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = None
        self.tokenizer = None
        logger.info(f"Initialized analyzer with device: {self.device}")

    async def load_model(self, progress=None) -> None:
        """Load the model and tokenizer with progress updates and detailed logging.

        Raises whatever from_pretrained raises (after logging a traceback).
        """
        try:
            print("\n=== Starting Model Loading ===")
            print(f"Time: {datetime.now()}")

            if progress:
                progress(0.1, "Loading tokenizer...")

            print("Loading tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.MODEL_NAME,
                use_fast=True
            )

            if progress:
                progress(0.3, "Loading model...")

            print(f"Loading model on {self.device}...")
            # fp16 only on CUDA; device_map="auto" lets accelerate place weights.
            self.model = AutoModelForCausalLM.from_pretrained(
                self.MODEL_NAME,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto"
            )

            if progress:
                progress(0.5, "Model loaded successfully")

            print("Model and tokenizer loaded successfully")
            logger.info(f"Model loaded successfully on {self.device}")
        except Exception as e:
            logger.error(f"Error loading model: {str(e)}")
            print(f"\nERROR DURING MODEL LOADING: {str(e)}")
            print("Stack trace:")
            traceback.print_exc()
            raise

    def _chunk_text(self, text: str, chunk_size: int = 2048, overlap: int = 256) -> List[str]:
        """Split text into overlapping chunks for processing.

        Chunks are measured in CHARACTERS, not tokens. Returns an empty list
        for empty input.
        """
        chunks = []
        step = chunk_size - overlap  # hoisted loop-invariant stride
        for start in range(0, len(text), step):
            chunks.append(text[start:start + chunk_size])
        # Bug fix: the log previously claimed "token overlap"; the split is
        # character-based.
        print(f"Split text into {len(chunks)} chunks with {overlap} character overlap")
        return chunks

    async def analyze_chunk(
        self,
        chunk: str,
        progress: "Optional[gr.Progress]" = None,
        current_progress: float = 0,
        progress_step: float = 0
    ) -> List[str]:
        """Analyze a single chunk of text for triggers with detailed logging.

        Returns the subset of trigger categories the model's response names;
        returns [] on any generation error (best-effort per chunk).
        """
        print(f"\n--- Processing Chunk ---")
        print(f"Chunk text (preview): {chunk[:50]}...")

        # Comprehensive trigger categories
        categories = [
            "Violence", "Death", "Substance Use", "Gore",
            "Vomit", "Sexual Content", "Sexual Abuse",
            "Self-Harm", "Gun Use", "Animal Cruelty",
            "Mental Health Issues"
        ]

        # Comprehensive prompt for single-pass analysis
        prompt = f"""Comprehensive Content Sensitivity Analysis

Carefully analyze the following text for sensitive content categories:
{', '.join(categories)}

Detailed Requirements:
1. Thoroughly examine entire text chunk
2. Identify presence of ANY of these categories
3. Provide clear, objective assessment
4. Minimal subjective interpretation

TEXT CHUNK:
{chunk}

RESPONSE FORMAT:
- List categories DEFINITIVELY present
- Brief objective justification for each
- Strict YES/NO categorization"""

        try:
            print("Sending prompt to model...")
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=4096)
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            with torch.no_grad():
                print("Generating response...")
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=256,
                    do_sample=True,
                    temperature=0.2,
                    top_p=0.9,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            response_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
            print("Full Model Response:", response_text)

            # Parse detected triggers: a category counts as detected if its
            # name appears (case-insensitively) anywhere in the response.
            detected_triggers = [
                category for category in categories
                if category.upper() in response_text.upper()
            ]

            print(f"Detected triggers in chunk: {detected_triggers}")

            if progress:
                # Cap at 0.9 so "finalizing" still has visible headroom.
                progress(min(current_progress + progress_step, 0.9), "Analyzing chunk...")

            return detected_triggers

        except Exception as e:
            logger.error(f"Error analyzing chunk: {str(e)}")
            print(f"Error during chunk analysis: {str(e)}")
            traceback.print_exc()
            return []

    async def analyze_script(self, script: str, progress: "Optional[gr.Progress]" = None) -> List[str]:
        """Analyze the entire script for triggers with progress updates.

        Returns the de-duplicated trigger list, or ["None"] when nothing was
        detected (including for empty input).
        """
        print("\n=== Starting Script Analysis ===")
        print(f"Time: {datetime.now()}")

        if not self.model or not self.tokenizer:
            await self.load_model(progress)

        chunks = self._chunk_text(script)
        # Bug fix: an empty script produced ZeroDivisionError below
        # (0.4 / len(chunks)); treat it as "no triggers".
        if not chunks:
            return ["None"]

        identified_triggers = set()
        progress_step = 0.4 / len(chunks)
        current_progress = 0.5  # Starting after model loading

        for chunk in chunks:
            chunk_triggers = await self.analyze_chunk(
                chunk,
                progress,
                current_progress,
                progress_step
            )
            identified_triggers.update(chunk_triggers)
            # Bug fix: accumulate progress here — analyze_chunk's increment
            # was local to the call, so the bar never advanced past 0.5+step.
            current_progress += progress_step

        if progress:
            progress(0.95, "Finalizing results...")

        final_triggers = list(identified_triggers)
        print("\n=== Analysis Complete ===")
        print("Final Results:", final_triggers)

        return final_triggers if final_triggers else ["None"]
175
+
176
# Reuse one analyzer (and hence one loaded model) across calls. Previously a
# fresh ContentAnalyzer was built per request, so the multi-GB model was
# reloaded from disk on every analysis.
_shared_analyzer: "Optional[ContentAnalyzer]" = None


async def analyze_content(
    script: str,
    progress: "Optional[gr.Progress]" = None
) -> Dict[str, Union[List[str], str]]:
    """Main analysis function for the Gradio interface.

    Returns a result dict with keys: detected_triggers, confidence, model,
    analysis_timestamp — plus an "error" key when analysis failed. Never
    raises: failures are reported inside the dict.
    """
    global _shared_analyzer

    print("\n=== Starting Content Analysis ===")
    print(f"Time: {datetime.now()}")

    if _shared_analyzer is None:
        _shared_analyzer = ContentAnalyzer()
    analyzer = _shared_analyzer

    try:
        triggers = await analyzer.analyze_script(script, progress)

        if progress:
            progress(1.0, "Analysis complete!")

        result = {
            "detected_triggers": triggers,
            "confidence": "High - Content detected" if triggers != ["None"] else "High - No concerning content detected",
            "model": "DeepSeek-R1-Distill-Qwen-1.5B",
            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        print("\nFinal Result Dictionary:", result)
        return result

    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        print(f"\nERROR OCCURRED: {str(e)}")
        print("Stack trace:")
        traceback.print_exc()
        return {
            "detected_triggers": ["Error occurred during analysis"],
            "confidence": "Error",
            "model": "DeepSeek-R1-Distill-Qwen-1.5B",
            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "error": str(e)
        }
214
+
215
if __name__ == "__main__":
    # Gradio interface
    # Standalone debug entry point: exposes the analyzer directly (raw JSON
    # output) without the full TREAT UI defined in gradio_app.py.
    iface = gr.Interface(
        fn=analyze_content,
        inputs=gr.Textbox(lines=8, label="Input Text"),
        outputs=gr.JSON(),
        title="Content Sensitivity Analysis",
        description="Analyze text content for sensitive topics using DeepSeek R1"
    )
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ flask
+ flask_cors
+ torch
+ gradio
+ transformers
+ accelerate
+ safetensors
+ huggingface-hub
+ beautifulsoup4
+ fastapi
+ uvicorn
+ httpx
+ requests
script_search_api.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # script_search_api.py
2
+ from fastapi import FastAPI, HTTPException
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ import asyncio
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, Optional
7
+ from pydantic import BaseModel
8
+ from dataclasses import dataclass
9
+ import logging
10
+ import requests
11
+ from bs4 import BeautifulSoup
12
+ from difflib import get_close_matches
13
+ from model.analyzer import analyze_content
14
+
15
+ logging.basicConfig(level=logging.INFO)
16
+ logger = logging.getLogger(__name__)
17
+
18
# FastAPI application exposing the script-search and analysis endpoints.
app = FastAPI()

# Allow cross-origin requests from any host; the API is consumed by a
# separately hosted front-end. NOTE(review): wildcard origins combined with
# allow_credentials=True is a very permissive CORS policy — tighten for
# production deployments.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
27
+
28
@dataclass
class ProgressState:
    """Server-side record of one analysis task's progress."""
    progress: float  # completion fraction in [0.0, 1.0]
    status: str  # human-readable status message shown to the client
    timestamp: datetime  # last update time; used for expiry/cleanup
    task_id: str  # identifier handed back to the client
    is_complete: bool = False  # set True once progress reaches 1.0
    result: Optional[dict] = None  # analysis output, present when finished
    error: Optional[str] = None  # error message if the task failed
37
+
38
class ProgressResponse(BaseModel):
    """Wire format returned by the task-id progress-polling endpoint."""
    progress: float  # completion fraction in [0.0, 1.0]
    status: str  # human-readable status message
    is_complete: bool  # whether the task has finished (success or error)
    result: Optional[dict] = None  # final analysis result, if complete
    error: Optional[str] = None  # error message, if the task failed
44
+
45
# Global progress tracker mapping a tracker key to its latest state.
# NOTE(review): keys are task ids in most paths, but fetch_script uses the
# movie name as the key — verify callers agree on the keying scheme.
progress_tracker: Dict[str, ProgressState] = {}

# IMSDb script database endpoints scraped by fetch_script.
BASE_URL = "https://imsdb.com"
ALL_SCRIPTS_URL = f"{BASE_URL}/all-scripts.html"
50
+
51
def create_task_id(movie_name: str) -> str:
    """Build a unique task identifier: the movie name plus the current Unix time."""
    stamp = datetime.now().timestamp()
    return f"{movie_name}-{stamp}"
54
+
55
async def cleanup_old_tasks():
    """Background loop: every 5 minutes, drop tasks not updated for over an hour."""
    while True:
        cutoff = datetime.now() - timedelta(hours=1)
        # Collect first, then delete, so we never mutate the dict mid-iteration.
        stale = [tid for tid, state in progress_tracker.items()
                 if state.timestamp < cutoff]
        for tid in stale:
            del progress_tracker[tid]
        await asyncio.sleep(300)  # pause five minutes between sweeps
66
+
67
@app.on_event("startup")
async def startup_event():
    """Reset the progress tracker and launch the periodic cleanup coroutine."""
    # NOTE(review): @app.on_event is deprecated in newer FastAPI versions in
    # favour of lifespan handlers — confirm the pinned FastAPI version.
    progress_tracker.clear()
    # Fire-and-forget background sweeper; runs for the life of the event loop.
    asyncio.create_task(cleanup_old_tasks())
    logger.info("Server started, progress tracker initialized")
73
+
74
def update_progress(task_id: str, progress: float, status: str, result: Optional[dict] = None, error: Optional[str] = None):
    """Record the latest state for *task_id* in the global tracker and log it.

    Args:
        task_id: Tracker key for the task being updated.
        progress: Completion fraction; >= 1.0 marks the task complete.
        status: Human-readable status message.
        result: Optional final analysis result.
        error: Optional error message for failed tasks.
    """
    snapshot = ProgressState(
        progress=progress,
        status=status,
        timestamp=datetime.now(),
        task_id=task_id,
        is_complete=progress >= 1.0,  # finished once progress hits 100%
        result=result,
        error=error,
    )
    progress_tracker[task_id] = snapshot
    logger.info(f"Task {task_id}: {status} (Progress: {progress * 100:.0f}%)")
87
+
88
# Strong references to in-flight background tasks. asyncio.create_task only
# keeps a weak reference to its task, so without this set a running analysis
# could be garbage-collected mid-flight (documented asyncio pitfall).
_background_tasks: set = set()

@app.get("/api/start_analysis")
async def start_analysis(movie_name: str):
    """Kick off a background analysis for *movie_name* and return its task id.

    The client polls /api/progress/{task_id} for status and the final result.
    """
    task_id = create_task_id(movie_name)
    update_progress(task_id, 0.0, "Starting analysis...")

    # Start the analysis task in the background so this request returns
    # immediately; retain a reference until the task finishes.
    task = asyncio.create_task(run_analysis(task_id, movie_name))
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)

    return {"task_id": task_id}
98
+
99
@app.get("/api/progress/{task_id}")
async def get_progress(task_id: str) -> ProgressResponse:
    """Return current progress for a task started via /api/start_analysis.

    Raises:
        HTTPException: 404 when the task id is unknown (never started,
            or already expired by the cleanup loop).
    """
    if task_id not in progress_tracker:
        raise HTTPException(status_code=404, detail="Task not found")

    state = progress_tracker[task_id]
    # Project the internal ProgressState onto the public response model.
    return ProgressResponse(
        progress=state.progress,
        status=state.status,
        is_complete=state.is_complete,
        result=state.result,
        error=state.error
    )
113
+
114
def find_movie_link(movie_name: str, soup: BeautifulSoup) -> str | None:
    """Fuzzy-match *movie_name* against every anchor on the index page.

    Returns the absolute URL of the best match, or None when nothing scores
    above the 0.6 similarity cutoff.
    """
    # Map lowercased link text -> href for every anchor on the page.
    titles_to_hrefs = {}
    for anchor in soup.find_all('a', href=True):
        titles_to_hrefs[anchor.text.strip().lower()] = anchor['href']

    matches = get_close_matches(movie_name.lower(), titles_to_hrefs.keys(), n=1, cutoff=0.6)
    if not matches:
        logger.info("No close match found.")
        return None

    best = matches[0]
    logger.info(f"Close match found: {best}")
    return BASE_URL + titles_to_hrefs[best]
125
+
126
def find_script_link(soup: BeautifulSoup, movie_name: str) -> str | None:
    """Locate the 'Read ... Script' link for *movie_name* on its IMSDb page.

    Returns the link's href, or None when no anchor matches.
    """
    # Exact-phrase candidates covering common capitalisations of the title.
    patterns = [
        f'Read "{movie_name}" Script',
        f'Read "{movie_name.title()}" Script',
        f'Read "{movie_name.upper()}" Script',
        f'Read "{movie_name.lower()}" Script'
    ]

    for anchor in soup.find_all('a', href=True):
        text = anchor.text.strip()
        lowered = text.lower()
        # First try the exact quoted phrasings, then fall back to a looser
        # "all words present" check (Read + Script + the title).
        if any(p.lower() in lowered for p in patterns):
            return anchor['href']
        if all(w.lower() in lowered for w in ["Read", "Script", movie_name]):
            return anchor['href']
    return None
142
+
143
def fetch_script(movie_name: str, task_id: str | None = None) -> str | None:
    """Fetch the full script text for *movie_name* from IMSDb.

    Walks three pages — the all-scripts index, the movie detail page, and the
    script page itself — reporting progress along the way.

    Args:
        movie_name: Title to search for (fuzzy-matched against IMSDb).
        task_id: Optional tracker key for progress updates. Defaults to the
            movie name, preserving the original keying for existing callers.

    Returns:
        The script text, or None on any network/parse failure.
    """
    # Fix: progress was keyed by movie name while other paths key by task id;
    # callers can now pass their task id so polling sees these updates too.
    progress_key = task_id or movie_name

    # Initial page load
    update_progress(progress_key, 0.1, "Fetching the script database...")
    try:
        # Fix: a timeout prevents a stalled IMSDb response from hanging the
        # task forever (requests has no default timeout).
        response = requests.get(ALL_SCRIPTS_URL, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        logger.error(f"Failed to load the main page: {str(e)}")
        return None

    # Search for movie
    update_progress(progress_key, 0.2, "Searching for the movie...")
    soup = BeautifulSoup(response.text, 'html.parser')
    movie_link = find_movie_link(movie_name, soup)

    if not movie_link:
        logger.error(f"Script for '{movie_name}' not found.")
        return None

    # Fetch movie page
    update_progress(progress_key, 0.3, "Loading movie details...")
    try:
        response = requests.get(movie_link, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        logger.error(f"Failed to load the movie page: {str(e)}")
        return None

    # Find script link
    update_progress(progress_key, 0.4, "Locating script download...")
    soup = BeautifulSoup(response.text, 'html.parser')
    script_link = find_script_link(soup, movie_name)

    if not script_link:
        logger.error(f"Unable to find script link for '{movie_name}'.")
        return None

    # Fetch script content
    script_page_url = BASE_URL + script_link
    update_progress(progress_key, 0.5, "Downloading script content...")

    try:
        response = requests.get(script_page_url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        logger.error(f"Failed to load the script: {str(e)}")
        return None

    # Extract script text: IMSDb renders the script body inside a <pre> tag.
    update_progress(progress_key, 0.6, "Extracting script text...")
    soup = BeautifulSoup(response.text, 'html.parser')
    script_content = soup.find('pre')

    if script_content:
        update_progress(progress_key, 0.7, "Script extracted successfully")
        return script_content.get_text()

    logger.error("Failed to extract script content.")
    return None
203
+
204
async def run_analysis(task_id: str, movie_name: str):
    """Background worker: fetch the script, analyze it, and record the outcome.

    All results — success or failure — are published through update_progress
    so the polling client always reaches a terminal state.
    """
    try:
        # Fetch script
        update_progress(task_id, 0.2, "Fetching script...")
        script = fetch_script(movie_name)
        if not script:
            raise Exception("Script not found")

        # Analyze content
        update_progress(task_id, 0.6, "Analyzing content...")
        analysis = await analyze_content(script)

        # Success: publish the final result at 100%.
        update_progress(task_id, 1.0, "Analysis complete", result=analysis)
    except Exception as err:
        # Surface the failure to the polling client via the tracker.
        logger.error(f"Error in analysis: {str(err)}", exc_info=True)
        update_progress(task_id, 1.0, "Error occurred", error=str(err))
223
+
224
@app.get("/api/fetch_and_analyze")
async def fetch_and_analyze(movie_name: str):
    """Synchronously fetch and analyze a movie script, with progress tracking.

    Unlike /api/start_analysis, this endpoint blocks until the analysis
    finishes and returns the result directly.

    Raises:
        HTTPException: 404 when the script cannot be found; 500 on any
            other failure.
    """
    task_id = create_task_id(movie_name)
    try:
        # Initialize progress
        update_progress(task_id, 0.0, "Starting script search...")

        # Fetch script
        script_text = fetch_script(movie_name)
        if not script_text:
            raise HTTPException(status_code=404, detail="Script not found or error occurred")

        # Analyze content
        update_progress(task_id, 0.8, "Analyzing script content...")
        result = await analyze_content(script_text)

        # Finalize
        update_progress(task_id, 1.0, "Analysis complete!")
        return result

    except HTTPException:
        # Bug fix: the deliberate 404 above was previously swallowed by the
        # generic handler below and re-raised as a 500. Let HTTP errors
        # propagate with their intended status code.
        progress_tracker.pop(task_id, None)
        progress_tracker.pop(movie_name, None)  # fetch_script keys progress by movie name
        raise
    except Exception as e:
        logger.error(f"Error in fetch_and_analyze: {str(e)}", exc_info=True)
        # Clean up progress entries for this request (both keying styles).
        progress_tracker.pop(task_id, None)
        progress_tracker.pop(movie_name, None)
        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")
251
+
252
@app.get("/api/progress")
def get_movie_progress(movie_name: str):
    """Return progress/status for an analysis keyed by movie name.

    Bug fix: this handler was previously also named ``get_progress``,
    silently shadowing the task-id variant at module level (F811). The
    routes still worked because FastAPI captures the function at decoration
    time, but the collision was an accident waiting to happen. Only the
    Python name changed; the URL path is identical.
    """
    if movie_name not in progress_tracker:
        # Unknown key: report a neutral "not started" state rather than 404.
        return {
            "progress": 0,
            "status": "Waiting to start..."
        }

    progress_info = progress_tracker[movie_name]

    # Expire entries that have not been updated for an hour.
    current_time = datetime.now()
    if (current_time - progress_info.timestamp).total_seconds() > 3600:  # 1 hour timeout
        del progress_tracker[movie_name]
        return {
            "progress": 0,
            "status": "Session expired. Please try again."
        }

    return {
        "progress": progress_info.progress,
        "status": progress_info.status
    }
276
+
277
if __name__ == "__main__":
    # Run the API directly with uvicorn when invoked as a script;
    # binds all interfaces on port 8000.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)