Kuberwastaken commited on
Commit
2cadc3d
·
1 Parent(s): 00de4e8

Initial Commit

Browse files
Files changed (6) hide show
  1. .gitignore +1 -0
  2. README.md +7 -10
  3. gradio_app.py +586 -0
  4. model/analyzer.py +224 -0
  5. requirements.txt +10 -0
  6. script_search_api.py +279 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ treat-r1
README.md CHANGED
@@ -1,13 +1,10 @@
1
  ---
2
- title: TREAT R1
3
- emoji: 👀
4
- colorFrom: green
5
- colorTo: yellow
6
  sdk: gradio
7
- sdk_version: 5.13.1
8
- app_file: app.py
9
- pinned: false
10
- short_description: 'A DeepSeek R1 version of TREAT: an AI web application to Ana'
11
  ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: TREAT-R1
3
+ emoji: 🍩
4
+ colorFrom: black
5
+ colorTo: gray
6
  sdk: gradio
7
+ sdk_version: "5.11.0" # Replace with the correct version if different
8
+ app_file: gradio_app.py
9
+ pinned: true
 
10
  ---
 
 
gradio_app.py ADDED
@@ -0,0 +1,586 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from model.analyzer import analyze_content
3
+ import asyncio
4
+ import time
5
+ import httpx
6
+ import subprocess
7
+ import atexit
8
+
9
# Start the API server
def start_api_server():
    """Launch the FastAPI script-search backend as a child process.

    Returns the subprocess.Popen handle so the caller can stop it later.
    NOTE(review): ``--reload`` is a development flag; consider dropping it
    in production deployments.
    """
    # Start uvicorn in a subprocess
    process = subprocess.Popen(["uvicorn", "script_search_api:app", "--reload"])
    return process


# Stop the API server
def stop_api_server(process):
    """Terminate the backend process and reap it.

    Bug fix: the original called terminate() without wait(), which can leave
    a zombie process; we now wait briefly and escalate to kill() if needed.
    """
    process.terminate()
    try:
        process.wait(timeout=5)
    except subprocess.TimeoutExpired:
        # Process ignored SIGTERM — force it down and reap.
        process.kill()
        process.wait()


# Start the backend once at import time and make sure it is shut down
# when this process exits.
api_process = start_api_server()
atexit.register(stop_api_server, api_process)
22
+
23
+
24
# Custom CSS for the whole Gradio app: dark animated background, glowing
# "TREAT" header, styled tabs/inputs/buttons, and an animated footer.
# This is raw CSS passed verbatim to gr.Blocks(css=...); keep it unchanged
# unless the visual design is being revised.
custom_css = """
* {
    font-family: 'Inter', system-ui, sans-serif;
    transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
}

.gradio-container {
    background: #0a0a0f !important;
    color: #fff !important;
    min-height: 100vh;
    position: relative;
    overflow: hidden;
}

/* Animated Background */
.gradio-container::before {
    content: '';
    position: fixed;
    top: 0;
    left: 0;
    right: 0;
    bottom: 0;
    background:
        linear-gradient(125deg,
            #0a0a0f 0%,
            rgba(99, 102, 241, 0.05) 30%,
            rgba(99, 102, 241, 0.1) 50%,
            rgba(99, 102, 241, 0.05) 70%,
            #0a0a0f 100%);
    animation: gradientMove 15s ease infinite;
    background-size: 400% 400%;
    z-index: 0;
}

/* Floating Particles */
.gradio-container::after {
    content: '';
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background: radial-gradient(circle at center, transparent 0%, #0a0a0f 70%),
                url("data:image/svg+xml,%3Csvg width='100' height='100' viewBox='0 0 100 100' xmlns='http://www.w3.org/2000/svg'%3E%3Ccircle cx='50' cy='50' r='1' fill='rgba(99, 102, 241, 0.15)'/%3E%3C/svg%3E");
    opacity: 0.5;
    animation: floatingParticles 20s linear infinite;
    z-index: 1;
}

/* Futuristic Header */
.treat-title {
    text-align: center;
    padding: 3rem 1rem;
    position: relative;
    overflow: hidden;
    z-index: 2;
    background: linear-gradient(180deg,
        rgba(99, 102, 241, 0.1),
        transparent 70%);
}

.treat-title::before {
    content: '';
    position: absolute;
    top: 0;
    left: 50%;
    width: 80%;
    height: 1px;
    background: linear-gradient(90deg,
        transparent,
        rgba(99, 102, 241, 0.5),
        transparent);
    transform: translateX(-50%);
    animation: scanline 3s ease-in-out infinite;
}

.treat-title h1 {
    font-size: 4.5rem;
    font-weight: 800;
    background: linear-gradient(135deg,
        #2a2b55 0%,
        #6366f1 50%,
        #2a2b55 100%);
    background-size: 200% auto;
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    margin-bottom: 0.5rem;
    letter-spacing: -0.05em;
    animation: gradientFlow 8s ease infinite;
    position: relative;
}

.treat-title h1::after {
    content: attr(data-text);
    position: absolute;
    left: 0;
    top: 0;
    width: 100%;
    height: 100%;
    background: linear-gradient(135deg,
        transparent 0%,
        rgba(99, 102, 241, 0.4) 50%,
        transparent 100%);
    background-size: 200% auto;
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    opacity: 0.5;
    animation: textGlow 4s ease-in-out infinite;
}

.treat-title p {
    font-size: 1.1rem;
    color: rgba(255, 255, 255, 0.7);
    max-width: 600px;
    margin: 0 auto;
    position: relative;
    animation: fadeInUp 1s ease-out;
}

/* Tabs Styling */
.tabs {
    background: rgba(17, 17, 27, 0.7);
    border: 1px solid rgba(99, 102, 241, 0.2);
    border-radius: 16px;
    padding: 1rem;
    margin: 0 1rem 2rem 1rem;
    position: relative;
    z-index: 2;
    backdrop-filter: blur(10px);
    box-shadow: 0 0 30px rgba(99, 102, 241, 0.1);
    animation: floatIn 1s ease-out;
}

.tabs::before {
    content: '';
    position: absolute;
    top: -1px;
    left: -1px;
    right: -1px;
    bottom: -1px;
    background: linear-gradient(45deg,
        rgba(99, 102, 241, 0.1),
        transparent,
        rgba(99, 102, 241, 0.1));
    border-radius: 16px;
    z-index: -1;
    animation: borderGlow 4s ease-in-out infinite;
}

/* Content Area */
.content-area {
    background: rgba(17, 17, 27, 0.7) !important;
    border: 1px solid rgba(99, 102, 241, 0.2) !important;
    border-radius: 12px !important;
    padding: 1.5rem !important;
    backdrop-filter: blur(10px);
    position: relative;
    overflow: hidden;
    animation: fadeScale 0.5s ease-out;
}

.content-area::before {
    content: '';
    position: absolute;
    top: -50%;
    left: -50%;
    width: 200%;
    height: 200%;
    background: radial-gradient(circle at center,
        rgba(99, 102, 241, 0.1) 0%,
        transparent 70%);
    animation: rotateGradient 10s linear infinite;
}

/* Input Fields */
.gradio-textbox textarea {
    background: rgba(17, 17, 27, 0.6) !important;
    border: 1px solid rgba(99, 102, 241, 0.3) !important;
    border-radius: 8px !important;
    color: rgba(255, 255, 255, 0.9) !important;
    font-size: 0.95rem !important;
    line-height: 1.6 !important;
    padding: 1rem !important;
    transition: all 0.3s ease;
    position: relative;
    z-index: 2;
}

.gradio-textbox textarea:focus {
    border-color: #6366f1 !important;
    box-shadow: 0 0 20px rgba(99, 102, 241, 0.2) !important;
    background: rgba(17, 17, 27, 0.8) !important;
    transform: translateY(-2px);
}

/* Buttons */
.gradio-button {
    background: linear-gradient(45deg,
        #6366f1,
        #818cf8,
        #6366f1) !important;
    background-size: 200% auto !important;
    border: none !important;
    border-radius: 8px !important;
    color: white !important;
    font-weight: 600 !important;
    font-size: 0.95rem !important;
    padding: 0.75rem 1.5rem !important;
    letter-spacing: 0.025em !important;
    position: relative;
    overflow: hidden;
    transition: all 0.3s ease !important;
    animation: gradientFlow 3s ease infinite;
}

.gradio-button::before {
    content: '';
    position: absolute;
    top: -50%;
    left: -50%;
    width: 200%;
    height: 200%;
    background: radial-gradient(circle at center,
        rgba(255, 255, 255, 0.2) 0%,
        transparent 70%);
    transform: scale(0);
    transition: transform 0.5s ease;
}

.gradio-button:hover {
    transform: translateY(-2px);
    box-shadow: 0 5px 20px rgba(99, 102, 241, 0.4) !important;
}

.gradio-button:hover::before {
    transform: scale(1);
}

/* Results Area */
.results-area {
    background: rgba(17, 17, 27, 0.7) !important;
    border: 1px solid rgba(99, 102, 241, 0.2) !important;
    border-radius: 12px !important;
    margin-top: 2rem !important;
    backdrop-filter: blur(10px);
    animation: slideUp 0.5s ease-out;
    position: relative;
    overflow: hidden;
}

.footer {
    text-align: center;
    padding: 2rem 0;
    margin-top: 3rem;
    font-size: 1.0rem;
    position: relative;
    z-index: 2;
}

.footer p {
    color: rgba(255, 255, 255, 0.8);
    display: flex;
    align-items: center;
    justify-content: center;
    gap: 0.5rem;
}

.footer .heart {
    color: #6366f1;
    display: inline-block;
    position: relative;
    font-size: 1.0rem;
    transform-origin: center;
    animation: heartbeat 1.5s ease infinite;
}

.footer .heart::before,
.footer .heart::after {
    content: '✦';
    position: absolute;
    opacity: 0;
    font-size: 0.6rem;
    animation: sparkle 1.5s ease infinite;
}

.footer .heart::before {
    top: -8px;
    left: -8px;
    animation-delay: 0.2s;
}

.footer .heart::after {
    top: -8px;
    right: -8px;
    animation-delay: 0.4s;
}

.footer .name {
    color: #6366f1;
    text-decoration: none;
    position: relative;
    transition: all 0.3s ease;
    padding: 0 4px;
}

.footer .name:hover {
    color: #818cf8;
}

footer {
    visibility: hidden;
}

/* Animations */
@keyframes gradientMove {
    0% { background-position: 0% 50%; }
    50% { background-position: 100% 50%; }
    100% { background-position: 0% 50%; }
}

@keyframes floatingParticles {
    0% { transform: translateY(0); }
    100% { transform: translateY(-100%); }
}

@keyframes scanline {
    0% { transform: translateX(-150%) scaleX(0.5); opacity: 0; }
    50% { transform: translateX(-50%) scaleX(1); opacity: 1; }
    100% { transform: translateX(50%) scaleX(0.5); opacity: 0; }
}

@keyframes gradientFlow {
    0% { background-position: 0% 50%; }
    50% { background-position: 100% 50%; }
    100% { background-position: 0% 50%; }
}

@keyframes textGlow {
    0% { opacity: 0.3; transform: scale(1); }
    50% { opacity: 0.5; transform: scale(1.02); }
    100% { opacity: 0.3; transform: scale(1); }
}

@keyframes borderGlow {
    0% { opacity: 0.5; }
    50% { opacity: 1; }
    100% { opacity: 0.5; }
}

@keyframes rotateGradient {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}

@keyframes fadeScale {
    0% { opacity: 0; transform: scale(0.95); }
    100% { opacity: 1; transform: scale(1); }
}

@keyframes slideUp {
    0% { opacity: 0; transform: translateY(20px); }
    100% { opacity: 1; transform: translateY(0); }
}

@keyframes floatIn {
    0% { opacity: 0; transform: translateY(20px); }
    100% { opacity: 1; transform: translateY(0); }
}

@keyframes fadeInUp {
    0% { opacity: 0; transform: translateY(10px); }
    100% { opacity: 1; transform: translateY(0); }
}

@keyframes heartbeat {
    0% { transform: scale(1); }
    10% { transform: scale(1.2); }
    20% { transform: scale(0.9); }
    30% { transform: scale(1.1); }
    40% { transform: scale(0.95); }
    50% { transform: scale(1); }
    100% { transform: scale(1); }
}

@keyframes sparkle {
    0% { transform: scale(0); opacity: 0; }
    50% { transform: scale(1.2); opacity: 1; }
    100% { transform: scale(0); opacity: 0; }
}
"""
414
# Bug fix: this span previously contained byte-for-byte duplicates of
# start_api_server()/stop_api_server() AND a second call to
# start_api_server() plus a second atexit.register(...). That spawned a
# second uvicorn backend process on import and registered a redundant exit
# handler. The definitions and registration near the top of the file are
# the single source of truth; the duplicates are removed.
427
+
428
async def analyze_with_progress(movie_name, progress=gr.Progress()):
    """Handle movie-script analysis via the local FastAPI backend.

    Starts an analysis task for ``movie_name`` on the backend, then polls
    the progress endpoint every 0.5 s, forwarding progress to Gradio.

    Returns a user-facing string: a trigger list, a "no triggers" message,
    or an "Error: ..." message. Never returns None.
    """
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            # Start the analysis and obtain a task id to poll.
            response = await client.get(
                "http://localhost:8000/api/start_analysis",
                params={"movie_name": movie_name}
            )
            response.raise_for_status()
            task_id = response.json()["task_id"]

            # Poll for progress until the backend reports completion.
            while True:
                progress_response = await client.get(
                    f"http://localhost:8000/api/progress/{task_id}"
                )
                progress_response.raise_for_status()
                status = progress_response.json()

                # Mirror the backend's progress in the Gradio UI.
                progress(status["progress"], desc=status["status"])

                if status["is_complete"]:
                    if status["error"]:
                        return f"Error: {status['error']}"
                    if status["result"]:
                        triggers = status["result"].get("detected_triggers", [])
                        if not triggers or triggers == ["None"]:
                            return "✓ No triggers detected in the content."
                        trigger_list = "\n".join([f"• {trigger}" for trigger in triggers])
                        return f"⚠ Triggers Detected:\n{trigger_list}"
                    # Bug fix: previously this case broke out of the loop and
                    # the function returned None, leaving the output box blank.
                    return "Error: analysis completed but no result was returned."

                await asyncio.sleep(0.5)

    except Exception as e:
        # Surface any network/JSON failure as a readable message.
        return f"Error: {str(e)}"
467
+
468
def analyze_with_loading(text, progress=gr.Progress()):
    """
    Synchronous wrapper for the async analyze_content function with smooth
    progress updates.

    The sleeps below are purely cosmetic: they animate the progress bar so
    the UI does not jump straight from 0 to done.

    Returns a user-facing string (trigger list, no-trigger message, or an
    error message).
    """
    # Initialize progress
    progress(0, desc="Starting analysis...")

    # Initial setup phase - smoother progression
    for i in range(25):
        time.sleep(0.04)  # Slightly longer sleep for smoother animation
        progress((i + 1) / 100, desc="Initializing analysis...")

    # Pre-processing phase
    for i in range(25, 45):
        time.sleep(0.03)
        progress((i + 1) / 100, desc="Pre-processing content...")

    # Perform analysis
    progress(0.45, desc="Analyzing content...")
    try:
        result = asyncio.run(analyze_content(text))

        # Analysis progress simulation
        for i in range(45, 75):
            time.sleep(0.03)
            progress((i + 1) / 100, desc="Processing results...")

    except Exception as e:
        return f"Error during analysis: {str(e)}"

    # Final processing with smooth progression
    for i in range(75, 100):
        time.sleep(0.02)
        progress((i + 1) / 100, desc="Finalizing results...")

    # Format the results.
    # Bug fix: use .get() (a malformed result dict previously raised
    # KeyError) and treat an empty list like "no triggers", matching the
    # behavior of analyze_with_progress.
    triggers = result.get("detected_triggers", [])
    if not triggers or triggers == ["None"]:
        return "✓ No triggers detected in the content."
    trigger_list = "\n".join([f"• {trigger}" for trigger in triggers])
    return f"⚠ Triggers Detected:\n{trigger_list}"
510
+
511
# Update the Gradio interface with fixed button handling.
# (Cleanup: this span previously re-imported gradio, model.analyzer.analyze_content,
# asyncio, time, httpx, subprocess and atexit mid-file — all of which are already
# imported at the top of the file — plus leftover scaffolding comments. The
# redundant imports were harmless no-ops and have been removed.)
524
# Build the Gradio UI using the custom CSS defined above.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
    # Title section (the data-text attribute feeds the CSS glow effect).
    gr.HTML("""
        <div class="treat-title">
            <h1 data-text="TREAT">TREAT</h1>
            <p>Trigger Recognition for Enjoyable and Appropriate Television</p>
        </div>
    """)

    with gr.Tabs() as tabs:
        with gr.Tab("Content Analysis"):  # Changed from TabItem to Tab
            with gr.Column():
                # Free-text input analyzed directly by the local model.
                input_text = gr.Textbox(
                    label="ANALYZE CONTENT",
                    placeholder="Enter the content you want to analyze...",
                    lines=8
                )
                analyze_btn = gr.Button("✨ Analyze")

        with gr.Tab("Movie Search"):  # Changed from TabItem to Tab
            with gr.Column():
                # Movie title handed off to the FastAPI backend for lookup.
                search_query = gr.Textbox(
                    label="SEARCH MOVIES",
                    placeholder="Type a movie title to search...",
                    lines=1
                )
                search_button = gr.Button("🔍 Search")

    # Shared, read-only output box used by both tabs.
    output_text = gr.Textbox(
        label="ANALYSIS RESULTS",
        lines=5,
        interactive=False
    )

    # NOTE(review): status_text is created but never updated by any handler —
    # presumably reserved for future status messages.
    status_text = gr.Markdown(
        value=""
    )

    # Define click events
    analyze_btn.click(
        fn=analyze_with_loading,
        inputs=input_text,
        outputs=output_text
    )

    search_button.click(
        fn=analyze_with_progress,
        inputs=search_query,
        outputs=output_text
    )

    gr.HTML("""
        <div class="footer">
            <p>Made with <span class="heart">💖</span> by <a href="https://www.linkedin.com/in/kubermehta/" target="_blank">Kuber Mehta</a></p>
        </div>
    """)

if __name__ == "__main__":
    # Local launch; debug/show_error surface tracebacks in the browser UI.
    iface.launch(
        share=False,
        debug=True,
        show_error=True
    )
model/analyzer.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ import torch
4
+ from datetime import datetime
5
+ import gradio as gr
6
+ from typing import Dict, List, Union, Optional
7
+ import logging
8
+ import traceback
9
+
10
# Configure logging
# Module-level logger at INFO so model-loading and analysis milestones are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
13
+
14
class ContentAnalyzer:
    """Content-sensitivity analyzer backed by a local DeepSeek R1 distilled model.

    The model is loaded lazily on the first call to :meth:`analyze_script`.
    Text is split into overlapping character chunks, each chunk is analyzed
    with a single prompt, and the detected trigger categories are unioned.
    """

    # Single source of truth for the HF model id (was duplicated inline).
    MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

    def __init__(self):
        # Prefer GPU when available; model/tokenizer stay None until loaded.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = None
        self.tokenizer = None
        logger.info(f"Initialized analyzer with device: {self.device}")

    async def load_model(self, progress=None) -> None:
        """Load the model and tokenizer with progress updates and detailed logging.

        Raises whatever from_pretrained raises (after logging a traceback).
        """
        try:
            print("\n=== Starting Model Loading ===")
            print(f"Time: {datetime.now()}")

            if progress:
                progress(0.1, "Loading tokenizer...")

            print("Loading tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.MODEL_NAME,
                use_fast=True
            )

            if progress:
                progress(0.3, "Loading model...")

            print(f"Loading model on {self.device}...")
            # fp16 only on CUDA; device_map="auto" lets accelerate place weights.
            self.model = AutoModelForCausalLM.from_pretrained(
                self.MODEL_NAME,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto"
            )

            if progress:
                progress(0.5, "Model loaded successfully")

            print("Model and tokenizer loaded successfully")
            logger.info(f"Model loaded successfully on {self.device}")
        except Exception as e:
            logger.error(f"Error loading model: {str(e)}")
            print(f"\nERROR DURING MODEL LOADING: {str(e)}")
            print("Stack trace:")
            traceback.print_exc()
            raise

    def _chunk_text(self, text: str, chunk_size: int = 2048, overlap: int = 256) -> List[str]:
        """Split text into overlapping chunks for processing.

        Chunks are measured in CHARACTERS, not tokens. Returns an empty list
        for empty input.
        """
        chunks = []
        step = chunk_size - overlap  # hoisted loop-invariant stride
        for start in range(0, len(text), step):
            chunks.append(text[start:start + chunk_size])
        # Bug fix: the log previously claimed "token overlap"; the split is
        # character-based.
        print(f"Split text into {len(chunks)} chunks with {overlap} character overlap")
        return chunks

    async def analyze_chunk(
        self,
        chunk: str,
        progress: "Optional[gr.Progress]" = None,
        current_progress: float = 0,
        progress_step: float = 0
    ) -> List[str]:
        """Analyze a single chunk of text for triggers with detailed logging.

        Returns the subset of trigger categories the model's response names;
        returns [] on any generation error (best-effort per chunk).
        """
        print(f"\n--- Processing Chunk ---")
        print(f"Chunk text (preview): {chunk[:50]}...")

        # Comprehensive trigger categories
        categories = [
            "Violence", "Death", "Substance Use", "Gore",
            "Vomit", "Sexual Content", "Sexual Abuse",
            "Self-Harm", "Gun Use", "Animal Cruelty",
            "Mental Health Issues"
        ]

        # Comprehensive prompt for single-pass analysis
        prompt = f"""Comprehensive Content Sensitivity Analysis

Carefully analyze the following text for sensitive content categories:
{', '.join(categories)}

Detailed Requirements:
1. Thoroughly examine entire text chunk
2. Identify presence of ANY of these categories
3. Provide clear, objective assessment
4. Minimal subjective interpretation

TEXT CHUNK:
{chunk}

RESPONSE FORMAT:
- List categories DEFINITIVELY present
- Brief objective justification for each
- Strict YES/NO categorization"""

        try:
            print("Sending prompt to model...")
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=4096)
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            with torch.no_grad():
                print("Generating response...")
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=256,
                    do_sample=True,
                    temperature=0.2,
                    top_p=0.9,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            response_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
            print("Full Model Response:", response_text)

            # Parse detected triggers: a category counts as detected if its
            # name appears (case-insensitively) anywhere in the response.
            detected_triggers = [
                category for category in categories
                if category.upper() in response_text.upper()
            ]

            print(f"Detected triggers in chunk: {detected_triggers}")

            if progress:
                # Cap at 0.9 so "finalizing" still has visible headroom.
                progress(min(current_progress + progress_step, 0.9), "Analyzing chunk...")

            return detected_triggers

        except Exception as e:
            logger.error(f"Error analyzing chunk: {str(e)}")
            print(f"Error during chunk analysis: {str(e)}")
            traceback.print_exc()
            return []

    async def analyze_script(self, script: str, progress: "Optional[gr.Progress]" = None) -> List[str]:
        """Analyze the entire script for triggers with progress updates.

        Returns the de-duplicated trigger list, or ["None"] when nothing was
        detected (including for empty input).
        """
        print("\n=== Starting Script Analysis ===")
        print(f"Time: {datetime.now()}")

        if not self.model or not self.tokenizer:
            await self.load_model(progress)

        chunks = self._chunk_text(script)
        # Bug fix: an empty script produced ZeroDivisionError below
        # (0.4 / len(chunks)); treat it as "no triggers".
        if not chunks:
            return ["None"]

        identified_triggers = set()
        progress_step = 0.4 / len(chunks)
        current_progress = 0.5  # Starting after model loading

        for chunk in chunks:
            chunk_triggers = await self.analyze_chunk(
                chunk,
                progress,
                current_progress,
                progress_step
            )
            identified_triggers.update(chunk_triggers)
            # Bug fix: accumulate progress here — analyze_chunk's increment
            # was local to the call, so the bar never advanced past 0.5+step.
            current_progress += progress_step

        if progress:
            progress(0.95, "Finalizing results...")

        final_triggers = list(identified_triggers)
        print("\n=== Analysis Complete ===")
        print("Final Results:", final_triggers)

        return final_triggers if final_triggers else ["None"]
175
+
176
# Reuse one analyzer (and hence one loaded model) across calls. Previously a
# fresh ContentAnalyzer was built per request, so the multi-GB model was
# reloaded from disk on every analysis.
_shared_analyzer: "Optional[ContentAnalyzer]" = None


async def analyze_content(
    script: str,
    progress: "Optional[gr.Progress]" = None
) -> Dict[str, Union[List[str], str]]:
    """Main analysis function for the Gradio interface.

    Returns a result dict with keys: detected_triggers, confidence, model,
    analysis_timestamp — plus an "error" key when analysis failed. Never
    raises: failures are reported inside the dict.
    """
    global _shared_analyzer

    print("\n=== Starting Content Analysis ===")
    print(f"Time: {datetime.now()}")

    if _shared_analyzer is None:
        _shared_analyzer = ContentAnalyzer()
    analyzer = _shared_analyzer

    try:
        triggers = await analyzer.analyze_script(script, progress)

        if progress:
            progress(1.0, "Analysis complete!")

        result = {
            "detected_triggers": triggers,
            "confidence": "High - Content detected" if triggers != ["None"] else "High - No concerning content detected",
            "model": "DeepSeek-R1-Distill-Qwen-1.5B",
            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        print("\nFinal Result Dictionary:", result)
        return result

    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        print(f"\nERROR OCCURRED: {str(e)}")
        print("Stack trace:")
        traceback.print_exc()
        return {
            "detected_triggers": ["Error occurred during analysis"],
            "confidence": "Error",
            "model": "DeepSeek-R1-Distill-Qwen-1.5B",
            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "error": str(e)
        }
214
+
215
if __name__ == "__main__":
    # Gradio interface
    # Standalone debug entry point: exposes the analyzer directly (raw JSON
    # output) without the full TREAT UI defined in gradio_app.py.
    iface = gr.Interface(
        fn=analyze_content,
        inputs=gr.Textbox(lines=8, label="Input Text"),
        outputs=gr.JSON(),
        title="Content Sensitivity Analysis",
        description="Analyze text content for sensitive topics using DeepSeek R1"
    )
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ flask
+ flask_cors
+ torch
+ gradio
+ transformers
+ accelerate
+ safetensors
+ huggingface-hub
+ beautifulsoup4
+ fastapi
+ uvicorn
+ httpx
+ requests
script_search_api.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # script_search_api.py
2
+ from fastapi import FastAPI, HTTPException
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ import asyncio
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, Optional
7
+ from pydantic import BaseModel
8
+ from dataclasses import dataclass
9
+ import logging
10
+ import requests
11
+ from bs4 import BeautifulSoup
12
+ from difflib import get_close_matches
13
+ from model.analyzer import analyze_content
14
+
15
+ logging.basicConfig(level=logging.INFO)
16
+ logger = logging.getLogger(__name__)
17
+
18
# FastAPI application exposing the script-search and analysis endpoints.
app = FastAPI()

# Allow cross-origin requests from any host; the API is consumed by a
# separately hosted front-end. NOTE(review): wildcard origins combined with
# allow_credentials=True is a very permissive CORS policy — tighten for
# production deployments.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
27
+
28
@dataclass
class ProgressState:
    """Server-side record of one analysis task's progress."""
    progress: float  # completion fraction in [0.0, 1.0]
    status: str  # human-readable status message shown to the client
    timestamp: datetime  # last update time; used for expiry/cleanup
    task_id: str  # identifier handed back to the client
    is_complete: bool = False  # set True once progress reaches 1.0
    result: Optional[dict] = None  # analysis output, present when finished
    error: Optional[str] = None  # error message if the task failed
37
+
38
class ProgressResponse(BaseModel):
    """Wire format returned by the task-id progress-polling endpoint."""
    progress: float  # completion fraction in [0.0, 1.0]
    status: str  # human-readable status message
    is_complete: bool  # whether the task has finished (success or error)
    result: Optional[dict] = None  # final analysis result, if complete
    error: Optional[str] = None  # error message, if the task failed
44
+
45
# Global progress tracker mapping a tracker key to its latest state.
# NOTE(review): keys are task ids in most paths, but fetch_script uses the
# movie name as the key — verify callers agree on the keying scheme.
progress_tracker: Dict[str, ProgressState] = {}

# IMSDb script database endpoints scraped by fetch_script.
BASE_URL = "https://imsdb.com"
ALL_SCRIPTS_URL = f"{BASE_URL}/all-scripts.html"
50
+
51
def create_task_id(movie_name: str) -> str:
    """Build a unique task identifier: the movie name plus the current Unix time."""
    stamp = datetime.now().timestamp()
    return f"{movie_name}-{stamp}"
54
+
55
async def cleanup_old_tasks():
    """Background loop: every 5 minutes, drop tasks not updated for over an hour."""
    while True:
        cutoff = datetime.now() - timedelta(hours=1)
        # Collect first, then delete, so we never mutate the dict mid-iteration.
        stale = [tid for tid, state in progress_tracker.items()
                 if state.timestamp < cutoff]
        for tid in stale:
            del progress_tracker[tid]
        await asyncio.sleep(300)  # pause five minutes between sweeps
66
+
67
@app.on_event("startup")
async def startup_event():
    """Reset the progress tracker and launch the periodic cleanup coroutine."""
    # NOTE(review): @app.on_event is deprecated in newer FastAPI versions in
    # favour of lifespan handlers — confirm the pinned FastAPI version.
    progress_tracker.clear()
    # Fire-and-forget background sweeper; runs for the life of the event loop.
    asyncio.create_task(cleanup_old_tasks())
    logger.info("Server started, progress tracker initialized")
73
+
74
def update_progress(task_id: str, progress: float, status: str, result: Optional[dict] = None, error: Optional[str] = None):
    """Record the latest state for *task_id* in the global tracker and log it.

    Args:
        task_id: Tracker key for the task being updated.
        progress: Completion fraction; >= 1.0 marks the task complete.
        status: Human-readable status message.
        result: Optional final analysis result.
        error: Optional error message for failed tasks.
    """
    snapshot = ProgressState(
        progress=progress,
        status=status,
        timestamp=datetime.now(),
        task_id=task_id,
        is_complete=progress >= 1.0,  # finished once progress hits 100%
        result=result,
        error=error,
    )
    progress_tracker[task_id] = snapshot
    logger.info(f"Task {task_id}: {status} (Progress: {progress * 100:.0f}%)")
87
+
88
# Strong references to in-flight background tasks. asyncio.create_task only
# keeps a weak reference to its task, so without this set a running analysis
# could be garbage-collected mid-flight (documented asyncio pitfall).
_background_tasks: set = set()

@app.get("/api/start_analysis")
async def start_analysis(movie_name: str):
    """Kick off a background analysis for *movie_name* and return its task id.

    The client polls /api/progress/{task_id} for status and the final result.
    """
    task_id = create_task_id(movie_name)
    update_progress(task_id, 0.0, "Starting analysis...")

    # Start the analysis task in the background so this request returns
    # immediately; retain a reference until the task finishes.
    task = asyncio.create_task(run_analysis(task_id, movie_name))
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)

    return {"task_id": task_id}
98
+
99
@app.get("/api/progress/{task_id}")
async def get_progress(task_id: str) -> ProgressResponse:
    """Return current progress for a task started via /api/start_analysis.

    Raises:
        HTTPException: 404 when the task id is unknown (never started,
            or already expired by the cleanup loop).
    """
    if task_id not in progress_tracker:
        raise HTTPException(status_code=404, detail="Task not found")

    state = progress_tracker[task_id]
    # Project the internal ProgressState onto the public response model.
    return ProgressResponse(
        progress=state.progress,
        status=state.status,
        is_complete=state.is_complete,
        result=state.result,
        error=state.error
    )
113
+
114
def find_movie_link(movie_name: str, soup: BeautifulSoup) -> str | None:
    """Fuzzy-match *movie_name* against every anchor on the index page.

    Returns the absolute URL of the best match, or None when nothing scores
    above the 0.6 similarity cutoff.
    """
    # Map lowercased link text -> href for every anchor on the page.
    titles_to_hrefs = {}
    for anchor in soup.find_all('a', href=True):
        titles_to_hrefs[anchor.text.strip().lower()] = anchor['href']

    matches = get_close_matches(movie_name.lower(), titles_to_hrefs.keys(), n=1, cutoff=0.6)
    if not matches:
        logger.info("No close match found.")
        return None

    best = matches[0]
    logger.info(f"Close match found: {best}")
    return BASE_URL + titles_to_hrefs[best]
125
+
126
def find_script_link(soup: BeautifulSoup, movie_name: str) -> str | None:
    """Locate the 'Read ... Script' link for *movie_name* on its IMSDb page.

    Returns the link's href, or None when no anchor matches.
    """
    # Exact-phrase candidates covering common capitalisations of the title.
    patterns = [
        f'Read "{movie_name}" Script',
        f'Read "{movie_name.title()}" Script',
        f'Read "{movie_name.upper()}" Script',
        f'Read "{movie_name.lower()}" Script'
    ]

    for anchor in soup.find_all('a', href=True):
        text = anchor.text.strip()
        lowered = text.lower()
        # First try the exact quoted phrasings, then fall back to a looser
        # "all words present" check (Read + Script + the title).
        if any(p.lower() in lowered for p in patterns):
            return anchor['href']
        if all(w.lower() in lowered for w in ["Read", "Script", movie_name]):
            return anchor['href']
    return None
142
+
143
def fetch_script(movie_name: str, task_id: str | None = None) -> str | None:
    """Fetch the full script text for *movie_name* from IMSDb.

    Walks three pages — the all-scripts index, the movie detail page, and the
    script page itself — reporting progress along the way.

    Args:
        movie_name: Title to search for (fuzzy-matched against IMSDb).
        task_id: Optional tracker key for progress updates. Defaults to the
            movie name, preserving the original keying for existing callers.

    Returns:
        The script text, or None on any network/parse failure.
    """
    # Fix: progress was keyed by movie name while other paths key by task id;
    # callers can now pass their task id so polling sees these updates too.
    progress_key = task_id or movie_name

    # Initial page load
    update_progress(progress_key, 0.1, "Fetching the script database...")
    try:
        # Fix: a timeout prevents a stalled IMSDb response from hanging the
        # task forever (requests has no default timeout).
        response = requests.get(ALL_SCRIPTS_URL, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        logger.error(f"Failed to load the main page: {str(e)}")
        return None

    # Search for movie
    update_progress(progress_key, 0.2, "Searching for the movie...")
    soup = BeautifulSoup(response.text, 'html.parser')
    movie_link = find_movie_link(movie_name, soup)

    if not movie_link:
        logger.error(f"Script for '{movie_name}' not found.")
        return None

    # Fetch movie page
    update_progress(progress_key, 0.3, "Loading movie details...")
    try:
        response = requests.get(movie_link, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        logger.error(f"Failed to load the movie page: {str(e)}")
        return None

    # Find script link
    update_progress(progress_key, 0.4, "Locating script download...")
    soup = BeautifulSoup(response.text, 'html.parser')
    script_link = find_script_link(soup, movie_name)

    if not script_link:
        logger.error(f"Unable to find script link for '{movie_name}'.")
        return None

    # Fetch script content
    script_page_url = BASE_URL + script_link
    update_progress(progress_key, 0.5, "Downloading script content...")

    try:
        response = requests.get(script_page_url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        logger.error(f"Failed to load the script: {str(e)}")
        return None

    # Extract script text: IMSDb renders the script body inside a <pre> tag.
    update_progress(progress_key, 0.6, "Extracting script text...")
    soup = BeautifulSoup(response.text, 'html.parser')
    script_content = soup.find('pre')

    if script_content:
        update_progress(progress_key, 0.7, "Script extracted successfully")
        return script_content.get_text()

    logger.error("Failed to extract script content.")
    return None
203
+
204
async def run_analysis(task_id: str, movie_name: str):
    """Background worker: fetch the script, analyze it, and record the outcome.

    All results — success or failure — are published through update_progress
    so the polling client always reaches a terminal state.
    """
    try:
        # Fetch script
        update_progress(task_id, 0.2, "Fetching script...")
        script = fetch_script(movie_name)
        if not script:
            raise Exception("Script not found")

        # Analyze content
        update_progress(task_id, 0.6, "Analyzing content...")
        analysis = await analyze_content(script)

        # Success: publish the final result at 100%.
        update_progress(task_id, 1.0, "Analysis complete", result=analysis)
    except Exception as err:
        # Surface the failure to the polling client via the tracker.
        logger.error(f"Error in analysis: {str(err)}", exc_info=True)
        update_progress(task_id, 1.0, "Error occurred", error=str(err))
223
+
224
@app.get("/api/fetch_and_analyze")
async def fetch_and_analyze(movie_name: str):
    """Synchronously fetch and analyze a movie script, with progress tracking.

    Unlike /api/start_analysis, this endpoint blocks until the analysis
    finishes and returns the result directly.

    Raises:
        HTTPException: 404 when the script cannot be found; 500 on any
            other failure.
    """
    task_id = create_task_id(movie_name)
    try:
        # Initialize progress
        update_progress(task_id, 0.0, "Starting script search...")

        # Fetch script
        script_text = fetch_script(movie_name)
        if not script_text:
            raise HTTPException(status_code=404, detail="Script not found or error occurred")

        # Analyze content
        update_progress(task_id, 0.8, "Analyzing script content...")
        result = await analyze_content(script_text)

        # Finalize
        update_progress(task_id, 1.0, "Analysis complete!")
        return result

    except HTTPException:
        # Bug fix: the deliberate 404 above was previously swallowed by the
        # generic handler below and re-raised as a 500. Let HTTP errors
        # propagate with their intended status code.
        progress_tracker.pop(task_id, None)
        progress_tracker.pop(movie_name, None)  # fetch_script keys progress by movie name
        raise
    except Exception as e:
        logger.error(f"Error in fetch_and_analyze: {str(e)}", exc_info=True)
        # Clean up progress entries for this request (both keying styles).
        progress_tracker.pop(task_id, None)
        progress_tracker.pop(movie_name, None)
        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")
251
+
252
@app.get("/api/progress")
def get_movie_progress(movie_name: str):
    """Return progress/status for an analysis keyed by movie name.

    Bug fix: this handler was previously also named ``get_progress``,
    silently shadowing the task-id variant at module level (F811). The
    routes still worked because FastAPI captures the function at decoration
    time, but the collision was an accident waiting to happen. Only the
    Python name changed; the URL path is identical.
    """
    if movie_name not in progress_tracker:
        # Unknown key: report a neutral "not started" state rather than 404.
        return {
            "progress": 0,
            "status": "Waiting to start..."
        }

    progress_info = progress_tracker[movie_name]

    # Expire entries that have not been updated for an hour.
    current_time = datetime.now()
    if (current_time - progress_info.timestamp).total_seconds() > 3600:  # 1 hour timeout
        del progress_tracker[movie_name]
        return {
            "progress": 0,
            "status": "Session expired. Please try again."
        }

    return {
        "progress": progress_info.progress,
        "status": progress_info.status
    }
276
+
277
if __name__ == "__main__":
    # Run the API directly with uvicorn when invoked as a script;
    # binds all interfaces on port 8000.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)