from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from datetime import datetime


def analyze_script(script):
    # Starting the script analysis
    print("\n=== Starting Analysis ===")
    print(f"Time: {datetime.now()}")  # Output the current timestamp
    print("Loading model and tokenizer...")

    try:
        # Load the tokenizer and model, selecting the appropriate device (CPU or CUDA)
        tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B", use_fast=True)
        device = "cuda" if torch.cuda.is_available() else "cpu"  # Use CUDA if available, otherwise fall back to CPU
        print(f"Using device: {device}")
        model = AutoModelForCausalLM.from_pretrained(
            "meta-llama/Llama-3.2-1B",
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,  # 16-bit precision on CUDA, 32-bit on CPU
            device_map="auto"  # Automatically map the model to the available device
        )
        print("Model loaded successfully")

        # Define trigger categories with their descriptions
        trigger_categories = {
            "Violence": {
                "mapped_name": "Violence",
                "description": (
                    "Any act involving physical force or aggression intended to cause harm, injury, or death to a person, animal, or object. "
                    "Includes direct physical confrontations (e.g., fights, beatings, or assaults), implied violence (e.g., very graphic threats or descriptions of injuries), "
                    "or large-scale events like wars, riots, or violent protests."
                )
            },
            "Death": {
                "mapped_name": "Death References",
                "description": (
                    "Any mention, implication, or depiction of the loss of life, including direct deaths of characters, mentions of deceased individuals, "
                    "or abstract references to mortality (e.g., 'facing the end' or 'gone forever'). This also covers depictions of funerals, mourning, "
                    "grieving, or any dialogue that centers around death. Do not count metaphors that do not actually involve a death."
                )
            },
            "Substance Use": {
                "mapped_name": "Substance Use",
                "description": (
                    "Any explicit or implied reference to the consumption, misuse, or abuse of drugs, alcohol, or other intoxicating substances. "
                    "Includes scenes of drinking, smoking, or drug use, whether recreational or addictive. May also cover references to withdrawal symptoms, "
                    "rehabilitation, or substance-related paraphernalia (e.g., needles, bottles, pipes)."
                )
            },
            "Gore": {
                "mapped_name": "Gore",
                "description": (
                    "Extremely detailed and graphic depictions of severe physical injuries, mutilation, or extreme bodily harm, often accompanied by descriptions of heavy blood, exposed organs, "
                    "or dismemberment. This includes war scenes with severe casualties, horror scenarios involving grotesque creatures, or medical procedures depicted with excessive detail. "
                    "Only answer yes if you're completely certain."
                )
            },
            "Vomit": {
                "mapped_name": "Vomit",
                "description": (
                    "Any explicit reference to vomiting, whether directly described, implied, or depicted. This includes detailed sounds, visual descriptions, mentions of nausea explicitly leading to vomiting, or any aftermath involving vomit. "
                    "Respond 'yes' only if the scene unambiguously and clearly involves vomiting, with no room for doubt."
                )
            },
            "Sexual Content": {
                "mapped_name": "Sexual Content",
                "description": (
                    "Any depiction or mention of sexual activity, intimacy, or sexual behavior, ranging from implied scenes to explicit descriptions. "
                    "This includes romantic encounters, physical descriptions of characters in a sexual context, sexual dialogue, or references to sexual themes (e.g., harassment, innuendos)."
                )
            },
            "Sexual Abuse": {
                "mapped_name": "Sexual Abuse",
                "description": (
                    "Any form of non-consensual sexual act, behavior, or interaction, involving coercion, manipulation, or physical force. "
                    "This includes incidents of sexual assault, molestation, exploitation, harassment, and any acts where an individual is subjected to sexual acts against their will or without their consent. "
                    "It also covers discussions or depictions of the aftermath of such abuse, such as trauma, emotional distress, legal proceedings, or therapy. "
                    "References to inappropriate sexual advances, groping, or any other form of sexual misconduct are also included, as well as the psychological and emotional impact on survivors. "
                    "Scenes where individuals are placed in sexually compromising situations, even if not directly acted upon, may also fall under this category. "
                    "Only answer yes if you're completely certain of its presence."
                )
            },
            "Self-Harm": {
                "mapped_name": "Self-Harm",
                "description": (
                    "Any mention or depiction of behaviors where an individual intentionally causes harm to themselves. This includes cutting, burning, or other forms of physical injury, "
                    "as well as suicidal ideation, suicide attempts, or discussions of self-destructive thoughts and actions. References to scars, bruises, or other lasting signs of self-harm are also included. "
                    "Only answer yes if you're completely certain."
                )
            },
            "Gun Use": {
                "mapped_name": "Gun Use",
                "description": (
                    "Any explicit or implied mention of firearms being handled, fired, or used in a threatening manner. This includes scenes of gun violence, references to shootings, "
                    "gun-related accidents, or the presence of firearms in a tense or dangerous context (e.g., holstered weapons during an argument)."
                )
            },
            "Animal Cruelty": {
                "mapped_name": "Animal Cruelty",
                "description": (
                    "Any act of harm or abuse toward animals, whether intentional or accidental. This includes physical abuse (e.g., hitting, injuring, or killing animals), "
                    "mental or emotional mistreatment (e.g., starvation, isolation), and scenes where animals are subjected to pain or suffering for human entertainment or experimentation. "
                    "Respond 'yes' only if the scene unambiguously and clearly involves animal cruelty, with no room for doubt."
                )
            },
            "Mental Health Issues": {
                "mapped_name": "Mental Health Issues",
                "description": (
                    "Any reference to mental health struggles, disorders, or psychological distress. This includes mentions of depression, anxiety, PTSD, bipolar disorder, schizophrenia, "
                    "or other conditions. Scenes depicting destructive coping mechanisms are also included, such as a character expressing feelings of worthlessness, hopelessness, or detachment from reality."
                )
            }
        }

        print("\nProcessing text...")  # Output indicating the text is being processed
        chunk_size = 256  # Chunk size (in characters) for text processing
        overlap = 15  # Overlap between chunks for context preservation
        script_chunks = []  # List to store script chunks

        # Split the script into smaller, overlapping chunks
        for i in range(0, len(script), chunk_size - overlap):
            chunk = script[i:i + chunk_size]
            script_chunks.append(chunk)
        print(f"Split into {len(script_chunks)} chunks with {overlap} character overlap")  # Inform about the chunking

        identified_triggers = {}  # Dictionary to store the identified triggers

        # Process each chunk of the script
        for chunk_idx, chunk in enumerate(script_chunks, 1):
            print(f"\n--- Processing Chunk {chunk_idx}/{len(script_chunks)} ---")
            print(f"Chunk text (preview): {chunk[:50]}...")  # Preview of the current chunk

            # Check each category for triggers
            for category, info in trigger_categories.items():
                mapped_name = info["mapped_name"]
                description = info["description"]

                print(f"\nAnalyzing for {mapped_name}...")
                prompt = f"""
                Check this text for any indication of {mapped_name} ({description}).
                Be sensitive to subtle references or implications, make sure the text is not metaphorical.
                Respond concisely with: YES, NO, or MAYBE.
                Text: {chunk}
                Answer:
                """

                print("Sending prompt to model...")  # Indicate that the prompt is being sent to the model
                inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)  # Tokenize the prompt
                inputs = {k: v.to(device) for k, v in inputs.items()}  # Move the inputs to the chosen device

                with torch.no_grad():  # Disable gradient calculation for inference
                    print("Generating response...")  # Indicate that the model is generating a response
                    outputs = model.generate(
                        **inputs,
                        max_new_tokens=10,  # Limit response length
                        do_sample=True,  # Enable sampling for more diverse output
                        temperature=0.5,  # Control randomness of the output
                        top_p=0.9,  # Use nucleus sampling
                        pad_token_id=tokenizer.eos_token_id  # Pad token ID
                    )

                response_text = tokenizer.decode(outputs[0], skip_special_tokens=True).strip().upper()  # Decode and normalize the response
                first_word = response_text.split("\n")[-1].split()[0] if response_text else "NO"  # First word of the model's answer line
                print(f"Model response for {mapped_name}: {first_word}")

                # Update identified triggers based on the model's response
                if first_word == "YES":
                    print(f"Detected {mapped_name} in this chunk!")  # Trigger detected
                    identified_triggers[mapped_name] = identified_triggers.get(mapped_name, 0) + 1
                elif first_word == "MAYBE":
                    print(f"Possible {mapped_name} detected, marking for further review.")  # Possible trigger detected
                    identified_triggers[mapped_name] = identified_triggers.get(mapped_name, 0) + 0.5
                else:
                    print(f"No {mapped_name} detected in this chunk.")  # No trigger detected

        print("\n=== Analysis Complete ===")  # Indicate that analysis is complete
        print("Final Results:")
        final_triggers = []  # List to store final triggers

        # Filter and output the final trigger results
        for mapped_name, count in identified_triggers.items():
            if count > 0.5:
                final_triggers.append(mapped_name)
                print(f"- {mapped_name}: found in {count} chunks")

        if not final_triggers:
            print("No triggers detected")  # No triggers detected
            final_triggers = ["None"]

        print("\nReturning results...")
        return final_triggers  # Return the list of detected triggers

    except Exception as e:
        # Handle errors and print a stack trace
        print(f"\nERROR OCCURRED: {str(e)}")
        print("Stack trace:")
        import traceback
        traceback.print_exc()
        return {"error": str(e)}


def get_detailed_analysis(script):
    print("\n=== Starting Detailed Analysis ===")
    triggers = analyze_script(script)  # Call the analyze_script function

    if isinstance(triggers, list) and triggers != ["None"]:
        result = {
            "detected_triggers": triggers,
            "confidence": "High - Content detected",
            "model": "Llama-3.2-1B",
            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }
    else:
        result = {
            "detected_triggers": ["None"],
            "confidence": "High - No concerning content detected",
            "model": "Llama-3.2-1B",
            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

    print("\nFinal Result Dictionary:", result)  # Output the final result dictionary
    return result
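

# Minimal usage sketch (not part of the original script): assuming this file is run
# directly and the gated meta-llama/Llama-3.2-1B weights are accessible, the block
# below feeds a short illustrative scene to get_detailed_analysis and prints the
# detected triggers. The __main__ guard and sample_script text are hypothetical
# additions shown only to demonstrate how the function is called.
if __name__ == "__main__":
    sample_script = (
        "INT. WAREHOUSE - NIGHT. Two men argue over a briefcase. "
        "One draws a gun and fires a warning shot into the ceiling."
    )
    report = get_detailed_analysis(sample_script)
    print("\nDetected triggers:", report["detected_triggers"])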