Kuberwastaken committed on
Commit
475e55f
·
1 Parent(s): 03b7a6a

Trying to fix model.py

Browse files
Files changed (1) hide show
  1. model/model.py +47 -96
model/model.py CHANGED
@@ -74,107 +74,58 @@ class ContentAnalyzer:
74
  if not self.load_model():
75
  return {"error": "Model loading failed"}
76
 
77
- # Original trigger categories
 
 
 
78
  trigger_categories = {
79
- "Violence": {
80
- "mapped_name": "Violence",
81
- "description": "Any act involving physical force or aggression intended to cause harm, injury, or death."
82
- },
83
- "Death": {
84
- "mapped_name": "Death References",
85
- "description": "Any mention, implication, or depiction of the loss of life, including direct deaths or abstract references to mortality."
86
- },
87
- "Substance_Use": {
88
- "mapped_name": "Substance Use",
89
- "description": "References to consumption, misuse, or abuse of drugs, alcohol, or other intoxicating substances."
90
- },
91
- "Gore": {
92
- "mapped_name": "Gore",
93
- "description": "Graphic depictions of severe physical injuries, mutilation, or extreme bodily harm."
94
- },
95
- "Sexual_Content": {
96
- "mapped_name": "Sexual Content",
97
- "description": "Depictions or mentions of sexual activity, intimacy, or sexual behavior."
98
- },
99
- "Self_Harm": {
100
- "mapped_name": "Self-Harm",
101
- "description": "Behaviors where an individual intentionally causes harm to themselves."
102
- },
103
- "Mental_Health": {
104
- "mapped_name": "Mental Health Issues",
105
- "description": "References to mental health struggles, disorders, or psychological distress."
106
- }
107
  }
108
 
109
- try:
110
- # Optimize chunk processing
111
- chunk_size = 200 # Reduced chunk size for better memory management
112
- overlap = 10
113
- chunks = []
114
-
115
- # Create chunks with overlap
116
- for i in range(0, len(text), chunk_size - overlap):
117
- chunk = text[i:i + chunk_size]
118
- chunks.append(chunk)
119
-
120
- trigger_scores = {}
121
- trigger_occurrences = {}
122
-
123
- # Initialize tracking dictionaries
124
  for category, info in trigger_categories.items():
125
- trigger_scores[info["mapped_name"]] = 0
126
- trigger_occurrences[info["mapped_name"]] = []
127
-
128
- # Process all chunks for all categories
129
- for chunk_idx, chunk in enumerate(chunks):
130
- print(f"\nProcessing chunk {chunk_idx + 1}/{len(chunks)}")
131
- chunk_triggers = {}
132
-
133
- for category, info in trigger_categories.items():
134
- score, response = self.analyze_chunk(chunk, info)
135
-
136
- if score > 0:
137
- mapped_name = info["mapped_name"]
138
- trigger_scores[mapped_name] += score
139
- trigger_occurrences[mapped_name].append({
140
- 'chunk_idx': chunk_idx,
141
- 'response': response,
142
- 'score': score
143
- })
144
- print(f"Found {mapped_name} in chunk {chunk_idx + 1} (Response: {response})")
145
-
146
- # Cleanup after processing each chunk
147
- if self.device == "cuda":
148
- self.cleanup()
149
-
150
- # Collect all triggers that meet the threshold
151
- detected_triggers = []
152
- for name, score in trigger_scores.items():
153
- if score >= 0.5: # Threshold for considering a trigger as detected
154
- occurrences = len(trigger_occurrences[name])
155
- detected_triggers.append(name)
156
- print(f"\nTrigger '{name}' detected in {occurrences} chunks with total score {score}")
157
-
158
- result = {
159
- "detected_triggers": detected_triggers if detected_triggers else ["None"],
160
- "confidence": "High - Content detected" if detected_triggers else "High - No concerning content detected",
161
- "model": self.model_name,
162
- "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
163
- "trigger_details": {
164
- name: {
165
- "total_score": trigger_scores[name],
166
- "occurrences": trigger_occurrences[name]
167
- } for name in detected_triggers if name != "None"
168
- }
169
- }
170
-
171
- return result
172
 
173
- except Exception as e:
174
- return {"error": str(e)}
175
- finally:
176
- self.cleanup()
 
 
 
177
 
178
def get_detailed_analysis(script):
    """Analyze *script* for content triggers via a ContentAnalyzer instance."""
    # Delegate straight to the analyzer; its return value is passed through unchanged.
    return ContentAnalyzer().analyze_text(script)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  if not self.load_model():
75
  return {"error": "Model loading failed"}
76
 
77
+ chunk_size = 256 # Set the chunk size for text processing
78
+ overlap = 15 # Overlap between chunks for context preservation
79
+ script_chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size - overlap)]
80
+
81
  trigger_categories = {
82
+ "Violence": {"mapped_name": "Violence", "description": "Any act involving physical force or aggression intended to cause harm, injury, or death."},
83
+ "Death": {"mapped_name": "Death References", "description": "Any mention, implication, or depiction of the loss of life, including direct deaths or abstract references to mortality."},
84
+ "Substance_Use": {"mapped_name": "Substance Use", "description": "References to consumption, misuse, or abuse of drugs, alcohol, or other intoxicating substances."},
85
+ "Gore": {"mapped_name": "Gore", "description": "Graphic depictions of severe physical injuries, mutilation, or extreme bodily harm."},
86
+ "Sexual_Content": {"mapped_name": "Sexual Content", "description": "Depictions or mentions of sexual activity, intimacy, or sexual behavior."},
87
+ "Self_Harm": {"mapped_name": "Self-Harm", "description": "Behaviors where an individual intentionally causes harm to themselves."},
88
+ "Mental_Health": {"mapped_name": "Mental Health Issues", "description": "References to mental health struggles, disorders, or psychological distress."}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  }
90
 
91
+ identified_triggers = {}
92
+
93
+ for chunk_idx, chunk in enumerate(script_chunks, 1):
94
+ print(f"\n--- Processing Chunk {chunk_idx}/{len(script_chunks)} ---")
 
 
 
 
 
 
 
 
 
 
 
95
  for category, info in trigger_categories.items():
96
+ score, response = self.analyze_chunk(chunk, info)
97
+ if response == "YES":
98
+ identified_triggers[category] = identified_triggers.get(category, 0) + 1
99
+ elif response == "MAYBE":
100
+ identified_triggers[category] = identified_triggers.get(category, 0) + 0.5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
+ final_triggers = [category for category, count in identified_triggers.items() if count > 0.5]
103
+ self.cleanup()
104
+
105
+ if not final_triggers:
106
+ final_triggers = ["None"]
107
+
108
+ return final_triggers
109
 
110
def get_detailed_analysis(script):
    """Run trigger analysis on *script* and wrap the outcome in a result dict.

    Parameters:
        script: text to analyze for content triggers.

    Returns:
        dict with keys "detected_triggers", "confidence", "model",
        and "analysis_timestamp".
    """
    analyzer = ContentAnalyzer()
    print("\n=== Starting Detailed Analysis ===")
    triggers = analyzer.analyze_text(script)

    # Anything other than a non-empty trigger list (e.g. an error dict or
    # the sentinel ["None"]) is treated as "nothing detected".
    detected = isinstance(triggers, list) and triggers != ["None"]

    # Single dict construction instead of two duplicated branches that
    # differed only in "detected_triggers" and "confidence".
    result = {
        "detected_triggers": triggers if detected else ["None"],
        "confidence": ("High - Content detected" if detected
                       else "High - No concerning content detected"),
        "model": "Llama-3.2-1B",
        "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }

    print("\nFinal Result Dictionary:", result)
    return result