Commit · 475e55f
Parent(s): 03b7a6a
Trying to fix model.py
model/model.py CHANGED (+47 -96)
@@ -74,107 +74,58 @@ class ContentAnalyzer:
        if not self.load_model():
            return {"error": "Model loading failed"}

-        trigger_categories = {
-            "Violence": {
-                "mapped_name": "Violence",
-                "description": "Any act involving physical force or aggression intended to cause harm, injury, or death."
-            },
-            "Death": {
-                "mapped_name": "Death References",
-                "description": "Any mention, implication, or depiction of the loss of life, including direct deaths or abstract references to mortality."
-            },
-            "Substance_Use": {
-                "mapped_name": "Substance Use",
-                "description": "References to consumption, misuse, or abuse of drugs, alcohol, or other intoxicating substances."
-            },
-            "Gore": {
-                "mapped_name": "Gore",
-                "description": "Graphic depictions of severe physical injuries, mutilation, or extreme bodily harm."
-            },
-            "Sexual_Content": {
-                "mapped_name": "Sexual Content",
-                "description": "Depictions or mentions of sexual activity, intimacy, or sexual behavior."
-            },
-            "Self_Harm": {
-                "mapped_name": "Self-Harm",
-                "description": "Behaviors where an individual intentionally causes harm to themselves."
-            },
-            "Mental_Health": {
-                "mapped_name": "Mental Health Issues",
-                "description": "References to mental health struggles, disorders, or psychological distress."
-            }
-        }
-        chunks = []
-
-        # Create chunks with overlap
-        for i in range(0, len(text), chunk_size - overlap):
-            chunk = text[i:i + chunk_size]
-            chunks.append(chunk)
-
-        trigger_scores = {}
-        trigger_occurrences = {}
-
-        # Initialize tracking dictionaries
-        for category, info in trigger_categories.items():
-            trigger_scores[info["mapped_name"]] = 0
-            trigger_occurrences[info["mapped_name"]] = []
-
-        for chunk_idx, chunk in enumerate(chunks):
-            print(f"\nProcessing chunk {chunk_idx + 1}/{len(chunks)}")
-            chunk_triggers = {}
-
-            for category, info in trigger_categories.items():
-                score, response = self.analyze_chunk(chunk, info)
-
-                if score > 0:
-                    mapped_name = info["mapped_name"]
-                    trigger_scores[mapped_name] += score
-                    trigger_occurrences[mapped_name].append({
-                        'chunk_idx': chunk_idx,
-                        'response': response,
-                        'score': score
-                    })
-                    print(f"Found {mapped_name} in chunk {chunk_idx + 1} (Response: {response})")
-
-            # Cleanup after processing each chunk
-            if self.device == "cuda":
-                self.cleanup()
-
-        # Collect all triggers that meet the threshold
-        detected_triggers = []
-        for name, score in trigger_scores.items():
-            if score >= 0.5:  # Threshold for considering a trigger as detected
-                occurrences = len(trigger_occurrences[name])
-                detected_triggers.append(name)
-                print(f"\nTrigger '{name}' detected in {occurrences} chunks with total score {score}")
-
-        result = {
-            "detected_triggers": detected_triggers if detected_triggers else ["None"],
-            "confidence": "High - Content detected" if detected_triggers else "High - No concerning content detected",
-            "model": self.model_name,
-            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-            "trigger_details": {
-                name: {
-                    "total_score": trigger_scores[name],
-                    "occurrences": trigger_occurrences[name]
-                } for name in detected_triggers if name != "None"
-            }
-        }
-
-        return result
+        chunk_size = 256  # Set the chunk size for text processing
+        overlap = 15  # Overlap between chunks for context preservation
+        script_chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size - overlap)]
+
+        trigger_categories = {
+            "Violence": {"mapped_name": "Violence", "description": "Any act involving physical force or aggression intended to cause harm, injury, or death."},
+            "Death": {"mapped_name": "Death References", "description": "Any mention, implication, or depiction of the loss of life, including direct deaths or abstract references to mortality."},
+            "Substance_Use": {"mapped_name": "Substance Use", "description": "References to consumption, misuse, or abuse of drugs, alcohol, or other intoxicating substances."},
+            "Gore": {"mapped_name": "Gore", "description": "Graphic depictions of severe physical injuries, mutilation, or extreme bodily harm."},
+            "Sexual_Content": {"mapped_name": "Sexual Content", "description": "Depictions or mentions of sexual activity, intimacy, or sexual behavior."},
+            "Self_Harm": {"mapped_name": "Self-Harm", "description": "Behaviors where an individual intentionally causes harm to themselves."},
+            "Mental_Health": {"mapped_name": "Mental Health Issues", "description": "References to mental health struggles, disorders, or psychological distress."}
+        }
+
+        identified_triggers = {}
+
+        for chunk_idx, chunk in enumerate(script_chunks, 1):
+            print(f"\n--- Processing Chunk {chunk_idx}/{len(script_chunks)} ---")
+            for category, info in trigger_categories.items():
+                score, response = self.analyze_chunk(chunk, info)
+                if response == "YES":
+                    identified_triggers[category] = identified_triggers.get(category, 0) + 1
+                elif response == "MAYBE":
+                    identified_triggers[category] = identified_triggers.get(category, 0) + 0.5
+
+        final_triggers = [category for category, count in identified_triggers.items() if count > 0.5]
+        self.cleanup()
+
+        if not final_triggers:
+            final_triggers = ["None"]
+
+        return final_triggers

def get_detailed_analysis(script):
    analyzer = ContentAnalyzer()
+    print("\n=== Starting Detailed Analysis ===")
+    triggers = analyzer.analyze_text(script)
+
+    if isinstance(triggers, list) and triggers != ["None"]:
+        result = {
+            "detected_triggers": triggers,
+            "confidence": "High - Content detected",
+            "model": "Llama-3.2-1B",
+            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        }
+    else:
+        result = {
+            "detected_triggers": ["None"],
+            "confidence": "High - No concerning content detected",
+            "model": "Llama-3.2-1B",
+            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        }
+
+    print("\nFinal Result Dictionary:", result)
+    return result
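
Review note: this commit replaces the old per-category score accumulator (and its trigger_details output) with a simple vote count: a YES response adds 1, a MAYBE adds 0.5, and a category is kept when its total exceeds 0.5, so a single YES is enough but a lone MAYBE is not. A minimal, self-contained sketch of that logic follows; the helper names (make_chunks, count_votes) are illustrative, and fake_analyze_chunk is a stub standing in for ContentAnalyzer.analyze_chunk, whose real prompt logic lives elsewhere in model.py.

# Sketch only; helper names and the keyword stub are illustrative, not the Space's code.

def make_chunks(text, chunk_size=256, overlap=15):
    # Step is chunk_size - overlap, so consecutive chunks share `overlap` characters.
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size - overlap)]

def fake_analyze_chunk(chunk, info):
    # Stand-in for ContentAnalyzer.analyze_chunk, which queries the LLM and
    # returns (score, response) with response in {"YES", "MAYBE", "NO"}.
    return (1.0, "YES") if "blood" in chunk.lower() else (0.0, "NO")

def count_votes(chunks, categories):
    # Mirrors the new analyze_text tally: YES = 1, MAYBE = 0.5 per chunk.
    votes = {}
    for chunk in chunks:
        for category, info in categories.items():
            _score, response = fake_analyze_chunk(chunk, info)
            if response == "YES":
                votes[category] = votes.get(category, 0) + 1
            elif response == "MAYBE":
                votes[category] = votes.get(category, 0) + 0.5
    return votes

chunks = make_chunks("He slipped in the blood. " * 50)
votes = count_votes(chunks, {"Gore": {"mapped_name": "Gore"}})
print([c for c, n in votes.items() if n > 0.5])  # ['Gore']

One behavioral change worth flagging: analyze_text now returns the raw category keys (e.g. "Substance_Use") rather than the mapped_name values the old result dict exposed, so mapped_name and description are presumably only consumed inside analyze_chunk when building the prompt.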
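
A minimal driver for the revised entry point, assuming the Space's layout makes model/model.py importable as model.model (the import path and sample script below are assumptions for illustration):

# Hypothetical usage; adjust the import to the Space's actual layout.
from model.model import get_detailed_analysis

script = """
INT. ABANDONED WAREHOUSE - NIGHT
He raises the knife. Blood pools across the floor.
"""

result = get_detailed_analysis(script)
print(result["detected_triggers"])  # e.g. ["Violence", "Gore"] or ["None"]
print(result["confidence"])         # "High - Content detected" or "High - No concerning content detected"

Unlike the old return value, the new result dict drops trigger_details (per-chunk scores and occurrences) and hardcodes "Llama-3.2-1B" instead of reading self.model_name.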