V1.0 model performs better than V1.1
#2
by
henryruhs
- opened
I compared both models and V1.0 somehow performs better for me.
Testing both with these values:
iou_threshold = 0.6
conf_threshold = 0.2
Turns out V1.1 needs different values but NMS then "terminates" most of the bounding boxes.
Could you provide another space with the suggested values, so we can compare V1.0 against V1.1?
Thanks
Hi there, your values are quite high, so try lowering them and test again on more samples.
kirale2003
changed discussion status to
closed
kirale2003
changed discussion status to
open
In our final implementation we don't even use bounding boxes and therefore NMS anymore.
V1.0
22841 frames, 16 seconds, 1411.40frame/s, success rate=75.2%
V1.1
22841 frames, 15 seconds, 1485.71frame/s, success rate=46.8%
def detect_nsfw(vision_frame : VisionFrame) -> List[Score]:
    """
    Run the content analyser on a frame and return the scores above the cut-off.

    The frame is shrunk to fit the model size, padded and normalised by
    prepare_detect_frame, and passed through forward. The squeezed and
    transposed output is reduced to one score per row by taking the maximum
    over the columns from index 4 onwards.

    :param vision_frame: frame to analyse
    :return: plain list of per-row scores strictly greater than 0.2, empty when
        no row passes the cut-off
    """
    model_size = get_model_options().get('size')
    temp_vision_frame = resize_frame_resolution(vision_frame, model_size)
    detect_vision_frame = prepare_detect_frame(temp_vision_frame)
    detection = forward(detect_vision_frame)
    detection = numpy.squeeze(detection).T
    # NOTE(review): presumably columns 0-3 hold box coordinates and the rest are
    # class scores - confirm against the model's output layout.
    nsfw_scores_raw = numpy.amax(detection[:, 4:], axis = 1)
    # Select with a boolean mask instead of testing numpy.any() on the index
    # array: numpy.any([0]) is False, which used to discard the result whenever
    # row 0 was the only row above the cut-off.
    keep_mask = nsfw_scores_raw > 0.2
    return nsfw_scores_raw[keep_mask].ravel().tolist()
def forward(vision_frame : VisionFrame) -> Detection:
    """
    Feed a prepared frame to the 'content_analyser' entry of the inference pool.

    :param vision_frame: model-ready frame, passed under the 'input' key
    :return: whatever the analyser's run() call returns
    """
    analyser = get_inference_pool().get('content_analyser')
    model_inputs =\
    {
        'input': vision_frame
    }

    # The run() call is made while holding conditional_thread_semaphore().
    with conditional_thread_semaphore():
        return analyser.run(None, model_inputs)
def prepare_detect_frame(temp_vision_frame : VisionFrame) -> VisionFrame:
    """
    Pad a frame onto a zeroed canvas of the model size and convert it to model input.

    The frame is copied into the top-left corner of the canvas, divided by
    255.0, moved to channels-first order, given a leading axis of length 1 and
    cast to float32.

    :param temp_vision_frame: frame that already fits inside the model size
    :return: float32 array with shape (1, 3, size[0], size[1])
    """
    model_size = get_model_options().get('size')
    # NOTE(review): size[0] is used as the row count here, while
    # resize_frame_resolution unpacks the same tuple as (width, height).
    # The two only agree for square sizes - confirm the intended order.
    canvas = numpy.zeros((model_size[0], model_size[1], 3))
    frame_rows = temp_vision_frame.shape[0]
    frame_cols = temp_vision_frame.shape[1]
    canvas[:frame_rows, :frame_cols, :] = temp_vision_frame
    normalized = canvas / 255.0
    channels_first = normalized.transpose(2, 0, 1)
    batched = numpy.expand_dims(channels_first, axis = 0)
    return batched.astype(numpy.float32)
def resize_frame_resolution(vision_frame : VisionFrame, max_resolution : Resolution) -> VisionFrame:
    """
    Shrink a frame so it fits inside max_resolution, keeping its aspect ratio.

    Frames that already fit are returned unchanged (the same object, no copy).
    Frames are never enlarged.

    :param vision_frame: frame whose first two shape entries are (height, width)
    :param max_resolution: (max_width, max_height) bounding box
    :return: the original frame, or a resized copy that fits the bounding box
    """
    height, width = vision_frame.shape[:2]
    max_width, max_height = max_resolution

    if height <= max_height and width <= max_width:
        return vision_frame

    scale = min(max_height / height, max_width / width)
    # int() truncates, so a very thin or very flat frame could end up with a
    # zero-sized target; clamp to one pixel so the resize request stays valid.
    new_width = max(int(width * scale), 1)
    new_height = max(int(height * scale), 1)
    return cv2.resize(vision_frame, (new_width, new_height))