V1.0 model performs better than V1.1

#2
by henryruhs - opened

I compared both models and V1.0 somehow performs better for me.

Testing both with these values:

iou_threshold = 0.6
conf_threshold = 0.2

Turns out V1.1 needs different values but NMS then "terminates" most of the bounding boxes.
Could you provide another space with the suggested values, so we can compare V1.0 against V1.1?

Thanks

EraX JS Company org

Hi there, your values are quite high, so try lowering them and test again on more samples.

kirale2003 changed discussion status to closed
kirale2003 changed discussion status to open

In our final implementation we no longer use bounding boxes, and therefore no longer need NMS.

V1.0

22841 frames, 16 seconds, 1411.40frame/s, success rate=75.2%

V1.1

22841 frames, 15 seconds, 1485.71frame/s, success rate=46.8%

def detect_nsfw(vision_frame : VisionFrame) -> List[Score]:
    """
    Run the content analyser on a frame and return every score above 0.2.

    The frame is resized to fit the model size, padded and normalised by
    prepare_detect_frame(), and passed through forward(). The squeezed and
    transposed output is treated as one row per candidate; the maximum over
    columns 4 and beyond of each row is that candidate's score.
    NOTE(review): columns 0-3 are presumably box coordinates and the rest
    per-class scores - confirm against the model's output layout.

    :param vision_frame: frame to analyse, indexed as (height, width, ...).
    :return: list of float scores strictly greater than 0.2, in row order;
        an empty list when no row passes the threshold.
    """
    model_size = get_model_options().get('size')
    temp_vision_frame = resize_frame_resolution(vision_frame, model_size)
    detect_vision_frame = prepare_detect_frame(temp_vision_frame)
    detection = forward(detect_vision_frame)
    detection = numpy.squeeze(detection).T
    nsfw_scores_raw = numpy.amax(detection[:, 4:], axis = 1)
    keep_indices = numpy.where(nsfw_scores_raw > 0.2)[0]

    # Check how many indices were kept rather than their truthiness:
    # numpy.any([0]) is False, which used to discard a lone hit at row 0.
    if keep_indices.size > 0:
        return nsfw_scores_raw[keep_indices].ravel().tolist()

    return []


def forward(vision_frame : VisionFrame) -> Detection:
    """
    Feed a prepared frame to the 'content_analyser' entry of the inference pool.

    The call is made while holding conditional_thread_semaphore(), and the
    result of run() is returned unchanged.
    NOTE(review): the run(None, { 'input': ... }) call shape looks like an
    ONNX Runtime session - confirm against get_inference_pool().

    :param vision_frame: model-ready input bound to the 'input' name.
    :return: whatever the analyser's run() call produces.
    """
    analyser_session = get_inference_pool().get('content_analyser')

    with conditional_thread_semaphore():
        # None as the first argument requests all outputs rather than a named subset
        # (presumably; verify against the session API in use).
        model_inputs = { 'input': vision_frame }
        detection = analyser_session.run(None, model_inputs)

    return detection


def prepare_detect_frame(temp_vision_frame : VisionFrame) -> VisionFrame:
    """
    Pad, normalise and reshape a frame into the model's input tensor.

    The frame is copied into the top-left corner of a zero-filled canvas of
    shape (size[0], size[1], 3), scaled by 1 / 255, moved to channels-first
    order and given a leading axis of length one.
    NOTE(review): size[0] is used as the row count here - verify that this
    matches how the model size tuple is ordered for non-square models.

    :param temp_vision_frame: frame no larger than the model size, with three channels.
    :return: float32 array of shape (1, 3, size[0], size[1]).
    """
    size = get_model_options().get('size')
    canvas = numpy.zeros((size[0], size[1], 3))
    rows, columns = temp_vision_frame.shape[0], temp_vision_frame.shape[1]
    # Any area the frame does not cover stays zero and acts as padding.
    canvas[:rows, :columns, :] = temp_vision_frame
    normalised = canvas / 255.0
    channels_first = normalised.transpose(2, 0, 1)
    batched = numpy.expand_dims(channels_first, axis = 0)
    return batched.astype(numpy.float32)


def resize_frame_resolution(vision_frame : VisionFrame, max_resolution : Resolution) -> VisionFrame:
    """
    Shrink a frame so it fits inside max_resolution, keeping its aspect ratio.

    Frames that already fit are returned as the same object; frames are never
    enlarged.

    :param vision_frame: frame indexed as (height, width, ...).
    :param max_resolution: (max_width, max_height) pair.
    :return: the original frame, or a cv2.resize() copy scaled to fit.
    """
    frame_height, frame_width = vision_frame.shape[:2]
    max_width, max_height = max_resolution

    # Nothing to do when both sides are already within bounds.
    if frame_height <= max_height and frame_width <= max_width:
        return vision_frame

    # The smaller ratio guarantees both sides end up inside the limit.
    ratio = min(max_height / frame_height, max_width / frame_width)
    # NOTE(review): int() truncates, so an extreme aspect ratio can yield a side of 0.
    target_width = int(frame_width * ratio)
    target_height = int(frame_height * ratio)
    return cv2.resize(vision_frame, (target_width, target_height))

Sign up or log in to comment