|
|
|
import argparse |
|
import json |
|
from os.path import basename, splitext |
|
import os |
|
import mmengine |
|
import numpy as np |
|
import pandas as pd |
|
import torch |
|
from numpy.linalg import norm, pinv |
|
from scipy.special import logsumexp, softmax |
|
from sklearn import metrics |
|
from sklearn.covariance import EmpiricalCovariance |
|
from sklearn.metrics import pairwise_distances_argmin_min |
|
from tqdm import tqdm |
|
import pickle |
|
from os.path import dirname |
|
import torchvision as tv |
|
from PIL import Image |
|
from mmpretrain.apis import init_model |
|
|
|
def parse_args():
    """Parse command-line arguments for single-image OOD detection.

    Returns:
        argparse.Namespace: parsed options (config/checkpoint paths, cached
        feature paths, method list, and evaluation hyper-parameters).
    """
    parser = argparse.ArgumentParser(description='Detect an image')
    parser.add_argument(
        '--cfg', help='Path to config',
        default='/dataset/jingyaoli/AD/MOOD_/MOODv2/configs/beit-base-p16_224px.py')
    parser.add_argument('--ood_feature',
                        default=None, help='Path to ood feature file')
    parser.add_argument(
        '--checkpoint', help='Path to checkpoint',
        default='/dataset/jingyaoli/AD/MOODv2/pretrain/beit-base_3rdparty_in1k_20221114-c0a4df23.pth',)
    parser.add_argument('--img_path', help='Path to image',
                        default='/dataset/jingyaoli/AD/MOOD_/MOODv2/imgs/DTD_cracked_0004.jpg')
    parser.add_argument('--fc',
                        default='/dataset/jingyaoli/AD/MOODv2/outputs/beit-224px/fc.pkl',
                        help='Path to fc path')
    parser.add_argument('--id_data', default='imagenet', help='id data name')
    parser.add_argument('--id_train_feature',
                        default='/dataset/jingyaoli/AD/MOODv2/outputs/beit-224px/imagenet_train.pkl',
                        help='Path to data')
    parser.add_argument('--id_val_feature',
                        default='/dataset/jingyaoli/AD/MOODv2/outputs/beit-224px/imagenet_test.pkl',
                        help='Path to output file')
    parser.add_argument('--ood_features',
                        default=None, nargs='+', help='Path to ood features')
    parser.add_argument(
        '--methods', nargs='+',
        default=['MSP', 'MaxLogit', 'Energy', 'Energy+React', 'ViM', 'Residual', 'GradNorm', 'Mahalanobis', ],
        help='methods')
    parser.add_argument(
        '--train_label',
        default='datalists/imagenet2012_train_random_200k.txt',
        help='Path to train labels')
    # BUGFIX: without an explicit type=, values given on the command line are
    # parsed as strings, which breaks np.quantile / np.percentile downstream.
    # Defaults are unchanged, so existing behaviour is preserved.
    parser.add_argument(
        '--clip_quantile', default=0.99, type=float,
        help='Clip quantile to react')
    parser.add_argument(
        '--fpr', default=95, type=int, help='False Positive Rate')
    return parser.parse_args()
|
|
|
def evaluate(method, score_id, score_ood, target_fpr):
    """Judge a query sample as in- or out-of-distribution by thresholding.

    The threshold is the (100 - target_fpr)-th percentile of the ID scores,
    i.e. it is chosen so that `target_fpr` percent of the in-distribution
    validation scores lie at or above it. Scores >= threshold are judged
    in-distribution.

    Args:
        method: Name of the scoring method (display only).
        score_id: Array of scores on in-distribution validation data.
        score_ood: Score(s) of the query sample (scalar or small array).
        target_fpr: Target rate in percent (e.g. 95).

    Returns:
        bool: True if the sample is judged in-distribution.
    """
    # BUGFIX: 'threhold' typo corrected; np.all makes the comparison valid
    # for scalars and arrays alike (the original raised on arrays with more
    # than one element). The boolean is now returned so callers' unused
    # `result = evaluate(...)` assignments become meaningful.
    threshold = np.percentile(score_id, 100 - target_fpr)
    is_id = bool(np.all(np.asarray(score_ood) >= threshold))
    if is_id:
        print('\033[94m', method, '\033[0m', 'evaluation:', '\033[92m', 'in-distribution', '\033[0m')
    else:
        print('\033[94m', method, '\033[0m', 'evaluation:', '\033[91m', 'out-of-distribution', '\033[0m')
    return is_id
|
|
|
def kl(p, q):
    """Return KL(p || q) = sum_i p_i * log(p_i / q_i), with 0*log(0/q) = 0.

    np.where evaluates both branches for every entry, so the p == 0 terms
    still compute log(0/q) before being discarded; np.errstate silences the
    resulting divide/invalid RuntimeWarnings without changing any value.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.sum(np.where(p != 0, p * np.log(p / q), 0))
|
|
|
def gradnorm(x, w, b, num_cls, device='cuda'):
    """GradNorm OOD score: L1 norm of the classifier-weight gradient.

    For each feature vector, the loss is the cross-entropy of the softmax
    prediction against an all-ones (unnormalised uniform) target; the score
    is the L1 norm of the resulting gradient on the FC weight. Higher scores
    indicate in-distribution samples.

    Args:
        x: (N, D) numpy feature array.
        w: (num_cls, D) classifier weight matrix.
        b: (num_cls,) classifier bias vector.
        num_cls: Number of classes.
        device: Torch device to run on. Defaults to 'cuda' (the original
            hard-coded behaviour); pass 'cpu' for CPU-only execution.

    Returns:
        np.ndarray of N scores.
    """
    # Rebuild the classifier head as a torch layer so autograd can supply
    # the weight gradient. w.shape is (out, in); Linear takes (in, out).
    fc = torch.nn.Linear(*w.shape[::-1])
    fc.weight.data[...] = torch.from_numpy(w)
    fc.bias.data[...] = torch.from_numpy(b)
    fc.to(device)

    x = torch.from_numpy(x).float().to(device)
    logsoftmax = torch.nn.LogSoftmax(dim=-1).to(device)

    confs = []

    for i in tqdm(x, desc='Computing Gradnorm ID/OOD score'):
        targets = torch.ones((1, num_cls), device=device)
        fc.zero_grad()
        loss = torch.mean(
            torch.sum(-targets * logsoftmax(fc(i[None])), dim=-1))
        loss.backward()
        # Score uses only the weight gradient; the bias gradient is
        # deliberately excluded, matching the reference implementation.
        layer_grad_norm = torch.sum(torch.abs(
            fc.weight.grad.data)).cpu().numpy()
        confs.append(layer_grad_norm)

    return np.array(confs)
|
|
|
def extract_image_feature(args):
    """Build the model from args.cfg/args.checkpoint and return the backbone
    feature of the single image at args.img_path as a (1, D) numpy array."""
    torch.backends.cudnn.benchmark = True

    print('=> Loading model')
    cfg = mmengine.Config.fromfile(args.cfg)
    model = init_model(cfg, args.checkpoint, 0).cuda().eval()

    print('=> Loading image')
    backbone_cfg = cfg.model.backbone
    # Fall back to 224px when the backbone config carries no explicit size.
    img_size = backbone_cfg.img_size if hasattr(backbone_cfg, 'img_size') else 224

    transform = tv.transforms.Compose([
        tv.transforms.Resize((img_size, img_size)),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    image = Image.open(args.img_path).convert('RGB')
    x = transform(image).unsqueeze(0)

    print('=> Extracting feature')
    with torch.no_grad():
        x = x.cuda()
        if backbone_cfg.type == 'BEiTPretrainViT':
            # Average over the token dimension of the patch embeddings.
            feat_batch = model.backbone(x, mask=None)[0].mean(1)
        elif backbone_cfg.type == 'SwinTransformer':
            # Spatially average-pool the (B, C, H, W) feature map.
            fmap = model.backbone(x)[0]
            batch, channels, _, _ = fmap.shape
            feat_batch = fmap.reshape(batch, channels, -1).mean(-1)
        else:
            # Other backbones are expected to emit (B, D) features directly.
            feat_batch = model.backbone(x)[0]
            assert len(feat_batch.shape) == 2
        feature = feat_batch.cpu().numpy()

    print(f'Extracted Feature: {feature.shape}')
    return feature
|
|
|
def main():
    """Score one query image against cached ImageNet ID features using a
    battery of OOD-detection methods, printing an ID/OOD verdict per method.

    NOTE(review): when args.fc does not exist, w/b/num_cls and every
    logit/softmax variable below are never bound, yet most methods use them
    unconditionally -- running without the fc file raises NameError for any
    logit-based method. Confirm the fc file is always expected to exist.
    """
    args = parse_args()
    # Query feature: reuse a cached .pkl when provided, else run the backbone.
    if args.ood_feature and os.path.exists(args.ood_feature):
        feature_ood = mmengine.load(args.ood_feature)
    else:
        feature_ood = extract_image_feature(args)

    # Classifier head: w is (num_cls, D), b is (num_cls,).
    if os.path.exists(args.fc):
        w, b = mmengine.load(args.fc)
        print(f'{w.shape=}, {b.shape=}')
        num_cls = len(b)

    # Training labels: each list-file line ends with the integer class label
    # ("<path> <label>"), split off from the right.
    train_labels = np.array([
        int(line.rsplit(' ', 1)[-1])
        for line in mmengine.list_from_file(args.train_label)
    ], dtype=int)

    print(f'image path: {args.img_path}')

    print('=> Loading features')
    feature_id_train = mmengine.load(args.id_train_feature).squeeze()
    feature_id_val = mmengine.load(args.id_val_feature).squeeze()

    print(f'{feature_id_train.shape=}, {feature_id_val.shape=}')

    if os.path.exists(args.fc):
        print('=> Computing logits...')
        logit_id_train = feature_id_train @ w.T + b
        logit_id_val = feature_id_val @ w.T + b
        logit_ood = feature_ood @ w.T + b

        print('=> Computing softmax...')
        softmax_id_train = softmax(logit_id_train, axis=-1)
        softmax_id_val = softmax(logit_id_val, axis=-1)
        softmax_ood = softmax(logit_ood, axis=-1)

        # Feature-space origin used by ViM/Residual: the least-squares offset
        # u such that the logits of u are approximately zero.
        u = -np.matmul(pinv(w), b)

    # --- MSP: maximum softmax probability. ---
    method = 'MSP'
    if method in args.methods:
        score_id = softmax_id_val.max(axis=-1)
        score_ood = softmax_ood.max(axis=-1)
        result = evaluate(method, score_id, score_ood, args.fpr)

    # --- MaxLogit: maximum raw logit. ---
    method = 'MaxLogit'
    if method in args.methods:
        score_id = logit_id_val.max(axis=-1)
        score_ood = logit_ood.max(axis=-1)
        result = evaluate(method, score_id, score_ood, args.fpr)

    # --- Energy: log-sum-exp over the logits. ---
    method = 'Energy'
    if method in args.methods:
        score_id = logsumexp(logit_id_val, axis=-1)
        score_ood = logsumexp(logit_ood, axis=-1)
        result = evaluate(method, score_id, score_ood, args.fpr)

    # --- Energy+React: energy on features clipped at a train-set quantile. ---
    method = 'Energy+React'
    if method in args.methods:
        # NOTE(review): args.clip_quantile is a string when passed on the CLI
        # (parse_args declares no type=); np.quantile would then fail.
        clip = np.quantile(feature_id_train, args.clip_quantile)
        logit_id_val_clip = np.clip(
            feature_id_val, a_min=None, a_max=clip) @ w.T + b
        score_id = logsumexp(logit_id_val_clip, axis=-1)

        logit_ood_clip = np.clip(feature_ood, a_min=None, a_max=clip) @ w.T + b
        score_ood = logsumexp(logit_ood_clip, axis=-1)
        result = evaluate(method, score_id, score_ood, args.fpr)

    # --- ViM: virtual-logit matching (null-space residual vs. energy). ---
    method = 'ViM'
    if method in args.methods:
        # Principal-subspace dimension heuristic keyed on feature width.
        if feature_id_val.shape[-1] >= 2048:
            DIM = num_cls
        elif feature_id_val.shape[-1] >= 768:
            DIM = 512
        else:
            DIM = feature_id_val.shape[-1] // 2

        ec = EmpiricalCovariance(assume_centered=True)
        ec.fit(feature_id_train - u)
        eig_vals, eigen_vectors = np.linalg.eig(ec.covariance_)
        # NS spans the residual space: eigenvectors after the top-DIM ones
        # (sorted by descending eigenvalue).
        NS = np.ascontiguousarray(
            (eigen_vectors.T[np.argsort(eig_vals * -1)[DIM:]]).T)
        vlogit_id_train = norm(np.matmul(feature_id_train - u, NS), axis=-1)
        # alpha rescales the virtual logit to the scale of the real logits.
        alpha = logit_id_train.max(axis=-1).mean() / vlogit_id_train.mean()

        vlogit_id_val = norm(np.matmul(feature_id_val - u, NS), axis=-1) * alpha
        energy_id_val = logsumexp(logit_id_val, axis=-1)
        score_id = -vlogit_id_val + energy_id_val

        energy_ood = logsumexp(logit_ood, axis=-1)
        vlogit_ood = norm(np.matmul(feature_ood - u, NS), axis=-1) * alpha
        score_ood = -vlogit_ood + energy_ood
        result = evaluate(method, score_id, score_ood, args.fpr)

    # --- Residual: norm of the null-space projection alone (no energy term).
    # NOTE(review): uses DIM = 1000 where ViM uses num_cls for wide features;
    # confirm the asymmetry is intentional. ---
    method = 'Residual'
    if method in args.methods:
        if feature_id_val.shape[-1] >= 2048:
            DIM = 1000
        elif feature_id_val.shape[-1] >= 768:
            DIM = 512
        else:
            DIM = feature_id_val.shape[-1] // 2
        ec = EmpiricalCovariance(assume_centered=True)
        ec.fit(feature_id_train - u)
        eig_vals, eigen_vectors = np.linalg.eig(ec.covariance_)
        NS = np.ascontiguousarray(
            (eigen_vectors.T[np.argsort(eig_vals * -1)[DIM:]]).T)

        score_id = -norm(np.matmul(feature_id_val - u, NS), axis=-1)

        score_ood = -norm(np.matmul(feature_ood - u, NS), axis=-1)
        result = evaluate(method, score_id, score_ood, args.fpr)

    # --- GradNorm: L1 norm of the classifier-weight gradient. ---
    method = 'GradNorm'
    if method in args.methods:
        score_ood = gradnorm(feature_ood, w, b, num_cls)
        score_id = gradnorm(feature_id_val, w, b, num_cls)
        result = evaluate(method, score_id, score_ood, args.fpr)

    # --- Mahalanobis: distance to the nearest classwise Gaussian with a
    # shared (tied) covariance. ---
    method = 'Mahalanobis'
    if method in args.methods:
        train_means = []
        train_feat_centered = []
        for i in tqdm(range(train_labels.max() + 1), desc='Computing classwise mean feature'):
            fs = feature_id_train[train_labels == i]
            _m = fs.mean(axis=0)
            train_means.append(_m)
            train_feat_centered.extend(fs - _m)

        # Tied covariance fit on classwise-centered training features.
        ec = EmpiricalCovariance(assume_centered=True)
        ec.fit(np.array(train_feat_centered).astype(np.float64))

        mean = torch.from_numpy(np.array(train_means)).cuda().float()
        prec = torch.from_numpy(ec.precision_).cuda().float()

        # Score = -min_c (f - mu_c)^T P (f - mu_c): negative squared
        # Mahalanobis distance to the closest class mean.
        score_id = -np.array(
            [(((f - mean) @ prec) * (f - mean)).sum(axis=-1).min().cpu().item()
             for f in tqdm(torch.from_numpy(feature_id_val).cuda().float(), desc='Computing Mahalanobis ID score')])

        score_ood = -np.array([
            (((f - mean) @ prec) * (f - mean)).sum(axis=-1).min().cpu().item()
            for f in tqdm(torch.from_numpy(feature_ood).cuda().float(), desc='Computing Mahalanobis OOD score')
        ])
        result = evaluate(method, score_id, score_ood, args.fpr)

    # --- KL-Matching: KL divergence to the nearest classwise mean softmax.
    # NOTE(review): not in the default --methods list; only runs if requested.
    method = 'KL-Matching'
    if method in args.methods:
        pred_labels_train = np.argmax(softmax_id_train, axis=-1)
        mean_softmax_train = []
        for i in tqdm(range(num_cls), desc='Computing classwise mean softmax'):
            mean_softmax = softmax_id_train[pred_labels_train == i]
            if mean_softmax.shape[0] == 0:
                # No training sample predicted as class i: fall back to zeros.
                mean_softmax_train.append(np.zeros((num_cls)))
            else:
                mean_softmax_train.append(np.mean(mean_softmax, axis=0))

        score_id = -pairwise_distances_argmin_min(
            softmax_id_val, np.array(mean_softmax_train), metric=kl)[1]

        score_ood = -pairwise_distances_argmin_min(
            softmax_ood, np.array(mean_softmax_train), metric=kl)[1]
        result = evaluate(method, score_id, score_ood, args.fpr)
|
|
|
# Script entry point: run the full detection pipeline when executed directly.
if __name__ == '__main__':
    main()
|
|