"""Evaluate model answers on the POPE benchmark.

For every coco_pope_*.json annotation file, compare the model's answers with the
ground-truth labels and report accuracy, precision, recall, F1 score, and yes-ratio.
"""
import os
import json
import argparse

# Running sum of per-category F1 scores, used to report the final average.
avg_f1_score = 0


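# Expected input line formats, inferred from the fields this script reads
# (one JSON object per line):
#   annotation/label file : {'label': 'yes' | 'no', ...}
#   question file         : {'question_id': ..., 'category': ..., ...}
#   result file           : {'question_id': ..., 'text': '<model answer>', ...}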
def eval_pope(answers, label_file):
    # Ground-truth labels, one JSON object per line of the annotation file.
    with open(label_file, 'r') as f:
        label_list = [json.loads(line)['label'] for line in f]

    # Normalize each free-form answer to a binary 'yes' / 'no'.
    for answer in answers:
        text = answer['text']

        # Only keep the first sentence.
        if text.find('.') != -1:
            text = text.split('.')[0]

        text = text.replace(',', '')
        words = text.split(' ')
        if 'No' in words or 'not' in words or 'no' in words:
            answer['text'] = 'no'
        else:
            answer['text'] = 'yes'

    # Convert string labels to binary: 'no' -> 0, anything else -> 1.
    for i in range(len(label_list)):
        if label_list[i] == 'no':
            label_list[i] = 0
        else:
            label_list[i] = 1

    # Binarize the (already normalized) predictions the same way.
    pred_list = []
    for answer in answers:
        if answer['text'] == 'no':
            pred_list.append(0)
        else:
            pred_list.append(1)

    pos = 1
    neg = 0
    yes_ratio = pred_list.count(1) / len(pred_list)

    TP, TN, FP, FN = 0, 0, 0, 0
    for pred, label in zip(pred_list, label_list):
        if pred == pos and label == pos:
            TP += 1
        elif pred == pos and label == neg:
            FP += 1
        elif pred == neg and label == neg:
            TN += 1
        elif pred == neg and label == pos:
            FN += 1

    str_to_log = ''
    str_to_log += 'TP\tFP\tTN\tFN\t\n'
    str_to_log += '{}\t{}\t{}\t{}\n'.format(TP, FP, TN, FN)

    precision = float(TP) / float(TP + FP)
    recall = float(TP) / float(TP + FN)
    f1 = 2 * precision * recall / (precision + recall)
    global avg_f1_score
    avg_f1_score += f1
    acc = (TP + TN) / (TP + TN + FP + FN)
    str_to_log += 'Accuracy: {}\n'.format(acc)
    str_to_log += 'Precision: {}\n'.format(precision)
    str_to_log += 'Recall: {}\n'.format(recall)
    str_to_log += 'F1 score: {}\n'.format(f1)
    str_to_log += 'Yes ratio: {}\n'.format(yes_ratio)
    str_to_log += '%.3f, %.3f, %.3f, %.3f, %.3f\n' % (f1, acc, precision, recall, yes_ratio)
    print(str_to_log)
    return str_to_log
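
# Example use of eval_pope (illustrative records, not benchmark data):
#   answers = [{'question_id': 0, 'text': 'No, there is no dog in the image.'}]
#   eval_pope(answers, 'annotations/coco_pope_random.json')
# The answer above would be normalized to 'no' and scored against the labels.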


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--annotation-dir", type=str)
    parser.add_argument("--question-file", type=str)
    parser.add_argument("--result-file", type=str)
    parser.add_argument("--model-name", type=str, default='')
    args = parser.parse_args()
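
    # Example invocation (script and file names are illustrative):
    #   python eval_pope.py --annotation-dir ./pope_annotations \
    #       --question-file ./pope_questions.jsonl \
    #       --result-file ./answers/model_answers.jsonl \
    #       --model-name my_model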

    # Index questions by question_id so each answer can be matched to its category.
    with open(args.question_file) as f:
        questions = [json.loads(line) for line in f]
    questions = {question['question_id']: question for question in questions}
    with open(args.result_file) as f:
        answers = [json.loads(line) for line in f]
    outputs = ''

    # Evaluate each POPE annotation split (category) separately.
    num_categories = 0
    for file in os.listdir(args.annotation_dir):
        assert file.startswith('coco_pope_')
        assert file.endswith('.json')
        category = file[len('coco_pope_'):-len('.json')]
        cur_answers = [x for x in answers if questions[x['question_id']]['category'] == category]
        outputs += 'Category: {}, # samples: {}\n'.format(category, len(cur_answers))
        print('Category: {}, # samples: {}'.format(category, len(cur_answers)))
        outputs += eval_pope(cur_answers, os.path.join(args.annotation_dir, file))
        num_categories += 1
        print("====================================")

    # Average F1 across all evaluated categories (the standard POPE setup has three splits).
    print(f"Average F1-score: {avg_f1_score / num_categories:.4f}")
    with open(f"/data/naman_deep_singh/project_multimodal/pope_evals/{args.model_name}.txt", 'w') as f:
        f.write(outputs)
        f.write(f"Average F1-score: {avg_f1_score / num_categories:.4f}\n")