|
import json |
|
import numpy as np |
|
from tqdm import tqdm |
|
from pathlib import Path |
|
|
|
from videollava.constants import DEFAULT_VIDEO_TOKEN |
|
|
|
from infer_utils import run_inference_single |
|
|
|
|
|
def run_cdvqa_inference( |
|
model, |
|
dataset_path, |
|
processor, |
|
tokenizer, |
|
conv_mode, |
|
use_video_data=False, |
|
open_prompt=None, |
|
repeat_frames=None, |
|
prompt_strategy="interleave", |
|
chronological_prefix=True, |
|
data_frac=1, |
|
data_size=None, |
|
delete_system_prompt=False, |
|
last_image=False, |
|
start_ind=None, |
|
end_ind=None, |
|
print_prompt=False, |
|
**kwargs |
|
): |
|
for k, v in kwargs.items(): |
|
print("WARNING: Unused argument:", k, v) |
|
|
|
dataset_path = Path(dataset_path) |
|
data_dir = dataset_path.parent |
|
questions_path = data_dir / dataset_path.name.replace(".json", "_questions.json") |
|
answers_path = data_dir / dataset_path.name.replace(".json", "_answers.json") |
|
images_path = data_dir / dataset_path.name.replace(".json", "_images.json") |
|
|
|
with open(questions_path) as json_data: |
|
questionsJSON = json.load(json_data) |
|
|
|
with open(answers_path) as json_data: |
|
answersJSON = json.load(json_data) |
|
|
|
with open(images_path) as json_data: |
|
imagesJSON = json.load(json_data) |
|
|
|
if data_size is not None: |
|
data_size = min(data_size, len(questionsJSON)) |
|
idx = np.random.choice(len(questionsJSON), data_size, replace=False) |
|
imagesJSON = [imagesJSON[i] for i in idx] |
|
elif data_frac < 1: |
|
idx = np.random.choice(len(questionsJSON), int(len(questionsJSON) * data_frac), replace=False) |
|
imagesJSON = [imagesJSON[i] for i in idx] |
|
|
|
|
|
images_ids = [img['id'] for img in imagesJSON['images'] if img['active']] |
|
|
|
if start_ind is not None and end_ind is not None: |
|
print("Subsetting data from index", start_ind, "to", end_ind) |
|
images_ids = images_ids[start_ind:end_ind] |
|
elif start_ind is not None: |
|
print("Subsetting data from index", start_ind, "to end") |
|
images_ids = images_ids[start_ind:] |
|
elif end_ind is not None: |
|
print("Subsetting data from start to index", end_ind) |
|
images_ids = images_ids[:end_ind] |
|
|
|
|
|
answers = {} |
|
|
|
for id in tqdm(images_ids): |
|
file_name = imagesJSON['images'][id]['file_name'] |
|
|
|
image_paths = [ |
|
str(data_dir / "second_dataset" / "im1" / file_name), |
|
str(data_dir / "second_dataset" / "im2" / file_name), |
|
] |
|
|
|
for questionid in imagesJSON['images'][id]['questions_ids']: |
|
question = questionsJSON['questions'][questionid] |
|
if not question['active']: |
|
continue |
|
inp = "This is a pair of satellite images capturing the same location at different times: " |
|
inp = inp + DEFAULT_VIDEO_TOKEN + '\n' |
|
inp = inp + question["question"] |
|
type_str = question["type"] |
|
answer_str = answersJSON['answers'][question["answers_ids"][0]]['answer'] |
|
|
|
if type_str in ["change_or_not", "increase_or_not", "decrease_or_not"]: |
|
inp = inp + " Answer with yes or no." |
|
|
|
elif type_str == "change_ratio": |
|
inp = inp + " Choose from one of the following options: 0, 0_to_10, 10_to_20, 20_to_30, 30_to_40, 40_to_50, 50_to_60, 60_to_70, 70_to_80, 80_to_90, 90_to_100." |
|
|
|
elif type_str == "change_ratio_types": |
|
inp = inp + " Choose from one of the following options: 0, 0_to_10, 10_to_20, 20_to_30, 30_to_40, 40_to_50, 50_to_60, 60_to_70." |
|
|
|
else: |
|
inp = inp + " Choose from one of the following options: buildings, low_vegetation, nonvegetated ground surface, playgrounds, trees, water." |
|
answer_str = answer_str.replace("NVG_surface", "nonvegetated ground surface") |
|
|
|
outputs = run_inference_single( |
|
model=model, |
|
processor=processor, |
|
tokenizer=tokenizer, |
|
conv_mode=conv_mode, |
|
inp=inp, |
|
image_paths=image_paths, |
|
metadata=None, |
|
use_video_data=use_video_data, |
|
repeat_frames=repeat_frames, |
|
prompt_strategy=prompt_strategy, |
|
chronological_prefix=chronological_prefix, |
|
delete_system_prompt=delete_system_prompt, |
|
last_image=last_image, |
|
print_prompt=print_prompt |
|
) |
|
|
|
answers[f"{id}_{questionid}"] = { |
|
"predicted": outputs, |
|
"ground_truth": answer_str, |
|
"task": type_str |
|
} |
|
|
|
return answers |
|
|