import base64
import json

import yaml


class Judger:
    """Build a few-shot multimodal prompt and ask a chat model to judge claims.

    The prompt combines a system message with a user message made of
    instructions, two worked examples (image + text each), the image under
    test, and a query listing detected objects, recognized scene text,
    external knowledge and the claims to judge.
    """

    # Default few-shot example images, paired in order with the "example1"
    # and "example2" prompt entries. These are the paths the original code
    # used; pass ``example_image_paths`` to ``__init__`` to use others.
    example_image_paths = (
        "/home/wcx/wcx/GroundingDINO/LVLM/cot/img_examples/animal.jpg",
        "/home/wcx/wcx/GroundingDINO/LVLM/cot/img_examples/ball.jpg",
    )

    # NOTE(review): the source this was recovered from had all whitespace
    # flattened, so any line breaks the template originally contained could
    # not be restored; the text is reproduced exactly as seen. Confirm the
    # intended layout against the prompt design.
    _QUERY_TEMPLATE = (
        " Here is the object detection expert model's result: {object}"
        " Here is the scene text recognition expert model's result: {text}"
        " Here is the external knowledge: {fact}"
        " Here is the claim list: {claims}"
        " Output: "
    )

    def __init__(self, prompt_path, chat, type, example_image_paths=None):
        """Load the prompt section named ``type`` and store the chat client.

        Args:
            prompt_path: Path to a UTF-8 YAML file of prompt sections.
            chat: Object exposing ``get_response(message=...)``.
            type: Top-level key of the YAML document to use. (The name
                shadows the builtin but is kept for caller compatibility.)
            example_image_paths: Optional pair of image paths for the two
                few-shot examples; defaults to the class-level paths.

        Raises:
            ValueError: If ``example_image_paths`` does not hold exactly two
                paths.
        """
        with open(prompt_path, "r", encoding="utf-8") as file:
            # NOTE(review): FullLoader can construct more than plain data
            # types; if the prompt file could ever come from an untrusted
            # source, switch to yaml.safe_load.
            self.prompt = yaml.load(file, yaml.FullLoader)[type]
        self.chat = chat
        if example_image_paths is not None:
            paths = tuple(example_image_paths)
            if len(paths) != 2:
                raise ValueError(
                    "example_image_paths must contain exactly two paths"
                )
            # Instance attribute overrides the class-level default.
            self.example_image_paths = paths

    def encode_image(self, image_path):
        """Return the contents of ``image_path`` as a base64 text string."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    @staticmethod
    def _format_detections(detections):
        """Render ``phrases``/``boxes`` pairs as one ``"<phrase> <box>"`` line each."""
        return "".join(
            f"{phrase} {box}\n"
            for phrase, box in zip(detections["phrases"], detections["boxes"])
        )

    @staticmethod
    def _image_part(encoded_image):
        """Wrap base64 image data as an ``image_url`` content part."""
        return {
            "type": "image_url",
            "image_url": f"data:image/jpeg;base64,{encoded_image}",
        }

    def get_response(self, object_res, attribue_res, text_res, fact_res,
                     claim_list, image_path):
        """Query the chat model about ``claim_list`` for the given image.

        Args:
            object_res: Mapping with parallel ``"phrases"`` and ``"boxes"``
                sequences from the object detector.
            attribue_res: Accepted for interface compatibility; not used by
                this method.
            text_res: Mapping shaped like ``object_res`` from the scene-text
                recognizer, or ``None`` when no result is available.
            fact_res: External knowledge, inserted into the prompt via
                ``str.format``.
            claim_list: Claims to judge, inserted into the prompt via
                ``str.format``.
            image_path: Path of the image under test.

        Returns:
            The model reply parsed as JSON; if the reply cannot be parsed,
            the error is printed and the raw reply is returned unchanged.
        """
        object_det = self._format_detections(object_res)
        # Only a missing result is reported as "none information"; a present
        # but empty result yields an empty string.
        if text_res is not None:
            text_det = self._format_detections(text_res)
        else:
            text_det = "none information"

        first_example, second_example = (
            self.encode_image(path) for path in self.example_image_paths
        )
        source_image = self.encode_image(image_path)

        query = self._QUERY_TEMPLATE.format(
            object=object_det,
            text=text_det,
            fact=fact_res,
            claims=claim_list,
        )
        content = [
            {"type": "text", "text": self.prompt["user"]},
            self._image_part(first_example),
            {"type": "text", "text": self.prompt["example1"]},
            self._image_part(second_example),
            {"type": "text", "text": self.prompt["example2"]},
            self._image_part(source_image),
            {"type": "text", "text": query},
        ]
        message = [
            {"role": "system", "content": self.prompt["system"]},
            {"role": "user", "content": content},
        ]

        response = self.chat.get_response(message=message)
        try:
            return json.loads(response)
        except (ValueError, TypeError) as exc:
            # Best effort: a non-JSON (or non-text) reply is handed back
            # as-is so the caller can still inspect it.
            print(exc)
            return response