File size: 3,072 Bytes
24c4def
 
 
 
55d9644
24c4def
55d9644
24c4def
 
 
 
 
 
 
55d9644
24c4def
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55d9644
 
 
 
 
 
24c4def
 
55d9644
24c4def
55d9644
24c4def
55d9644
24c4def
 
 
55d9644
 
24c4def
 
 
55d9644
24c4def
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import json
import yaml
import base64
class Judger:
    """Ask a multimodal chat model to judge a list of claims about an image.

    The prompt sent to the model is assembled from a YAML prompt file (loaded
    once in ``__init__``), two few-shot example images, the image under test,
    and the textual output of upstream expert models (object detection,
    scene-text recognition, external knowledge).

    Attributes:
        prompt: Parsed content of the YAML prompt file. ``get_response`` reads
            ``prompt[type]["system"|"user"|"example1"|"example2"]``.
        chat: Object exposing ``get_response(message=...)``.
        example_image_dir: Directory containing the few-shot example images
            (``sandbeach.jpg``, ``football.jpg``, ``animal.jpg``,
            ``ball.jpg``).
    """

    # Default location of the few-shot example images. Kept identical to the
    # path that used to be hard-coded so existing deployments keep working;
    # override it per instance via the ``example_image_dir`` constructor
    # argument when running on another machine.
    example_image_dir = "/home/wcx/wcx/GroundingDINO/LVLM/cot/img_examples"

    # Example image file names (first, second) per task type. Every task type
    # that is not listed here falls back to ``_DEFAULT_EXAMPLE_IMAGES``.
    _EXAMPLE_IMAGES = {
        "image-to-text": ("sandbeach.jpg", "football.jpg"),
    }
    _DEFAULT_EXAMPLE_IMAGES = ("animal.jpg", "ball.jpg")

    # Final text segment of the user message. Written with explicit newlines
    # so the significant whitespace (including the trailing space after
    # "Output:") is visible and cannot be stripped by an editor.
    _INPUT_TEMPLATE = (
        "\n"
        "                Here is the object detection expert model's result:\n"
        "                {object}\n"
        "\n"
        "                Here is the scene text recognition expert model's result:\n"
        "                {text}\n"
        "\n"
        "                Here is the external knowledge:\n"
        "                {fact}\n"
        "\n"
        "                Here is the claim list:\n"
        "                {claims}\n"
        "\n"
        "                Output: \n"
        "            "
    )

    def __init__(self, prompt_path, chat, example_image_dir=None):
        """Load the prompt file and remember the chat backend.

        Args:
            prompt_path: Path to a UTF-8 encoded YAML file with the prompts.
            chat: Object exposing ``get_response(message=...)``.
            example_image_dir: Optional directory holding the few-shot example
                images. When omitted, the class-level default is used.
        """
        with open(prompt_path, "r", encoding="utf-8") as file:
            # NOTE(review): FullLoader is kept to preserve behavior. If prompt
            # files can ever come from an untrusted source, switch to
            # yaml.safe_load.
            self.prompt = yaml.load(file, yaml.FullLoader)
        self.chat = chat
        if example_image_dir is not None:
            self.example_image_dir = example_image_dir

    def encode_image(self, image_path: str) -> str:
        """Return the content of ``image_path`` as a base64 text string."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    @staticmethod
    def _format_detections(result):
        """Render a detection result as one ``"<phrase> <box>"`` line per item.

        ``result`` must provide parallel ``"phrases"`` and ``"boxes"``
        sequences; every rendered line ends with a newline.
        """
        return "".join(
            f"{phrase} {box!s}\n"
            for phrase, box in zip(result["phrases"], result["boxes"])
        )

    def _example_images(self, type):
        """Return the two base64-encoded few-shot example images for ``type``."""
        names = self._EXAMPLE_IMAGES.get(type, self._DEFAULT_EXAMPLE_IMAGES)
        first, second = (
            self.encode_image(f"{self.example_image_dir}/{name}")
            for name in names
        )
        return first, second

    def get_response(self, type, object_res, attribue_res, text_res, fact_res, claim_list, image_path):
        """Query the chat model and return its (parsed) verdict.

        Args:
            type: Task type; selects the prompt section ``self.prompt[type]``.
                ``"image-to-text"`` uses the sandbeach/football example
                images, any other value uses the animal/ball ones.
            object_res: Object detection result with parallel ``"phrases"``
                and ``"boxes"`` sequences.
            attribue_res: Accepted for interface compatibility; not used when
                building the prompt.
            text_res: Scene-text recognition result with the same layout as
                ``object_res``, or ``None`` when no such result is available.
            fact_res: External knowledge, inserted into the prompt via
                ``str.format``.
            claim_list: Claims to judge, inserted into the prompt via
                ``str.format``.
            image_path: Path of the image the claims refer to.

        Returns:
            The chat response decoded with ``json.loads``. When the response
            cannot be decoded, the error is printed and the raw response is
            returned unchanged.
        """
        object_det = self._format_detections(object_res)
        if text_res is not None:
            text_det = self._format_detections(text_res)
        else:
            text_det = "none information"

        img1, img2 = self._example_images(type)
        base64_source_image = self.encode_image(image_path)

        prompt = self.prompt[type]
        user_text = self._INPUT_TEMPLATE.format(
            object=object_det,
            text=text_det,
            fact=fact_res,
            claims=claim_list,
        )
        # Few-shot layout: instructions, example 1 (image + answer),
        # example 2 (image + answer), then the real image and its evidence.
        content = [
            {"type": "text", "text": prompt["user"]},
            {"type": "image_url", "image_url": f"data:image/jpeg;base64,{img1}"},
            {"type": "text", "text": prompt["example1"]},
            {"type": "image_url", "image_url": f"data:image/jpeg;base64,{img2}"},
            {"type": "text", "text": prompt["example2"]},
            {"type": "image_url", "image_url": f"data:image/jpeg;base64,{base64_source_image}"},
            {"type": "text", "text": user_text},
        ]
        message = [
            {"role": "system", "content": prompt["system"]},
            {"role": "user", "content": content},
        ]

        response = self.chat.get_response(message=message)
        try:
            response = json.loads(response)
        except (TypeError, ValueError) as e:
            # Best effort: json.JSONDecodeError is a ValueError, and a non-text
            # response raises TypeError. Hand the raw response back to the
            # caller instead of failing.
            print(e)
        return response