Spaces:
Sleeping
Sleeping
import yaml | |
import json | |
import shortuuid | |
import base64 | |
from PIL import Image | |
import os | |
from tqdm import tqdm | |
from PIL import Image | |
from openai import OpenAI | |
# Shared OpenAI-compatible client used by get_openai_reply().
# SECURITY: the API key must never be committed to source control. It is read
# from the OPENAI_API_KEY environment variable; the key that used to be
# embedded on this line should be treated as leaked and revoked.
# OPENAI_BASE_URL may override the endpoint; the previous proxy URL is kept as
# the default so existing deployments keep talking to the same host.
client = OpenAI(
    base_url=os.environ.get("OPENAI_BASE_URL", "https://oneapi.xty.app/v1"),
    api_key=os.environ.get("OPENAI_API_KEY"),
)
# import sys | |
# sys.path.append("/home/wcx/wcx/EasyDetect/tool") | |
from pipeline.tool.object_detetction_model import * | |
from pipeline.tool.google_serper import * | |
def get_openai_reply(image_path, text, model="gpt-4-vision-preview", max_tokens=1024):
    """Send a text prompt plus a local image to a chat-completions model.

    Args:
        image_path: Path of the image file. It is read in binary mode and
            embedded in the request as a base64 ``data:`` URL.
        text: Prompt text sent alongside the image in the same user message.
        model: Model name passed to the chat-completions endpoint. Defaults
            to the value that used to be hard-coded here.
        max_tokens: Upper bound on generated tokens. Defaults to 1024, the
            value that used to be hard-coded here.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    import mimetypes  # local import: keeps this block self-contained

    with open(image_path, "rb") as image_file:
        encoded = base64.b64encode(image_file.read()).decode("utf-8")

    # Declare the MIME type that matches the file extension instead of always
    # claiming JPEG; fall back to JPEG (the old behaviour) when it is unknown.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"
    data_url = f"data:{mime_type};base64,{encoded}"

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": text},
                # The documented shape of an image part is an object with a
                # "url" field rather than a bare string.
                {"type": "image_url", "image_url": {"url": data_url}},
            ],
        }
    ]
    # `client` is the module-level OpenAI client created at import time.
    resp = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
    )
    return resp.choices[0].message.content
class Tool:
    """Gather evidence about an image from a detector, a vision model and web search.

    ``__init__`` sets two attributes:
      * ``detector`` - a ``GroundingDINO`` built from the YAML config.
      * ``search``   - a ``GoogleSerperAPIWrapper``.
    """

    # Location that used to be hard-coded inside __init__; kept as the default
    # so that ``Tool()`` behaves exactly as before.
    DEFAULT_CONFIG_PATH = "/home/wcx/wcx/GroundingDINO/LVLM/config/config.yaml"

    def __init__(self, config_path=DEFAULT_CONFIG_PATH):
        """Load the YAML config and build the detector and search wrappers.

        Args:
            config_path: Path of the YAML file handed to ``GroundingDINO``.
        """
        # Use a context manager so the file handle is closed deterministically.
        # NOTE(review): FullLoader is kept from the original; the config is
        # presumably a trusted local file - switch to yaml.safe_load if not.
        with open(config_path, "r") as config_file:
            config = yaml.load(config_file, Loader=yaml.FullLoader)
        self.detector = GroundingDINO(config=config)
        self.search = GoogleSerperAPIWrapper()

    @staticmethod
    def _has_content(entries):
        """Return True when *entries* is non-empty and its first item is not "none"."""
        return bool(entries) and entries[0] != "none"

    def execute(self, image_path, new_path, objects, attribute_list,
                scenetext_list, fact_list, use_attribute=False):
        """Run the evidence tools for one image.

        Args:
            image_path: Image path forwarded to the detector (and to
                ``get_openai_reply`` when attribute queries are sent).
            new_path: Forwarded unchanged to the detector as ``new_path``.
            objects: Forwarded to the detector as ``content`` for the
                object-detection pass.
            attribute_list: Mapping whose values are lists of query strings;
                a list whose first item is "none" (or an empty list) is
                skipped. Only consulted when ``use_attribute`` is true.
            scenetext_list: Mapping of the same shape; if any value has
                content, an extra detector pass with ``use_text_rec=True``
                is made.
            fact_list: Mapping of the same shape; every value with content is
                passed (as ``str(value)``) to the search wrapper.
            use_attribute: When true, the attribute queries are numbered and
                sent to ``get_openai_reply``. Defaults to False, which matches
                the previously hard-coded flag (attribute lookup disabled).

        Returns:
            A tuple ``(object_res, attribute_res, text_res, fact_res)``:
            the detector result for ``objects``; the vision-model reply or
            "none information"; the scene-text detector result or ``None``;
            and the numbered evidence lines or "none information".
        """
        # Scene text: one extra detector pass if any entry asks for it.
        use_text_rec = any(
            self._has_content(entries) for entries in scenetext_list.values()
        )
        text_res = None
        if use_text_rec:
            text_res = self.detector.execute(
                image_path=image_path,
                content="word.number",
                new_path=new_path,
                use_text_rec=True,
            )

        object_res = self.detector.execute(
            image_path=image_path,
            content=objects,
            new_path=new_path,
            use_text_rec=False,
        )

        # Attribute queries: numbered consecutively across all keys.
        queries = ""
        if use_attribute:
            pending = [
                query
                for entries in attribute_list.values()
                if self._has_content(entries)
                for query in entries
            ]
            queries = "".join(
                f"{idx}.{query}\n" for idx, query in enumerate(pending, start=1)
            )
        if queries == "":
            attribute_res = "none information"
        else:
            attribute_res = get_openai_reply(image_path, queries)

        # Facts: collect evidence for every entry, numbered consecutively.
        evidence_lines = []
        for claims in fact_list.values():
            if not self._has_content(claims):
                continue
            evidence_lines.extend(
                self.search.execute(input="", content=str(claims))
            )
        fact_res = "".join(
            f"{idx}.{evidence}\n"
            for idx, evidence in enumerate(evidence_lines, start=1)
        )
        if fact_res == "":
            fact_res = "none information"

        return object_res, attribute_res, text_res, fact_res
# if __name__ == '__main__': | |
# tool = Tool() | |
# extractor = Extractor(model="gpt-4-1106-preview", config_path= "/home/wcx/wcx/GroundingDINO/LVLM/prompt-v2-multi-claim/object_extract.yaml", type="image-to-text") | |
# # "/home/wcx/wcx/LVLMHall-test/text-to-image/labeled.json" | |
# query = Query(config_path="/home/wcx/wcx/GroundingDINO/LVLM/prompt-v2-multi-claim/query.yaml",type="image-to-text") | |
# path = "/home/wcx/wcx/LVLMHall-test/MSCOCO/caption/labeled/minigpt4-100-cx-revise-v1.json" | |
# with open(path, "r", encoding="utf-8") as f: | |
# for idx, line in tqdm(enumerate(f.readlines()), total=250): | |
# # if idx < 189: | |
# # continue | |
# data = data2 | |
# #data = json.loads(line) | |
# image_path = data["image_path"]#"/newdisk3/wcx" + data["image_path"] | |
# claim_list = "" | |
# cnt = 1 | |
# for seg in data["segments"]: | |
# for cla in seg["claims"]: | |
# claim_list += "claim" + str(cnt) + ": " + cla["claim"] + "\n" | |
# cnt += 1 | |
# object_list, objects = extractor.get_response(claims=claim_list) | |
# print("pre:" + objects) | |
# attribute_list, scenetext_list, fact_list, objects = query.get_response(claim_list, objects, object_list) | |
# print("after:" + objects) | |
# print(object_list) | |
# print(attribute_list) | |
# print(scenetext_list) | |
# print(fact_list) | |
# object_res, attribue_res, text_res, fact_res = tool.execute(image_path=image_path, | |
# new_path="/newdisk3/wcx/MLLM/image-to-text/minigpt4/", | |
# attribute_list=attribute_list, | |
# scenetext_list=scenetext_list, | |
# fact_list=fact_list, | |
# objects=objects) | |
# # print(object_res) | |
# # print(attribue_res) | |
# # print(text_res) | |
# #print(fact_res[:50]) | |
# print("=============================") | |
# break | |