EasyDetect / pipeline /query_generate.py
sunnychenxiwang's picture
all
55d9644
raw
history blame
6.2 kB
import json
import yaml
import copy
import asyncio
from nltk.corpus import wordnet
class QueryGenerator:
    """Turn a list of claims into verification queries by prompting a chat client.

    Prompt templates are loaded from a YAML file and looked up as
    ``self.prompt[<type>][<category>]["system" | "user"]`` where the
    categories used here are ``"object"``, ``"attribute"``, ``"scene-text"``
    and ``"fact"``.
    """

    def __init__(self, prompt_path, chat):
        """Load the prompt templates and remember the chat client.

        Args:
            prompt_path: Path of the UTF-8 encoded YAML prompt file.
            chat: Client object whose ``get_response(messages=...)`` returns
                an awaitable yielding one reply per conversation.
        """
        # The task type is only known once get_response() is called. The
        # previous code stored the builtin ``type`` here by accident.
        self.type = None
        with open(prompt_path, "r", encoding="utf-8") as file:
            # NOTE(review): yaml.safe_load is the recommended loader when the
            # file could come from an untrusted source; kept as-is because the
            # prompt file is presumably project-controlled -- confirm.
            self.prompt = yaml.load(file, yaml.FullLoader)
        self.chat = chat

    def _conversation(self, category, **fields):
        """Build the system/user message pair for *category* of the current type."""
        templates = self.prompt[self.type][category]
        return [
            {"role": "system", "content": templates["system"]},
            {"role": "user", "content": templates["user"].format(**fields)},
        ]

    def _run_chat(self, messages):
        """Send *messages* to the chat client and block until the replies arrive."""
        # A fresh loop is created and installed as the current loop for every
        # request, exactly as the original inline code did. It is deliberately
        # left open: closing it could break outside code that later fetches
        # the current loop.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        return loop.run_until_complete(self.chat.get_response(messages=messages))

    def objects_extract(self, claim_list, use_attribue=False, response=None):
        """Parse the per-claim object reply into lists and a joined object string.

        Args:
            claim_list: Claims substituted into the object prompt (only used
                when ``use_attribue`` is true).
            use_attribue: When true, query the chat client for the objects;
                otherwise ``response`` must already hold the replies.
            response: Sequence of replies whose first element is a JSON object
                mapping each claim key to a ``"."``-separated object string.

        Returns:
            A tuple ``(per_claim, objects)``: ``per_claim`` maps each claim key
            to its list of object names, ``objects`` is the ``"."``-joined
            string of distinct names other than ``"none"`` in first-seen
            order. A reply that cannot be parsed yields ``({}, "")``.
        """
        if use_attribue:
            # The system prompt is read from the same type-specific section as
            # the user prompt (it used to be looked up at the YAML top level).
            response = self._run_chat(
                [self._conversation("object", claims=claim_list)]
            )

        try:
            per_claim = json.loads(response[0])
        except (TypeError, LookupError, ValueError) as exc:
            # Best effort: a malformed reply means "no objects" instead of
            # failing later on an unparsed value.
            print("objects_extract: could not parse the object reply:", exc)
            per_claim = {}
        if not isinstance(per_claim, dict):
            print("objects_extract: object reply is not a JSON object")
            per_claim = {}

        # spaCy-style entity extraction is not considered for now; for fact
        # claims the entities need to be ignored (handled in filter()).
        # A dict is used as an ordered set so the joined string is stable.
        distinct = {}
        for key, joined_names in list(per_claim.items()):
            names = joined_names.split(".")
            per_claim[key] = names
            for name in names:
                if name != "none":
                    distinct[name] = None
        return per_claim, ".".join(distinct)

    def get_hypernyms(self, word):
        """Return the ``"."``-joined lemma names of all direct hypernyms of *word*.

        Every WordNet synset of *word* is considered; duplicates are removed
        and the names are sorted so the result is reproducible.
        """
        lemma_names = set()
        for synset in wordnet.synsets(word):
            for hypernym in synset.hypernyms():
                lemma_names.update(hypernym.lemma_names())
        return ".".join(sorted(lemma_names))

    def remove_hypernyms(self, objects):
        """Drop every object that appears among the hypernyms of another object.

        Args:
            objects: Iterable of object names.

        Returns:
            The surviving names joined with ``"."``, in iteration order of
            *objects* with duplicates removed.
        """
        names = list(dict.fromkeys(objects))
        hypernyms_of = {name: self.get_hypernyms(name) for name in names}

        kept = []
        for name in names:
            others = ".".join(
                hypernyms for other, hypernyms in hypernyms_of.items() if other != name
            )
            # NOTE(review): this is a substring test on the joined string, so
            # "animal" also matches "domestic_animal". Kept unchanged because
            # callers may rely on it -- confirm whether exact matching is wanted.
            if name not in others:
                kept.append(name)
        return ".".join(kept)

    def filter(self, res, object_list, use_attribue=False):
        """Blank out visual queries for factual claims and collect the objects.

        For every claim that has a fact question (first entry not ``"none"``)
        the object entry and the scene-text questions -- and the attribute
        questions when ``use_attribue`` is true -- are replaced by
        ``["none"]``. Objects of the remaining claims are collected and passed
        through :meth:`remove_hypernyms`.

        Args:
            res: Replies as JSON strings; index 0 holds the attribute
                questions (read only when ``use_attribue`` is true), index 1
                the scene-text questions, index 2 the fact questions.
            object_list: Dict mapping claim keys to lists of object names.
                It is updated in place.
            use_attribue: Whether attribute questions are processed and
                returned.

        Returns:
            ``(attribute, scene_text, fact, objects)`` when ``use_attribue``
            is true, otherwise ``(scene_text, fact, objects)``.
        """
        attribute_ques_list = json.loads(res[0]) if use_attribue else None
        scenetext_ques_list = json.loads(res[1])
        fact_ques_list = json.loads(res[2])

        visual_objects = {}  # ordered set of object names
        for key, fact_questions in fact_ques_list.items():
            if fact_questions[0] != "none":
                # Set the corresponding object entry to none. The entry is
                # addressed by claim key; the previous positional index added
                # a stray integer key to the dict instead.
                object_list[key] = ["none"]
                if use_attribue:
                    attribute_ques_list[key] = ["none"]
                scenetext_ques_list[key] = ["none"]
            else:
                # A claim missing from object_list simply contributes nothing.
                for name in object_list.get(key, ()):
                    if name != "none":
                        visual_objects[name] = None

        objects = self.remove_hypernyms(list(visual_objects))

        if use_attribue:
            return attribute_ques_list, scenetext_ques_list, fact_ques_list, objects
        return scenetext_ques_list, fact_ques_list, objects

    def get_response(self, claim_list, type, use_attribute=False):
        """Generate all queries for *claim_list*.

        Args:
            claim_list: Claims substituted into the prompt templates.
            type: Key of the prompt section to use. For ``"image-to-text"``
                the replies are additionally passed through :meth:`filter`.
            use_attribute: When true, objects are extracted with a separate
                request first and attribute questions are generated as well.

        Returns:
            ``(objects, attribute, scene_text, fact)`` when ``use_attribute``
            is true, otherwise ``(objects, scene_text, fact)``. The question
            values are the parsed JSON replies; ``objects`` is a
            ``"."``-joined string.
        """
        self.type = type
        needs_filter = self.type == "image-to-text"

        if use_attribute:
            object_list, objects = self.objects_extract(
                claim_list=claim_list, use_attribue=True
            )
            self.message_list = [
                self._conversation("attribute", objects=objects, claims=claim_list),
                self._conversation("scene-text", claims=claim_list),
                self._conversation("fact", claims=claim_list),
            ]
            res = self._run_chat(self.message_list)
            if needs_filter:
                # use_attribue must be forwarded, otherwise filter() returns
                # three values and this four-way unpacking fails.
                attribute_ques_list, scenetext_ques_list, fact_ques_list, objects = (
                    self.filter(res, object_list, use_attribue=True)
                )
            else:
                attribute_ques_list = json.loads(res[0])
                scenetext_ques_list = json.loads(res[1])
                fact_ques_list = json.loads(res[2])
            return objects, attribute_ques_list, scenetext_ques_list, fact_ques_list

        self.message_list = [
            self._conversation("object", claims=claim_list),
            self._conversation("scene-text", claims=claim_list),
            self._conversation("fact", claims=claim_list),
        ]
        res = self._run_chat(self.message_list)
        object_list, objects = self.objects_extract(claim_list=claim_list, response=res)
        if needs_filter:
            scenetext_ques_list, fact_ques_list, objects = self.filter(res, object_list)
        else:
            scenetext_ques_list = json.loads(res[1])
            fact_ques_list = json.loads(res[2])
        return objects, scenetext_ques_list, fact_ques_list