r"""Build complete prompts from attribute (keyword) dictionaries.

This module converts a keyword dictionary into descriptive text and generates
complete prompts, which reduces the cost of comparison experiments and
improves control and efficiency.

Prompt comparison experiments need text pairs in which the key attributes
change while all other attributes stay constant. When the controlled
attributes vary a lot, writing those texts by manual copy-and-paste becomes
very time-consuming.

The module is built around three kinds of classes:

1. ``BaseAttribute2Text``: converts a single attribute into text.
2. ``MultiAttr2Text``: converts multiple attributes into text and outputs
   ``List[Tuple[str, str]]``. How that output is turned into the final text
   is implemented in ``MultiAttr2PromptTemplate``.
3. ``MultiAttr2PromptTemplate``: first turns the multi-attribute text
   produced by (2) into complete text, then splices it into the built-in
   ``template``. The spliced text is the prompt actually fed to the model.

   1. If ``template`` contains no ``{}`` but does contain characters, it is
      treated as the complete prompt for the network.
   2. If ``template`` contains ``{key}``, it is treated as a keyword
      template: the order of the attributes is fully determined by the
      template string, and the keyword content is produced by ``attr2text``
      from the relevant table columns.
   3. If ``template`` contains exactly one ``{}``, e.g.
      ``a portrait of {}``, the order of the content is given by the
      ``attrs`` list predefined in ``PresetMultiAttr2PromptTemplate``.
"""

from typing import List, Tuple, Union

from mmcm.utils.str_util import (
    has_key_brace,
    merge_near_same_char,
    get_word_from_key_brace_string,
)

from .attributes import MultiAttr2Text, merge_multi_attrtext, AttriributeIsText
from . import AttrRegister


class MultiAttr2PromptTemplate:
    """Convert multiple attributes into the text actually fed to the model."""

    def __init__(
        self,
        template: str,
        attr2text: MultiAttr2Text,
        name: str,
    ) -> None:
        """Store the template and the multi-attribute converter.

        Args:
            template (str): Prompt template. An empty string is replaced by
                ``"{}"``. If the template contains ``{key}``, the value for
                that part of the prompt is taken by key. If it contains
                exactly one ``{}`` (e.g. ``"a portrait of {}"``), the
                attribute texts are concatenated in order; per the module
                documentation that order comes from the ``attrs`` list
                predefined in ``PresetMultiAttr2PromptTemplate``.
            attr2text (MultiAttr2Text): Converter that turns an attribute
                dictionary into attribute text.
            name (str): Name of this template instance, for easy reference.
        """
        self.attr2text = attr2text
        self.name = name
        # An empty template degenerates to a single positional placeholder.
        if template == "":
            template = "{}"
        self.template = template
        # Cached result of ``has_key_brace`` for the (normalized) template.
        self.template_has_key_brace = has_key_brace(template)

    def __call__(self, attributes: dict) -> Union[str, List[str]]:
        """Render the prompt(s) for ``attributes``.

        Args:
            attributes (dict): Attribute dictionary passed to ``attr2text``.

        Returns:
            Union[str, List[str]]: A single prompt string when exactly one
            prompt is produced, otherwise the list of prompts.
        """
        texts = self.attr2text(attributes)
        # ``attr2text`` may return one item or a list; normalize to a list.
        if not isinstance(texts, list):
            texts = [texts]
        prompts = [merge_multi_attrtext(text, self.template) for text in texts]
        prompts = [merge_near_same_char(prompt) for prompt in prompts]
        # Unwrap a single result so callers get a plain string.
        if len(prompts) == 1:
            return prompts[0]
        return prompts


class KeywordMultiAttr2PromptTemplate(MultiAttr2PromptTemplate):
    """Prompt template whose attributes are named by ``{key}`` placeholders."""

    def __init__(self, template: str, name: str = "keywords") -> None:
        """Build the attribute converters from the keywords in ``template``.

        Steps:

        1. Extract the keyword attributes from the keyword template string.
        2. For each keyword, instantiate the class registered under that
           keyword in ``AttrRegister``; keywords that are not registered
           fall back to ``AttriributeIsText``.
        3. Wrap all converters in a ``MultiAttr2Text``.

        Args:
            template (str): Template string containing ``{key}`` placeholders.
            name (str, optional): Name of this template; currently has no
                practical use. Defaults to "keywords".

        Raises:
            AssertionError: If ``template`` contains no ``{key}`` placeholder.
        """
        # Raised explicitly instead of using ``assert`` so the validation is
        # not stripped under ``python -O``; the exception type is unchanged.
        if not has_key_brace(template):
            raise AssertionError(
                "template should have key brace, but given {}".format(template)
            )
        keywords = get_word_from_key_brace_string(template)
        funcs = [
            AttrRegister[word](name=word)
            if word in AttrRegister
            else AttriributeIsText(name=word)
            for word in keywords
        ]
        attr2text = MultiAttr2Text(funcs, name=name)
        super().__init__(template, attr2text, name)


class OnlySpacePromptTemplate(MultiAttr2PromptTemplate):
    """Blank template: every call yields the same empty prompt."""

    def __init__(self, template: str, name: str = "space_prompt") -> None:
        """Create a template that ignores its input attributes.

        No attribute converter is used: ``attr2text`` is set to ``None``.

        Args:
            template (str): Template string; it is stored by the base class
                but not used by ``__call__``.
            name (str, optional): Name of this template; currently has no
                practical use. Defaults to "space_prompt".
        """
        attr2text = None
        super().__init__(template, attr2text, name)

    def __call__(self, attributes: dict) -> Union[str, List[str]]:
        """Return the blank prompt regardless of ``attributes``.

        NOTE(review): the class name and the original documentation speak of
        a "space string", but the value returned is the empty string ``""``;
        confirm which one downstream callers expect.

        Args:
            attributes (dict): Ignored.

        Returns:
            Union[str, List[str]]: Always ``""``.
        """
        return ""