Spaces: Menyu / Running on Zero

animelover committed
Commit f16fef6
1 Parent(s): ecfb95b

Correctly convert prompt (#1)

- Correctly convert prompt (b75de531977fdea2e1fa26885437e25dbc4eae8c)


Co-authored-by: animelover <[email protected]>

Files changed (1)
  1. app.py +185 -64
app.py CHANGED
@@ -8,76 +8,194 @@ from compel import Compel, ReturnedEmbeddingsType
 
 import re
 
-def tokenize_line(text, tokenizer):
-    tokens = tokenizer.tokenize(text)
-    return tokens
-
 def parse_prompt_attention(text):
     res = []
-    pattern = re.compile(r"\(([^)]+):([\d\.]+)\)")
-    matches = pattern.findall(text)
-    for match in matches:
-        res.append((match[0], float(match[1])))
     return res
 
-def prompt_attention_to_invoke_prompt(attention_list):
-    prompt = ""
-    for item in attention_list:
-        prompt += f"({item[0]}:{item[1]}) "
-    return prompt.strip()
-
-def merge_embeds(prompts, compel):
-    embeds = []
-    pooled_embeds = []
-    for prompt in prompts:
-        conditioning, pooled = compel(prompt)
-        embeds.append(conditioning)
-        pooled_embeds.append(pooled)
-    # Merge the embeddings; the mean is used here and can be adjusted as needed
-    merged_embed = torch.mean(torch.stack(embeds), dim=0)
-    merged_pooled = torch.mean(torch.stack(pooled_embeds), dim=0)
-    return merged_embed, merged_pooled
 
 def get_embed_new(prompt, pipeline, compel, only_convert_string=False, compel_process_sd=False):
     if compel_process_sd:
         return merge_embeds(tokenize_line(prompt, pipeline.tokenizer), compel)
     else:
         # fix bug: excessive emphasis in weights conversion
-        prompt = prompt.replace("((", "(").replace("))", ")")
 
         # Convert to Compel
         attention = parse_prompt_attention(prompt)
-
-        # New: handle the case where attention is empty
-        if not attention:
-            if only_convert_string:
-                return prompt
-            else:
-                conditioning, pooled = compel(prompt)
-                return conditioning, pooled
 
-        global_attention_chunks = []
-        # The section below is unchanged
         for att in attention:
-            for chunk in att[0].split(','):
-                temp_prompt_chunks = tokenize_line(chunk, pipeline.tokenizer)
-                for small_chunk in temp_prompt_chunks:
                     temp_dict = {
                         "weight": round(att[1], 2),
-                        "length": len(pipeline.tokenizer.tokenize(f'{small_chunk},')),
-                        "prompt": f'{small_chunk},'
                     }
-                    global_attention_chunks.append(temp_dict)
 
         max_tokens = pipeline.tokenizer.model_max_length - 2
-        global_prompt_chunks = []
         current_list = []
         current_length = 0
-        for item in global_attention_chunks:
-            if current_length + item['length'] > max_tokens:
-                global_prompt_chunks.append(current_list)
                 current_list = [[item['prompt'], item['weight']]]
-                current_length = item['length']
             else:
                 if not current_list:
                     current_list.append([item['prompt'], item['weight']])
@@ -86,14 +204,19 @@ def get_embed_new(prompt, pipeline, compel, only_convert_string=False, compel_pr
                         current_list.append([item['prompt'], item['weight']])
                     else:
                         current_list[-1][0] += f" {item['prompt']}"
-                current_length += item['length']
         if current_list:
-            global_prompt_chunks.append(current_list)
 
         if only_convert_string:
-            return ' '.join([prompt_attention_to_invoke_prompt(i) for i in global_prompt_chunks])
 
-        return merge_embeds([prompt_attention_to_invoke_prompt(i) for i in global_prompt_chunks], compel)
 
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>你现在运行在CPU上 但是此项目只支持GPU.</p>"
@@ -138,24 +261,22 @@ def infer(
         tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
         text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
         returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
-        requires_pooled=[False, True]
     )
     # Call get_embed_new inside the infer function
-    conditioning, pooled = get_embed_new(prompt, pipe, compel_instance)
-
-    # Handle the negative prompt (negative_prompt)
-    if use_negative_prompt and negative_prompt:
-        negative_conditioning, negative_pooled = get_embed_new(negative_prompt, pipe, compel_instance)
-    else:
-        negative_conditioning = None
-        negative_pooled = None
 
     # When calling pipe, use the new parameter names (make sure they are correct)
     image = pipe(
-        prompt_embeds=conditioning,
-        pooled_prompt_embeds=pooled,
-        negative_prompt_embeds=negative_conditioning,
-        negative_pooled_prompt_embeds=negative_pooled,
         width=width,
         height=height,
         guidance_scale=guidance_scale,
 
 
 import re
 
+# =====================================
+# Prompt weights
+# =====================================
+import torch
+import re
 def parse_prompt_attention(text):
+    re_attention = re.compile(r"""
+\\\(|
+\\\)|
+\\\[|
+\\]|
+\\\\|
+\\|
+\(|
+\[|
+:([+-]?[.\d]+)\)|
+\)|
+]|
+[^\\()\[\]:]+|
+:
+""", re.X)
+
     res = []
+    round_brackets = []
+    square_brackets = []
+
+    round_bracket_multiplier = 1.1
+    square_bracket_multiplier = 1 / 1.1
+
+    def multiply_range(start_position, multiplier):
+        for p in range(start_position, len(res)):
+            res[p][1] *= multiplier
+
+    for m in re_attention.finditer(text):
+        text = m.group(0)
+        weight = m.group(1)
+
+        if text.startswith('\\'):
+            res.append([text[1:], 1.0])
+        elif text == '(':
+            round_brackets.append(len(res))
+        elif text == '[':
+            square_brackets.append(len(res))
+        elif weight is not None and len(round_brackets) > 0:
+            multiply_range(round_brackets.pop(), float(weight))
+        elif text == ')' and len(round_brackets) > 0:
+            multiply_range(round_brackets.pop(), round_bracket_multiplier)
+        elif text == ']' and len(square_brackets) > 0:
+            multiply_range(square_brackets.pop(), square_bracket_multiplier)
+        else:
+            parts = re.split(re.compile(r"\s*\bBREAK\b\s*", re.S), text)
+            for i, part in enumerate(parts):
+                if i > 0:
+                    res.append(["BREAK", -1])
+                res.append([part, 1.0])
+
+    for pos in round_brackets:
+        multiply_range(pos, round_bracket_multiplier)
+
+    for pos in square_brackets:
+        multiply_range(pos, square_bracket_multiplier)
+
+    if len(res) == 0:
+        res = [["", 1.0]]
+
+    # merge runs of identical weights
+    i = 0
+    while i + 1 < len(res):
+        if res[i][1] == res[i + 1][1]:
+            res[i][0] += res[i + 1][0]
+            res.pop(i + 1)
+        else:
+            i += 1
+
     return res
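For illustration only (not part of the commit): the rewritten parser follows the webui-style attention syntax, where (text:w) sets an explicit weight, ( ) multiplies by 1.1 and [ ] by 1/1.1, and it returns a list of [text, weight] pairs. A quick check, assuming the parse_prompt_attention defined above:

example = parse_prompt_attention("a (red:1.5) dress, [dim] light")
print(example)
# -> [['a ', 1.0], ['red', 1.5], [' dress, ', 1.0], ['dim', 0.909...], [' light', 1.0]]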
 
+def prompt_attention_to_invoke_prompt(attention):
+    tokens = []
+    for text, weight in attention:
+        # Round weight to 2 decimal places
+        weight = round(weight, 2)
+        if weight == 1.0:
+            tokens.append(text)
+        elif weight < 1.0:
+            if weight < 0.8:
+                tokens.append(f"({text}){weight}")
+            else:
+                tokens.append(f"({text})-" + "-" * int((1.0 - weight) * 10))
+        else:
+            if weight < 1.3:
+                tokens.append(f"({text})" + "+" * int((weight - 1.0) * 10))
+            else:
+                tokens.append(f"({text}){weight}")
+    return "".join(tokens)
+
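Likewise, a hedged illustration (not in the commit) of how prompt_attention_to_invoke_prompt rewrites those pairs into Compel's emphasis syntax: weights of 1.0 pass through unchanged, mild emphasis (1.0-1.3 or 0.8-1.0) becomes "+"/"-" suffixes, and anything stronger keeps an explicit numeric weight:

print(prompt_attention_to_invoke_prompt([['masterpiece', 1.5], [', best quality', 1.0]]))
# -> (masterpiece)1.5, best quality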
+def concat_tensor(t):
+    t_list = torch.split(t, 1, dim=0)
+    t = torch.cat(t_list, dim=1)
+    return t
+
+def merge_embeds(prompt_chanks, compel):
+    num_chanks = len(prompt_chanks)
+    if num_chanks != 0:
+        power_prompt = 1/(num_chanks*(num_chanks+1)//2)
+        prompt_embs = compel(prompt_chanks)
+        t_list = list(torch.split(prompt_embs, 1, dim=0))
+        for i in range(num_chanks):
+            t_list[-(i+1)] = t_list[-(i+1)] * ((i+1)*power_prompt)
+        prompt_emb = torch.stack(t_list, dim=0).sum(dim=0)
+    else:
+        prompt_emb = compel('')
+    return prompt_emb
+
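A small numeric aside (not part of the commit) on merge_embeds: compel embeds every chunk, and the chunks are then rescaled with triangular weights that sum to 1, earlier chunks getting the larger share, before being summed into a single conditioning tensor:

# Illustration of the scale factors only; no compel call needed.
num_chanks = 3
power_prompt = 1 / (num_chanks * (num_chanks + 1) // 2)   # 1/6
scales = [(num_chanks - i) * power_prompt for i in range(num_chanks)]
print(scales)  # [0.5, 0.333..., 0.166...] applied to chunks 1..3 before summing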
+def detokenize(chunk, actual_prompt):
+    chunk[-1] = chunk[-1].replace('</w>', '')
+    chanked_prompt = ''.join(chunk).strip()
+    while '</w>' in chanked_prompt:
+        if actual_prompt[chanked_prompt.find('</w>')] == ' ':
+            chanked_prompt = chanked_prompt.replace('</w>', ' ', 1)
+        else:
+            chanked_prompt = chanked_prompt.replace('</w>', '', 1)
+    actual_prompt = actual_prompt.replace(chanked_prompt,'')
+    return chanked_prompt.strip(), actual_prompt.strip()
+
+def tokenize_line(line, tokenizer):  # split into chunks
+    actual_prompt = line.lower().strip()
+    actual_tokens = tokenizer.tokenize(actual_prompt)
+    max_tokens = tokenizer.model_max_length - 2
+    comma_token = tokenizer.tokenize(',')[0]
+
+    chunks = []
+    chunk = []
+    for item in actual_tokens:
+        chunk.append(item)
+        if len(chunk) == max_tokens:
+            if chunk[-1] != comma_token:
+                for i in range(max_tokens-1, -1, -1):
+                    if chunk[i] == comma_token:
+                        actual_chunk, actual_prompt = detokenize(chunk[:i+1], actual_prompt)
+                        chunks.append(actual_chunk)
+                        chunk = chunk[i+1:]
+                        break
+                else:
+                    actual_chunk, actual_prompt = detokenize(chunk, actual_prompt)
+                    chunks.append(actual_chunk)
+                    chunk = []
+            else:
+                actual_chunk, actual_prompt = detokenize(chunk, actual_prompt)
+                chunks.append(actual_chunk)
+                chunk = []
+    if chunk:
+        actual_chunk, _ = detokenize(chunk, actual_prompt)
+        chunks.append(actual_chunk)
+
+    return chunks
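A minimal usage sketch for tokenize_line (not part of the commit; it assumes the standard CLIP tokenizer that SDXL pipelines ship with): long prompts are split into chunks of at most model_max_length - 2 tokens, preferably cutting at the last comma inside the window:

from transformers import CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
long_prompt = ", ".join(["masterpiece", "best quality", "1girl", "night sky"] * 30)
chunks = tokenize_line(long_prompt, tokenizer)
# Each chunk should re-tokenize to roughly tokenizer.model_max_length - 2 (75) tokens or fewer.
print(len(chunks), max(len(tokenizer.tokenize(c)) for c in chunks))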
 
 def get_embed_new(prompt, pipeline, compel, only_convert_string=False, compel_process_sd=False):
+
     if compel_process_sd:
         return merge_embeds(tokenize_line(prompt, pipeline.tokenizer), compel)
     else:
         # fix bug: excessive emphasis in weights conversion
+        prompt = prompt.replace("((", "(").replace("))", ")").replace("\\", "\\\\\\")
 
         # Convert to Compel
         attention = parse_prompt_attention(prompt)
+        global_attention_chanks = []
 
         for att in attention:
+            for chank in att[0].split(','):
+                temp_prompt_chanks = tokenize_line(chank, pipeline.tokenizer)
+                for small_chank in temp_prompt_chanks:
                     temp_dict = {
                         "weight": round(att[1], 2),
+                        "lenght": len(pipeline.tokenizer.tokenize(f'{small_chank},')),
+                        "prompt": f'{small_chank},'
                     }
+                    global_attention_chanks.append(temp_dict)
 
         max_tokens = pipeline.tokenizer.model_max_length - 2
+        global_prompt_chanks = []
         current_list = []
         current_length = 0
+        for item in global_attention_chanks:
+            if current_length + item['lenght'] > max_tokens:
+                global_prompt_chanks.append(current_list)
                 current_list = [[item['prompt'], item['weight']]]
+                current_length = item['lenght']
             else:
                 if not current_list:
                     current_list.append([item['prompt'], item['weight']])
 
                         current_list.append([item['prompt'], item['weight']])
                     else:
                         current_list[-1][0] += f" {item['prompt']}"
+                current_length += item['lenght']
         if current_list:
+            global_prompt_chanks.append(current_list)
 
         if only_convert_string:
+            return ' '.join([prompt_attention_to_invoke_prompt(i) for i in global_prompt_chanks])
+
+        return merge_embeds([prompt_attention_to_invoke_prompt(i) for i in global_prompt_chanks], compel)
 
+def add_comma_after_pattern_ti(text):
+    pattern = re.compile(r'\b\w+_\d+\b')
+    modified_text = pattern.sub(lambda x: x.group() + ',', text)
+    return modified_text
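And a hedged illustration (not in the commit) of the new add_comma_after_pattern_ti helper, which appends a comma after tokens of the form name_123 (e.g. textual-inversion style trigger words):

print(add_comma_after_pattern_ti("style_123 portrait"))  # -> "style_123, portrait"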
 
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>你现在运行在CPU上 但是此项目只支持GPU.</p>"
 
         tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
         text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
         returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+        requires_pooled=[False, True],
+        truncate_long_prompts=False
     )
     # Call get_embed_new inside the infer function
+    if not use_negative_prompt:
+        negative_prompt = ""
+    prompt = get_embed_new(prompt, pipe, compel, only_convert_string=True)
+    negative_prompt = get_embed_new(negative_prompt, pipe, compel, only_convert_string=True)
+    conditioning, pooled = compel([prompt, negative_prompt])  # the two prompts must be processed together so their embedding lengths match
 
     # When calling pipe, use the new parameter names (make sure they are correct)
     image = pipe(
+        prompt_embeds=conditioning[0:1],
+        pooled_prompt_embeds=pooled[0:1],
+        negative_prompt_embeds=conditioning[1:2],
+        negative_pooled_prompt_embeds=pooled[1:2],
         width=width,
         height=height,
         guidance_scale=guidance_scale,