Controlled_Chat_CPU

Sleeping

App Files Files Community

abrakjamson committed on Sep 26, 2024

Commit

453c7fc

•

1 Parent(s): 9acb8e6

Corrected history and special tokens

Browse files

Files changed (1) hide show

app.py +45 -29

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from repeng import ControlVector, ControlModel
@@ -48,16 +49,18 @@ def toggle_slider(checked):
     return gr.update(visible=checked)
 # Function to generate the model's response
-def generate_response(system_prompt, user_message, *args, history=None, max_new_tokens=256, repetition_penalty=1.1):
     checkboxes = []
     sliders = []
     # Separate checkboxes and sliders based on type
-    for item in args:
-        if type(item) == bool:
-            checkboxes.append(item)
-        elif isinstance(item, (int, float)):
-            sliders.append(item)
     if len(checkboxes) != len(control_vector_files) or len(sliders) != len(control_vector_files):
         return history if history else [], history if history else []
@@ -76,21 +79,28 @@ def generate_response(system_prompt, user_message, *args, history=None, max_new_
             except Exception as e:
                 print(f"Failed to set control vector {cv_file}: {e}")
-    # Initialize history if None
-    history = history or []
-    # Construct the formatted prompt based on history
     formatted_prompt = ""
-    for turn in history:
-        user_msg, asst_msg = turn
-        formatted_prompt += f"{user_tag} {user_msg} {asst_tag} {asst_msg} </s>"
     # Append the system prompt if provided
     if system_prompt.strip():
-        formatted_prompt += f"[INST] {system_prompt}"
     # Append the new user message
-    formatted_prompt += f"\n{user_tag} {user_message} {asst_tag}"
     # Tokenize the input
     input_ids = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
@@ -99,23 +109,30 @@ def generate_response(system_prompt, user_message, *args, history=None, max_new_
         "pad_token_id": tokenizer.eos_token_id,
         "do_sample": default_generation_settings["do_sample"],
         "max_new_tokens": int(max_new_tokens),
-        "repetition_penalty": repetition_penalty,
     }
     # Generate the response
-    output_ids = model.generate(**input_ids, **default_generation_settings)
-    response = tokenizer.decode(output_ids.squeeze(), skip_special_tokens=True)
-    # Clean up the response by removing any trailing tags
-    if "</s>" in response:
-        response = response.split("</s>")[0].strip()
     # Update conversation history
-    history.append((user_message, response))
-    return history, history
 # Function to reset the conversation history
 def reset_chat():
     return [], []
 # Build the Gradio interface
@@ -198,20 +215,19 @@ with gr.Blocks() as demo:
                 submit_button = gr.Button("💬 Submit")
                 new_chat_button = gr.Button("🆕 New Chat")
-    # State to keep track of conversation history
-    state = gr.State()
     # Define button actions
     submit_button.click(
         generate_response,
-        inputs=[system_prompt, user_input] + control_checks + control_sliders + [state],
-        outputs=[chatbot, state]
     )
     new_chat_button.click(
         reset_chat,
         inputs=[],
-        outputs=[chatbot, state]
     )
 # Launch the Gradio app

 import os
+import re
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from repeng import ControlVector, ControlModel
     return gr.update(visible=checked)
 # Function to generate the model's response
 #
 # Builds a prompt string from the system prompt, the prior (user, assistant)
 # turns in `history` and the new `user_message`, calls `model.generate`,
 # extracts the assistant text from the decoded output, appends
 # (user_message, assistant_response) to `history` and returns `history`.
 #
 # Parameters (positional, in the order of `inputs_list` passed to
 # submit_button.click):
 #   system_prompt      - text; wrapped in "[INST] ... [/INST] " when non-blank
 #   user_message       - the new user turn
 #   history            - sequence of (user_msg, asst_msg) pairs; appended to in place
 #   max_new_tokens     - converted with int() for the generation settings
 #   repitition_penalty - NOTE(review): not read anywhere in the visible body
 #   *args              - len(control_vector_files) checkbox inputs followed by
 #                        len(control_vector_files) slider values
+def generate_response(system_prompt, user_message, history, max_new_tokens, repitition_penalty, *args):
     checkboxes = []
     sliders = []
+    # Argument order mirrors the click handler's inputs:
+    # inputs_list = [system_prompt, user_input, chatbot, max_new_tokens, repetition_penalty] + control_checks + control_sliders
     # Separate checkboxes and sliders by their position in args
+    # The first len(control_vector_files) entries in args are the checkbox inputs (one per control vector file)
+    # The next len(control_vector_files) entries in args are the slider values
+    for i in range(len(control_vector_files)):
+        checkboxes.append(args[i])
+        sliders.append(args[len(control_vector_files) + i])
     # NOTE(review): the loop above always appends exactly
     # len(control_vector_files) items to each list (or raises IndexError when
     # args is too short), so this guard looks unreachable. It also returns two
     # values, whereas the normal exit at the end returns only `history` -
     # confirm which shape the click handler's outputs expect.
     if len(checkboxes) != len(control_vector_files) or len(sliders) != len(control_vector_files):
         return history if history else [], history if history else []
             except Exception as e:
                 print(f"Failed to set control vector {cv_file}: {e}")
     formatted_prompt = ""
+    # Mistral expects the history to be wrapped in <s>history</s>
     # NOTE(review): len(history) assumes history is not None - confirm what the
     # chatbot input passes on the first turn.
+    if len(history) > 0:
+        formatted_prompt += "<s>"
     # Append the system prompt if provided
     if system_prompt.strip():
+        formatted_prompt += f"[INST] {system_prompt} [/INST] "
+    # Construct the formatted prompt based on history
+    if len(history) > 0:
+        for turn in history:
+            user_msg, asst_msg = turn
+            formatted_prompt += f"{user_tag} {user_msg} {asst_tag} {asst_msg}"
+    if len(history) > 0:
+        formatted_prompt += "</s>"
     # Append the new user message
+    formatted_prompt += f"{user_tag} {user_message} {asst_tag}"
     # Tokenize the input
     input_ids = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
         "pad_token_id": tokenizer.eos_token_id,
         "do_sample": default_generation_settings["do_sample"],
         "max_new_tokens": int(max_new_tokens),
     # NOTE(review): this reads `.value` from a name `repetition_penalty`, which
     # is not the `repitition_penalty` parameter above; presumably it resolves to
     # the module-level `repetition_penalty` object listed in inputs_list - TODO
     # confirm this is intended rather than the value passed in for this call.
+        "repetition_penalty": repetition_penalty.value,
     }
     # Generate the response
+    output_ids = model.generate(**input_ids, **generation_settings)
     # Special tokens are kept in the decoded text, presumably so the </s>
     # marker is still present for the regex below.
+    response = tokenizer.decode(output_ids.squeeze(), skip_special_tokens=False)
     # Helper: return the stripped text after the last "[/INST]" up to the first
     # following "</s>" (or the end of the string); None when there is no
     # "[/INST]" in the input.
+    def get_assistant_response(input_string):
+        # Use regex to find the text between the final [/INST] tag and </s>
         # The negative lookahead rejects any [/INST] that is followed by
         # another one, so only the last occurrence matches.
+        pattern = r'\[/INST\](?!.*\[/INST\])\s*(.*?)(?:</s>|$)'
+        match = re.search(pattern, input_string, re.DOTALL)
+        if match:
+            return match.group(1).strip()
+        return None
+    assistant_response = get_assistant_response(response)
     # Update conversation history
     # NOTE(review): assistant_response may be None here (no "[/INST]" found);
     # it is stored as-is and would be formatted into later prompts.
+    history.append((user_message, assistant_response))
+    return history
 # Function to reset the conversation history
 def reset_chat():
     """Return cleared values for the "New Chat" button.

     The values are ordered to match the click handler's
     ``outputs=[chatbot, user_input]``: an empty conversation history for the
     chatbot, then an empty string for the user input textbox. A string is
     used for the textbox because a list would be rendered as the text "[]".

     Returns:
         A ``(history, user_text)`` tuple of ``([], "")``. A fresh list is
         created on every call so callers never share state.
     """
     return [], ""
 # Build the Gradio interface
                 submit_button = gr.Button("💬 Submit")
                 new_chat_button = gr.Button("🆕 New Chat")
+    inputs_list = [system_prompt, user_input, chatbot, max_new_tokens, repetition_penalty] + control_checks + control_sliders
     # Define button actions
     submit_button.click(
         generate_response,
+        inputs=inputs_list,
+        outputs=[chatbot]
     )
     new_chat_button.click(
         reset_chat,
         inputs=[],
+        outputs=[chatbot, user_input]
     )
 # Launch the Gradio app