Controlled_Chat_CPU

Sleeping

App Files Files Community

abrakjamson committed on Oct 6, 2024

Commit

85e58bb

•

1 Parent(s): 4da1fb0

Disable input while generating

Browse files

Files changed (1) hide show

app.py +47 -10

app.py CHANGED Viewed

@@ -24,10 +24,12 @@ model = AutoModelForCausalLM.from_pretrained(
     trust_remote_code=True,
     use_safetensors=True
 )
-model = model.to("cuda:0" if torch.cuda.is_available() else "cpu")
-print(f"Is CUDA available: {torch.cuda.is_available()}")
-if torch.cuda.is_available():
     print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 model = ControlModel(model, list(range(-5, -18, -1)))
@@ -87,7 +89,8 @@ def generate_response(system_prompt, user_message, history, max_new_tokens, repi
     Returns a list of tuples, the user message and the assistant response,
         which Gradio uses to update the chatbot history
     """
     # Separate checkboxes and sliders based on type
     # The first x in args are the checkbox names (the file names)
     # The second x in args are the slider values
@@ -139,7 +142,10 @@ def generate_response(system_prompt, user_message, history, max_new_tokens, repi
         "repetition_penalty": repetition_penalty.value,
     }
-    _streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=False,)
     generate_kwargs = dict(
         input_ids,
@@ -155,6 +161,9 @@ def generate_response(system_prompt, user_message, history, max_new_tokens, repi
     # Display the response as it streams in, prepending the control vector info
     partial_message = ""
     for new_token in _streamer:
         if new_token != '<' and new_token != '</s>': # seems to hit EOS correctly without this needed
             partial_message += new_token
@@ -181,14 +190,17 @@ def generate_response(system_prompt, user_message, history, max_new_tokens, repi
     # Update conversation history
     history.append((user_message, assistant_response_display))
-    yield history
 def generate_response_with_retry(system_prompt, user_message, history, max_new_tokens, repitition_penalty, do_sample, *args):
     # Remove last user input and assistant response from history, then call generate_response()
     if history:
         history = history[0:-1]
-    for output in generate_response(system_prompt, user_message, history, max_new_tokens, repetition_penalty, do_sample, *args):
-        yield output
 # Function to reset the conversation history
 def reset_chat():
@@ -281,7 +293,7 @@ def set_preset_stoner(*args):
     for check in model_names_and_indexes:
         if check == "Angry":
             new_checkbox_values.append(True)
-            new_slider_values.append(0.5)
         elif check == "Right-leaning":
             new_checkbox_values.append(True)
             new_slider_values.append(-0.5)
@@ -323,6 +335,15 @@ def set_preset_facts(*args):
     return new_checkbox_values + new_slider_values
 tooltip_css = """
 /* Tooltip container */
     .tooltip {
@@ -560,10 +581,22 @@ with gr.Blocks(
     inputs_list = [system_prompt, user_input, chatbot, max_new_tokens, repetition_penalty, do_sample] + control_checks + control_sliders
     # Define button actions
     submit_button.click(
         generate_response,
         inputs=inputs_list,
         outputs=[chatbot]
     )
     user_input.submit(
@@ -575,7 +608,11 @@ with gr.Blocks(
     retry_button.click(
         generate_response_with_retry,
         inputs=inputs_list,
-        outputs=[chatbot]
     )
     new_chat_button.click(

     trust_remote_code=True,
     use_safetensors=True
 )
+cuda = torch.cuda.is_available()
+print(f"Is CUDA available: {cuda}")
+model = model.to("cuda:0" if cuda else "cpu")
+if cuda:
     print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 model = ControlModel(model, list(range(-5, -18, -1)))
     Returns a list of tuples, the user message and the assistant response,
         which Gradio uses to update the chatbot history
     """
+    global previous_turn
+    previous_turn = user_message
     # Separate checkboxes and sliders based on type
     # The first x in args are the checkbox names (the file names)
     # The second x in args are the slider values
         "repetition_penalty": repetition_penalty.value,
     }
+    timeout = 120.0
+    if cuda:
+        timeout = 10.0
+    _streamer = TextIteratorStreamer(tokenizer, timeout=timeout, skip_prompt=True, skip_special_tokens=False,)
     generate_kwargs = dict(
         input_ids,
     # Display the response as it streams in, prepending the control vector info
     partial_message = ""
+    #show the control vector info while we wait for the first token
+    temp_output = "*" + assistant_message_title + "*" + "\n\n*Please wait*..." + partial_message
+    yield history + [(user_message, temp_output)]
     for new_token in _streamer:
         if new_token != '<' and new_token != '</s>': # seems to hit EOS correctly without this needed
             partial_message += new_token
     # Update conversation history
     history.append((user_message, assistant_response_display))
+    return history
 def generate_response_with_retry(system_prompt, user_message, history, max_new_tokens, repitition_penalty, do_sample, *args):
     # Remove last user input and assistant response from history, then call generate_response()
+    global previous_turn
+    previous_ueser_message = previous_turn
     if history:
         history = history[0:-1]
+    # Using the previous turn's text, even though it isn't in the textbox anymore
+    for output in generate_response(system_prompt, previous_ueser_message, history, max_new_tokens, repetition_penalty, do_sample, *args):
+        yield [output, previous_ueser_message]
 # Function to reset the conversation history
 def reset_chat():
     for check in model_names_and_indexes:
         if check == "Angry":
             new_checkbox_values.append(True)
+            new_slider_values.append(0.4)
         elif check == "Right-leaning":
             new_checkbox_values.append(True)
             new_slider_values.append(-0.5)
     return new_checkbox_values + new_slider_values
+def disable_controls():
+    return gr.update(interactive= False, value= "⌛ Processing"), gr.update(interactive=False)
+def enable_controls():
+    return gr.update(interactive= True, value= "💬 Submit"), gr.update(interactive= True)
+def clear_input(input_textbox):
+    return ""
 tooltip_css = """
 /* Tooltip container */
     .tooltip {
     inputs_list = [system_prompt, user_input, chatbot, max_new_tokens, repetition_penalty, do_sample] + control_checks + control_sliders
     # Define button actions
+    # Disable the submit button while processing
+    submit_button.click(
+        disable_controls,
+        inputs= None,
+        outputs= [submit_button, user_input]
+    )
     submit_button.click(
         generate_response,
         inputs=inputs_list,
         outputs=[chatbot]
+    ).then(
+        clear_input,
+        inputs= user_input,
+        outputs= user_input
+    ).then(
+        enable_controls, inputs=None, outputs=[submit_button, user_input]
     )
     user_input.submit(
     retry_button.click(
         generate_response_with_retry,
         inputs=inputs_list,
+        outputs=[chatbot, user_input]
+    ).then(
+        clear_input,
+        inputs= user_input,
+        outputs= user_input
     )
     new_chat_button.click(