Controlled_Chat_CPU

Sleeping

abrakjamson committed on Oct 8, 2024

Commit

2fd6955

•

1 Parent(s): 85e58bb

Adding readme (and missing control model)

Files changed (3) hide show

README.md CHANGED Viewed

@@ -1,13 +1,23 @@
 ---
-title: Controlled Chat
-emoji: 🏢
-colorFrom: indigo
-colorTo: blue
 sdk: gradio
 sdk_version: 4.44.0
 app_file: app.py
 pinned: false
 license: mit
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: LLM Mind Control
+emoji: ⚡
+colorFrom: pink
+colorTo: gray
 sdk: gradio
 sdk_version: 4.44.0
 app_file: app.py
 pinned: false
 license: mit
+preload_from_hub:
+  - mistralai/Mistral-7B-Instruct-v0.3
+suggested_hardware:
+  - l4x1
 ---
+# 🧠 LLM Mind Control
+Unlike prompting, direct weight manipulation lets you fine-tune the amount of a personality
+trait or topic. Enabled through [Representation Engineering](https://arxiv.org/abs/2310.01405)
+via the [repeng](https://pypi.org/project/repeng) library.
+[Watch a demo](https://youtu.be/gYZPGVafD7M) for usage tips.
+This space needs more than 16GB of video memory to run on GPU, but could be modified to use a smaller model.

app.py CHANGED Viewed

@@ -10,7 +10,6 @@ from huggingface_hub import login
 # Initialize model and tokenizer
 mistral_path = "mistralai/Mistral-7B-Instruct-v0.3"
-#mistral_path = r"E:/language_models/models/mistral"
 access_token = os.getenv("mistralaccesstoken")
 login(access_token)
@@ -30,7 +29,6 @@ model = model.to("cuda:0" if cuda else "cpu")
 if cuda:
     print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 model = ControlModel(model, list(range(-5, -18, -1)))
 # Generation settings
@@ -144,7 +142,7 @@ def generate_response(system_prompt, user_message, history, max_new_tokens, repi
     timeout = 120.0
     if cuda:
-        timeout = 10.0
     _streamer = TextIteratorStreamer(tokenizer, timeout=timeout, skip_prompt=True, skip_special_tokens=False,)
     generate_kwargs = dict(
@@ -231,7 +229,7 @@ def set_preset_helpful(*args):
     # sets checkboxes and sliders accordingly to this persona
     # args is a list of checkboxes and then slider values
     # must return the updated list of checkboxes and sliders
-    count_checkboxes = int(len(args)/2)
     new_checkbox_values = []
     new_slider_values = []
@@ -400,8 +398,13 @@ with gr.Blocks(
     # Header
-    gr.Markdown("# 🧠 LLM Brain Control")
-    gr.Markdown("Usage demo: [link](https://example.com)")
     with gr.Row():
         # Left Column: Control Vectors and advanced settings

 # Initialize model and tokenizer
 mistral_path = "mistralai/Mistral-7B-Instruct-v0.3"
 access_token = os.getenv("mistralaccesstoken")
 login(access_token)
 if cuda:
     print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 model = ControlModel(model, list(range(-5, -18, -1)))
 # Generation settings
     timeout = 120.0
     if cuda:
+        timeout = 15.0
     _streamer = TextIteratorStreamer(tokenizer, timeout=timeout, skip_prompt=True, skip_special_tokens=False,)
     generate_kwargs = dict(
     # sets checkboxes and sliders accordingly to this persona
     # args is a list of checkboxes and then slider values
     # must return the updated list of checkboxes and sliders
     new_checkbox_values = []
     new_slider_values = []
     # Header
+    gr.Markdown("# 🧠 LLM Mind Control")
+    gr.Markdown("""Unlike prompting, direct weight manipulation lets you fine-tune the amount of a personality
+trait or topic. Enabled through [Representation Engineering](https://arxiv.org/abs/2310.01405)
+via the [repeng](https://pypi.org/project/repeng) library.
+[Watch a demo](https://youtu.be/gYZPGVafD7M) for usage tips.""")
+    if not cuda:
+        gr.Markdown("*Warning: running on CPU will be very slow*")
     with gr.Row():
         # Left Column: Control Vectors and advanced settings

control_models/truthful.gguf ADDED Viewed

Binary file (509 kB). View file