Update app.py
app.py
CHANGED
@@ -9,9 +9,11 @@ from threading import Thread
 
 print(f"Starting to load the model to memory")
 m = AutoModelForCausalLM.from_pretrained(
-    "stabilityai/stablelm-2-zephyr-1_6b", torch_dtype=torch.
+    "stabilityai/stablelm-2-zephyr-1_6b", torch_dtype=torch.float16, trust_remote_code=True)
 tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b", trust_remote_code=True)
-
+# using CUDA for an optimal experience
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+m = m.to(device)
 print(f"Successfully loaded the model to memory")
 
 
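This hunk casts the weights to float16 and, with the added lines, moves the model onto the GPU when one is available. An alternative worth noting: with the accelerate package installed, from_pretrained can place the weights at load time via device_map, avoiding a full CPU copy first. A minimal sketch of that variant (device_map="auto" is an assumption, not part of this commit):

import torch
from transformers import AutoModelForCausalLM

# Hypothetical alternative to the explicit m.to(device) call in this commit;
# requires the accelerate package (`pip install accelerate`).
m = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stablelm-2-zephyr-1_6b",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map="auto",  # lets accelerate place the weights across GPU/CPU
)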
@@ -31,7 +33,7 @@ def chat(message, history):
     chat.append({"role": "user", "content": message})
     messages = tok.apply_chat_template(chat, tokenize=False)
     # Tokenize the messages string
-    model_inputs = tok([messages], return_tensors="pt")
+    model_inputs = tok([messages], return_tensors="pt").to(device)
     streamer = TextIteratorStreamer(
         tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
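This hunk moves the tokenized inputs onto the same device as the model; generate raises a RuntimeError if inputs and weights live on different devices. The diff stops at generate_kwargs = dict(, but the TextIteratorStreamer here and the from threading import Thread context in the first hunk imply the usual streaming pattern, sketched below with assumed generation parameters (max_new_tokens, do_sample, and temperature are illustrative, not from this commit):

from threading import Thread

# Generation runs in a background thread so the streamer can be consumed
# as tokens arrive; the sampling values below are assumptions.
generate_kwargs = dict(
    model_inputs,              # BatchEncoding: input_ids + attention_mask
    streamer=streamer,
    max_new_tokens=1024,
    do_sample=True,
    temperature=0.7,
)
t = Thread(target=m.generate, kwargs=generate_kwargs)
t.start()

partial_text = ""
for new_text in streamer:      # blocks until the next chunk, up to timeout=10.
    partial_text += new_text
    yield partial_text         # inside chat(), so each partial reply streams out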