dmayhem93 committed on
Commit 040b554 · verified · 1 Parent(s): d0ede55

Update app.py

Files changed (1)
  1. app.py +5 -3
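
In short, the commit loads the checkpoint in float16 instead of float32, moves the model and the tokenized inputs onto the GPU when one is available, and drops the now-unused text-generation pipeline.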
app.py CHANGED
```diff
@@ -9,9 +9,11 @@ from threading import Thread
 
 print(f"Starting to load the model to memory")
 m = AutoModelForCausalLM.from_pretrained(
-    "stabilityai/stablelm-2-zephyr-1_6b", torch_dtype=torch.float32, trust_remote_code=True)
+    "stabilityai/stablelm-2-zephyr-1_6b", torch_dtype=torch.float16, trust_remote_code=True)
 tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b", trust_remote_code=True)
-generator = pipeline('text-generation', model=m, tokenizer=tok)
+# using CUDA for an optimal experience
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+m = m.to(device)
 print(f"Successfully loaded the model to the memory")
 
 
@@ -31,7 +33,7 @@ def chat(message, history):
     chat.append({"role": "user", "content": message})
     messages = tok.apply_chat_template(chat, tokenize=False)
     # Tokenize the messages string
-    model_inputs = tok([messages], return_tensors="pt")
+    model_inputs = tok([messages], return_tensors="pt").to(device)
     streamer = TextIteratorStreamer(
         tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
```
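
For context, here is a minimal sketch of how the changed pieces fit together after this commit. The hunks show only fragments of app.py, so the surrounding scaffolding (imports, the shape of `history`, and generation parameters such as `max_new_tokens`) is assumed rather than taken from the commit:

```python
# Sketch of app.py after this commit. Hunk headers confirm
# `from threading import Thread` and `def chat(message, history)`;
# everything marked "assumed" is reconstructed, not from the diff.
import torch
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

m = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stablelm-2-zephyr-1_6b", torch_dtype=torch.float16, trust_remote_code=True)
tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b", trust_remote_code=True)

# float16 halves the memory footprint versus float32; the model and the
# tokenized inputs must sit on the same device or generate() will fail.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
m = m.to(device)

def chat(message, history):
    chat = []
    for user, assistant in history:  # assumed: history as (user, assistant) pairs
        chat.append({"role": "user", "content": user})
        chat.append({"role": "assistant", "content": assistant})
    chat.append({"role": "user", "content": message})
    messages = tok.apply_chat_template(chat, tokenize=False)
    # Tokenize the messages string and move the tensors to the model's device
    model_inputs = tok([messages], return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(
        tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        model_inputs, streamer=streamer, max_new_tokens=1024)  # params assumed
    # Run generation on a background thread so decoded tokens can be
    # yielded to the UI as they arrive.
    Thread(target=m.generate, kwargs=generate_kwargs).start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial
```

Running `generate()` on a background thread while iterating the `TextIteratorStreamer` is the standard transformers pattern for streaming tokens into a chat callback, which is why the inputs (and not just the model) must be placed on `device`.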