vtrv.vls committed on
Commit acd9509 · 1 Parent(s): 6f92fa3
Files changed (1)
  1. models.py +26 -6
models.py CHANGED
@@ -1,13 +1,19 @@
 import torch
-from transformers import pipeline
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 
 def get_tinyllama():
     tinyllama = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.float16, device_map="auto")
     return tinyllama
 
 def get_qwen2ins1b():
-    tinyllama = pipeline("text-generation", model="Qwen/Qwen2-1.5B-Instruct", torch_dtype=torch.float16, device_map="auto")
-    return tinyllama
+    model = AutoModelForCausalLM.from_pretrained(
+        "Qwen/Qwen2-1.5B-Instruct",
+        torch_dtype="auto",
+        device_map="auto"
+    )
+
+    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
+    return {'model': model, 'tokenizer': tokenizer}
 
 def response_tinyllama(
     model=None,
@@ -46,7 +52,21 @@ def response_qwen2ins1b(
         if len(step) >= 2:
             messages_dict.append({'role': 'assistant', 'content': step[1]})
 
-    prompt = model.tokenizer.apply_chat_template(messages_dict, tokenize=False, add_generation_prompt=True)
-    outputs = model(prompt, max_new_tokens=64, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
+    text = model['tokenizer'].apply_chat_template(
+        messages_dict,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = model['tokenizer']([text], return_tensors="pt")
+
+    generated_ids = model['model'].generate(
+        model_inputs.input_ids,
+        max_new_tokens=512
+    )
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+
+    response = model['tokenizer'].batch_decode(generated_ids, skip_special_tokens=True)[0]
 
-    return outputs[0]['generated_text'] #.split('<|assistant|>')[1].strip()
+    return response # outputs[0]['generated_text'] #.split('<|assistant|>')[1].strip()
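
Usage note (not part of the commit): after this change, get_qwen2ins1b returns a dict of {'model', 'tokenizer'} instead of a pipeline, so callers must pass that dict into response_qwen2ins1b. A minimal sketch of how the refactored helpers might be wired together; the exact parameters of response_qwen2ins1b (message text and a history of (user, assistant) pairs) are assumed from the visible body, not confirmed by the diff:

    # minimal sketch, assuming response_qwen2ins1b(model, message, history)
    from models import get_qwen2ins1b, response_qwen2ins1b

    qwen = get_qwen2ins1b()  # dict: {'model': AutoModelForCausalLM, 'tokenizer': AutoTokenizer}
    history = [("Hi!", "Hello! How can I help?")]  # assumed (user, assistant) pair format
    reply = response_qwen2ins1b(qwen, "What is Qwen2?", history)
    print(reply)

When the model is placed on a GPU via device_map="auto", the tokenized inputs may also need to be moved to the model's device (e.g. model_inputs.to(...)) before generate.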