affandes committed on
Commit
6520c83
·
verified ·
1 Parent(s): 6976e86

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -0
app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from functools import lru_cache

from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer

app = FastAPI()

# Model identifier hoisted to a module constant so it is stated once.
MODEL_NAME = "Qwen/QwQ-32B-Preview"


@lru_cache(maxsize=1)
def _load_model_and_tokenizer():
    """Load the model and tokenizer exactly once (lazily, on first request).

    The original code re-loaded the 32B-parameter model inside the request
    handler, paying the full load cost on every HTTP request. Caching here
    keeps the first-request lazy behavior while reusing the loaded objects
    for all subsequent requests.

    Returns:
        tuple: (model, tokenizer) for MODEL_NAME.
    """
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype="auto",
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    return model, tokenizer


@app.get("/")
def greet_json():
    """Handle GET / by generating a model response to a fixed prompt.

    Runs a hard-coded chat prompt through the cached model and returns the
    decoded completion text (the newly generated tokens only, with special
    tokens stripped).

    Returns:
        str: the model's generated reply.
    """
    model, tokenizer = _load_model_and_tokenizer()

    prompt = "How many r in strawberry."
    messages = [
        {"role": "system", "content": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step."},
        {"role": "user", "content": prompt},
    ]

    # Render the chat messages into the model's prompt format, appending the
    # generation marker so the model continues as the assistant.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512,
    )
    # generate() returns prompt + completion; slice off the prompt tokens so
    # only the newly generated portion is decoded.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response