Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -45,9 +45,10 @@ def inference(query):
|
|
45 |
|
46 |
tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
|
47 |
outputs = model.generate(tokenized_chat, **generation_params)
|
48 |
-
decoded_outputs = tokenizer.batch_decode(outputs,
|
49 |
assistant_response = decoded_outputs[0].split("<|im_start|>assistant\n")[-1].strip()
|
50 |
-
|
|
|
51 |
# outputs = model.generate(tokenized_chat, **generation_params, streamer=streamer)
|
52 |
# return outputs
|
53 |
|
|
|
45 |
|
46 |
tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
|
47 |
outputs = model.generate(tokenized_chat, **generation_params)
|
48 |
+
decoded_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
49 |
assistant_response = decoded_outputs[0].split("<|im_start|>assistant\n")[-1].strip()
|
50 |
+
response_ = assistant_response.replace('<|im_end|>', "")
|
51 |
+
return response_
|
52 |
# outputs = model.generate(tokenized_chat, **generation_params, streamer=streamer)
|
53 |
# return outputs
|
54 |
|