Spaces: Runtime error
dreamerdeo committed
Commit f61d72c • 1 Parent(s): e88da0b
Update app.py
app.py
CHANGED
@@ -4,8 +4,8 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
 from threading import Thread
 
-
-model_path = 'sail/Sailor-0.5B-Chat'
+model_path = 'dreamerdeo/Sailor2-0.8B-Chat'
+# model_path = 'sail/Sailor-0.5B-Chat'
 
 # Loading the tokenizer and model from Hugging Face's model hub.
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
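This hunk only swaps the checkpoint from 'sail/Sailor-0.5B-Chat' to 'dreamerdeo/Sailor2-0.8B-Chat'; the load path around it is unchanged. For reference, a minimal sketch of how such a checkpoint is typically loaded (the torch_dtype and device_map arguments are assumptions, since the model-loading call itself sits outside this diff):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = 'dreamerdeo/Sailor2-0.8B-Chat'

# Mirrors the tokenizer call visible in the diff context.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Assumed loading call; dtype and device placement are illustrative, not from the diff.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)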
@@ -78,33 +78,15 @@ def predict(message, history):
         repetition_penalty=1.1,
     )
 
-
-
-    partial_message =
-
-
-
-
-
-
-    # t.start()
-
-    # # Stream the partial message in real time
-    # partial_message = ""
-    # for new_token in streamer:
-    #     partial_message += new_token
-    #     if sft_end_token in partial_message:  # stop token detected
-    #         break
-    # # Convert the history and the current message to tuples and yield them in real time
-    # # yield [(msg, bot) for msg, bot in history] + [(message, partial_message)]
-    # # yield (message, partial_message)
-    # yield partial_message
-
-    # # Process the final generated reply
-    # final_message = partial_message.replace(sft_end_token, "").strip()
-    # history.append([message, final_message])  # update the history
-    # # Return the final dialogue history, making sure it is a list of tuples
-    # yield [(msg, bot) for msg, bot in history]
+    t = Thread(target=model.generate, kwargs=generate_kwargs)
+    t.start()  # Starting the generation in a separate thread.
+    partial_message = ""
+    for new_token in streamer:
+        partial_message += new_token
+        if sft_end_token in partial_message:  # Breaking the loop if the stop token is generated.
+            break
+        yield partial_message
+
 
 css = """
 full-height {
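The replacement restores the standard TextIteratorStreamer pattern: model.generate runs in a worker thread while the caller drains the streamer and yields the growing reply. A minimal self-contained sketch of the same pattern, with illustrative generation settings (the app's actual generate_kwargs, chat template, and sft_end_token are defined outside this hunk):

import torch
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_path = 'dreamerdeo/Sailor2-0.8B-Chat'
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)

def stream_reply(prompt: str):
    inputs = tokenizer(prompt, return_tensors="pt")
    # skip_prompt drops the echoed input; the streamer yields decoded text chunks.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=256)

    # generate() blocks, so it runs in a worker thread while we consume tokens here.
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    partial_message = ""
    for new_token in streamer:
        partial_message += new_token
        yield partial_message

for text in stream_reply("Hello!"):
    print(text)

In the app, predict is presumably handed to a Gradio chat component, which re-renders each yielded partial_message as the streaming reply; the css block after this hunk styles that UI.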
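The file also imports StoppingCriteria and StoppingCriteriaList, while the restored loop stops by string-matching sft_end_token after decoding. A token-id-based alternative using those imports; the '<|im_end|>' stop token is a hypothetical choice (Sailor chat models are Qwen-derived), not something this diff shows:

import torch
from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnTokens(StoppingCriteria):
    # Signals generate() to stop once the last emitted token is a stop id.
    def __init__(self, stop_token_ids):
        self.stop_token_ids = stop_token_ids

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        return int(input_ids[0, -1]) in self.stop_token_ids

# Hypothetical wiring, assuming the tokenizer and generate_kwargs from the app:
# stop_ids = [tokenizer.convert_tokens_to_ids('<|im_end|>')]
# generate_kwargs['stopping_criteria'] = StoppingCriteriaList([StopOnTokens(stop_ids)])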