import spaces
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import os

# Clear any stale ZeroGPU offload cache left over from previous runs
os.system("rm -rf /data-nvme/zerogpu-offload/*")

# Default system prompt
system_prompt = """You are Skywork-o1, a thinking model developed by Skywork AI, specializing in solving complex problems involving mathematics, coding, and logical reasoning through deep thought. When faced with a user's request, you first engage in a lengthy and in-depth thinking process to explore possible solutions to the problem. After completing your thoughts, you provide a detailed explanation of the solution process in your response."""

# Initialize the model and tokenizer (device_map="auto" places weights on the available GPU)
model_name = "Skywork/Skywork-o1-Open-Llama-3.1-8B"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
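
# Llama 3.1 tokenizers typically ship without a dedicated pad token, which makes
# generate() fall back to EOS with a warning. This guard is an assumption about
# this checkpoint; drop it if the tokenizer config already defines a pad token.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token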

# Generate a reply for the current message
@spaces.GPU
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Build the conversation history
    conversation = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            conversation.append({"role": "user", "content": user_msg})
        if assistant_msg:
            conversation.append({"role": "assistant", "content": assistant_msg})

    conversation.append({"role": "user", "content": message})

    # Build model inputs via the chat template
    input_ids = tokenizer.apply_chat_template(
        conversation,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    # Generate the completion
    generation = model.generate(
        input_ids=input_ids,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.pad_token_id,
    )

    # Decode only the newly generated tokens (slice off the prompt)
    completion = tokenizer.decode(
        generation[0][len(input_ids[0]):],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True
    )
    return completion
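
# A minimal streaming variant, as a sketch: gr.ChatInterface treats a generator
# handler as a streaming response, and transformers' TextIteratorStreamer yields
# decoded text as it is produced. The name respond_stream is illustrative, not
# part of the original app; pass it as fn= to gr.ChatInterface below to enable
# token-by-token output.
from threading import Thread
from transformers import TextIteratorStreamer

@spaces.GPU
def respond_stream(message, history, system_message, max_tokens, temperature, top_p):
    conversation = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            conversation.append({"role": "user", "content": user_msg})
        if assistant_msg:
            conversation.append({"role": "assistant", "content": assistant_msg})
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # model.generate blocks, so run it in a background thread and drain the streamer
    Thread(target=model.generate, kwargs=dict(
        input_ids=input_ids,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.pad_token_id,
        streamer=streamer,
    )).start()

    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial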

# Gradio interface
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value=system_prompt, label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
        ),
    ],
)

if __name__ == "__main__":
    demo.launch()