File size: 2,324 Bytes
28c6f95
 
548b6a7
b3a9230
1234221
28c6f95
d801608
28c6f95
1234221
28c6f95
 
1234221
 
 
 
 
 
28c6f95
1234221
28c6f95
1234221
 
 
 
 
28c6f95
1234221
28c6f95
1234221
28c6f95
1234221
 
 
bbcb5dc
1234221
 
 
 
28c6f95
1234221
 
d801608
ccf21d2
1234221
 
 
 
d801608
 
 
1234221
 
d801608
ccf21d2
6706d71
28c6f95
1234221
 
0d7cb73
1234221
 
 
 
 
 
 
 
 
28c6f95
 
1234221
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import gradio as gr
from huggingface_hub import InferenceClient
import os

# Hugging Face access token, read from the `qwen_API_KEY` environment variable
# (None when the variable is not set).
api_key = os.getenv("qwen_API_KEY")

# For more information on huggingface_hub Inference API support, please check
# the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct", token=api_key)

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p
):
    """Stream a chat completion for ``message`` and yield the growing reply.

    The conversation sent to the model is the system prompt, then every
    non-empty user/assistant entry from ``history`` in order, then the new
    user ``message``.

    Args:
        message: The new user message.
        history: Previous turns as ``(user_text, assistant_text)`` pairs;
            falsy entries on either side are skipped.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Yields:
        The accumulated response text after each streamed chunk that
        carries text.
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is deliberately not called `message`, so the
    # function parameter is no longer shadowed while streaming.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p
    ):
        # Some stream events may carry no choices at all (e.g. a trailing
        # metadata-only event); there is nothing to append for those.
        if not chunk.choices:
            continue

        token = chunk.choices[0].delta.content
        # A delta can arrive without text (content is None, e.g. on the
        # closing chunk); concatenating None to a str would raise TypeError.
        if token is None:
            continue

        response += token
        yield response
    
# Starter prompts offered in the UI. Every row pairs the prompt text with an
# empty string.
# NOTE(review): the empty second entry presumably fills the first additional
# input (the system message) when an example is used — confirm against
# Gradio's `examples` format.
example_prompts = [
    [prompt, ""]
    for prompt in (
        "How to cook Kung Pao chicken the tastiest?",
        "你是谁开发的?",
        "写一篇关于青春的五言绝句",
        "你是谁?",
    )
]

# Delimiter pairs handed to the chatbot for LaTeX rendering; `display`
# is True for the `$$…$$` / `\[…\]` pairs and False for `$…$` / `\(…\)`.
latex_delimiters = [
    {"left": opening, "right": closing, "display": is_block}
    for opening, closing, is_block in (
        ("$$", "$$", True),
        ("\\[", "\\]", True),
        ("$", "$", False),
        ("\\(", "\\)", False),
    )
]

# Chat UI wired to `respond`. The additional inputs are listed in the same
# order as `respond`'s extra parameters: system_message, max_tokens,
# temperature, top_p.
demo = gr.ChatInterface(
    respond,
    title="千问2.5-72B",
    description="千问2.5-72B聊天机器人",
    examples=example_prompts,
    cache_examples=False,
    additional_inputs=[
        gr.Textbox(
            label="System message",
            value="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
        ),
        gr.Slider(label="Max new tokens", minimum=1, maximum=8888, step=1, value=2048),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        ),
    ],
    chatbot=gr.Chatbot(
        latex_delimiters=latex_delimiters,
        show_copy_button=True,
        show_label=True,
    ),
)

if __name__ == "__main__":
    # Configure the queue with default_concurrency_limit=40, then start the
    # app with max_threads=40 (queue() hands back the app, so the calls chain).
    demo.queue(default_concurrency_limit=40).launch(max_threads=40)