ModelChat

Sleeping

File size: 2,072 Bytes

28c6f95
 
548b6a7
b3a9230
 
548b6a7
28c6f95
 
 
bd943ee
28c6f95
 
 
 
 
 
 
6706d71
28c6f95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6706d71
68f68a8
28c6f95
 
 
 
ccf21d2
 
 
 
 
 
 
6706d71
28c6f95
 
ccf21d2
a007d73
 
28c6f95
9a86ca9
 
28c6f95
013aded
28c6f95
b3a9230
6e484d8
28c6f95
 
 
ccf21d2

import gradio as gr
from huggingface_hub import InferenceClient
import os
import json

# Access token for the Hugging Face Inference API, read from the
# `qwen_API_KEY` environment variable (None when the variable is unset).
api_key = os.environ.get("qwen_API_KEY")

# For more information on `huggingface_hub` Inference API support, see:
# https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
client = InferenceClient(
    "Qwen/Qwen2.5-72B-Instruct",
    token=api_key,
)

def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream the model's reply to ``message``, yielding the text so far.

    Args:
        message: The latest user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs;
            falsy entries in a pair are skipped.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The accumulated response text, once per streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is named `chunk` so it does not shadow the `message`
    # parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Streamed chunks may carry no choices, or a delta whose content is
        # None (e.g. role-only or terminating chunks); treat both as "no new
        # text" instead of crashing on `str += None` or `choices[0]`.
        choices = chunk.choices
        token = choices[0].delta.content if choices else None

        response += token or ""
        yield response
        
# Sample prompts for the chat interface. Each example is a one-element list
# holding the message text.
example_prompts = [
    [prompt]
    for prompt in (
        "How to cook Kung Pao chicken the tastiest?",
        "Help me create an email expressing my greetings to an old friend.",
        "写一篇关于青春的五言绝句",
        "你是谁？",
    )
]

# Extra input widgets, listed in the same order as the trailing parameters of
# `respond`: system message, max tokens, temperature, top-p.
_system_message_box = gr.Textbox(
    value="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
    label="System message",
)
_max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=8888,
    value=2048,
    step=1,
    label="Max new tokens",
)
_temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=4.0,
    value=0.7,
    step=0.1,
    label="Temperature",
)
_top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

# Chat UI wired to `respond`, with example prompts and the extra inputs above.
demo = gr.ChatInterface(
    respond,
    examples=example_prompts,
    title="千问2.5-72B",
    description="千问2.5-72B聊天机器人",
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
    chatbot=gr.Chatbot(show_label=True, show_copy_button=True),
)

# Script entry point: enable the request queue (default concurrency limit 40)
# and then start the app with max_threads=40.
if __name__ == "__main__":
    demo.queue(default_concurrency_limit=40).launch(max_threads=40)