import gradio as gr
from huggingface_hub import InferenceClient
import os
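# Hugging Face access token, read from the `qwen_API_KEY` environment
# variable (set it e.g. as a Space secret or via `export qwen_API_KEY=hf_...`).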
api_key = os.environ.get('qwen_API_KEY')
"""
For more information on huggingface_hub Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct", token=api_key)
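# Quick sanity check of the client, kept commented out so no request is made
# at import time (assumes `qwen_API_KEY` holds a valid token). A non-streaming
# call returns a single object with the text under .choices[0].message.content:
#
#   out = client.chat_completion([{"role": "user", "content": "Hello"}], max_tokens=32)
#   print(out.choices[0].message.content)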
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Rebuild the full conversation: system prompt first, then the alternating
    # (user, assistant) turns from the Gradio history, then the new message.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Stream the completion, yielding the accumulated text so the chatbot
    # updates incrementally.
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # delta.content can be None on some chunks; guard before concatenating.
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response
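# A minimal sketch of driving `respond` outside Gradio (hypothetical inputs;
# the generator yields the growing reply after each streamed chunk):
#
#   final = ""
#   for partial in respond("Hello", [], "You are a helpful assistant.",
#                          max_tokens=64, temperature=0.7, top_p=0.95):
#       final = partial
#   print(final)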
example_prompts = [
    ["How to cook Kung Pao chicken the tastiest?", ""],
    ["你是谁开发的?", ""],  # "Who developed you?"
    ["写一篇关于青春的五言绝句", ""],  # "Write a five-character quatrain about youth"
    ["你是谁?", ""],  # "Who are you?"
]
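# In gr.ChatInterface, list-style examples map positionally to
# [message, *additional_inputs], so the empty second element would clear the
# "System message" box when an example is selected, while the remaining
# sliders keep their defaults (an assumption based on Gradio's documented
# example handling).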
latex_delimiters = [
    {"left": "$$", "right": "$$", "display": True},
    {"left": "\\[", "right": "\\]", "display": True},
    {"left": "$", "right": "$", "display": False},
    {"left": "\\(", "right": "\\)", "display": False},
]
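# With these delimiters, a reply containing "$$E = mc^2$$" or "\[...\]"
# renders as display math in the chatbot, while "$x_i$" and "\(...\)"
# render inline.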
demo = gr.ChatInterface(
    fn=respond,
    examples=example_prompts,
    cache_examples=False,
    title="千问2.5-72B",  # "Qwen2.5-72B"
    description="千问2.5-72B聊天机器人",  # "Qwen2.5-72B chatbot"
    additional_inputs=[
        gr.Textbox(
            value="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=8888, value=2048, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    chatbot=gr.Chatbot(
        show_label=True,
        latex_delimiters=latex_delimiters,
        show_copy_button=True,
    ),
)
if __name__ == "__main__":
    # Queue requests so up to 40 can run concurrently per event, and give the
    # server a matching thread pool. Gradio serves on http://127.0.0.1:7860
    # by default.
    demo.queue(default_concurrency_limit=40)
    demo.launch(max_threads=40)