# Spaces: Running
import os
import gradio as gr
from openai import OpenAI
def predict(
    message,
    history,
    system_prompt,
    model,
    api_url,
    api_key,
    max_tk,
    temp,
    top_p,
):
    """Send one chat-completion request to an OpenAI-compatible endpoint.

    Parameters
    ----------
    message : str
        The new user message.
    history : list[tuple[str, str]]
        Prior (user, assistant) turn pairs from the Gradio chat widget.
    system_prompt : str
        System instruction prepended to the conversation.
    model : str
        Model identifier understood by the target service.
    api_url : str
        Base URL of the OpenAI-compatible service.
    api_key : str
        API key; when empty/falsy an explanatory message is returned
        instead of contacting the service.
    max_tk : int
        Maximum number of tokens to generate.
    temp : float
        Sampling temperature.
    top_p : float
        Nucleus-sampling probability mass.

    Returns
    -------
    str
        The assistant reply, or an error description on failure.
    """
    if not api_key:
        return "Please set valid api keys in settings first."
    # Rebuild the conversation in the OpenAI message format.
    msgs = [{"role": "system", "content": system_prompt}]
    for user, assistant in history:
        msgs.append({"role": "user", "content": user})
        # BUG FIX: earlier assistant turns must carry the "assistant"
        # role, not "system", or the model sees a malformed dialogue.
        msgs.append({"role": "assistant", "content": assistant})
    msgs.append({"role": "user", "content": message})
    try:
        client = OpenAI(api_key=api_key, base_url=api_url)
        response = client.chat.completions.create(
            model=model,
            messages=msgs,
            max_tokens=max_tk,
            temperature=temp,
            top_p=top_p,
            stream=False,
        ).to_dict()["choices"][0]["message"]["content"]
    except Exception as e:
        # Surface API/network errors to the chat UI instead of crashing.
        response = f"{e}"
    return response
def deepseek(
    message,
    history,
    model,
    api_key,
    system_prompt,
    max_tk,
    temp,
    top_p,
):
    """Chat handler for the DeepSeek tab.

    Fetches the complete reply from the DeepSeek endpoint, then
    fake-streams it to Gradio one character at a time.
    """
    reply = predict(
        message,
        history,
        system_prompt,
        model,
        "https://api.deepseek.com",
        api_key,
        max_tk,
        temp,
        top_p,
    )
    # Yield progressively longer prefixes to simulate token streaming.
    for cut in range(1, len(reply) + 1):
        yield reply[:cut]
def kimi(
    message,
    history,
    model,
    api_key,
    system_prompt,
    max_tk,
    temp,
    top_p,
):
    """Chat handler for the Kimi (Moonshot) tab.

    Fetches the complete reply from the Moonshot endpoint, then
    fake-streams it to Gradio one character at a time.
    """
    reply = predict(
        message,
        history,
        system_prompt,
        model,
        "https://api.moonshot.cn/v1",
        api_key,
        max_tk,
        temp,
        top_p,
    )
    # Yield progressively longer prefixes to simulate token streaming.
    for cut in range(1, len(reply) + 1):
        yield reply[:cut]
def _provider_tab(chat_fn, models, default_model, key_env):
    """Build one provider tab body: settings accordion + chat interface.

    Must be called inside a ``gr.Tab`` context so the components are
    attached to that tab.

    Parameters
    ----------
    chat_fn : callable
        Generator handler passed to ``gr.ChatInterface``.
    models : list[str]
        Model choices shown in the dropdown.
    default_model : str
        Initially selected model.
    key_env : str
        Name of the environment variable holding the provider API key.
    """
    with gr.Accordion(label="⚙️ Settings", open=False):
        model = gr.Dropdown(
            choices=models,
            value=default_model,
            label="Select a model",
        )
        key = gr.Textbox(
            os.getenv(key_env),
            type="password",
            label="API key",
        )
        sys_prompt = gr.Textbox(
            # Typo fix of the original default prompt ("carfuly and thinking").
            "You are a helpful assistant. First recognize the user request, "
            "then reply carefully and thoughtfully.",
            label="System prompt",
        )
        max_tk = gr.Slider(0, 32000, 10000, label="Max new tokens")
        temp = gr.Slider(0, 1, 0.3, label="Temperature")
        top_p = gr.Slider(0, 1, 0.95, label="Top P sampling")
    gr.ChatInterface(
        chat_fn,
        additional_inputs=[model, key, sys_prompt, max_tk, temp, top_p],
    )


if __name__ == "__main__":
    # Assemble the Gradio UI: one tab per upstream LLM provider.
    with gr.Blocks() as demo:
        gr.Markdown("# LLM API Aggregation Deployment")
        with gr.Tab("DeepSeek"):
            _provider_tab(
                deepseek,
                ["deepseek-chat", "deepseek-reasoner"],
                "deepseek-chat",
                "ds_api_key",
            )
        with gr.Tab("Kimi"):
            _provider_tab(
                kimi,
                ["moonshot-v1-8k", "moonshot-v1-32k", "moonshot-v1-128k"],
                "moonshot-v1-32k",
                "kimi_api_key",
            )
    demo.queue().launch()