LLM_APIs / app.py
admin
upd hint
86660db
raw
history blame
4.54 kB
import os
import gradio as gr
from openai import OpenAI
def _build_messages(system_prompt, history, message):
    """Assemble the chat-completions message list for a single turn."""
    msgs = [{"role": "system", "content": system_prompt}]
    for user, assistant in history:
        msgs.append({"role": "user", "content": user})
        # Earlier model replies must be replayed with the "assistant" role;
        # tagging them "system" makes the model treat its own answers as
        # instructions instead of conversation context.
        msgs.append({"role": "assistant", "content": assistant})
    msgs.append({"role": "user", "content": message})
    return msgs


def predict(
    message,
    history,
    system_prompt,
    model,
    api_url,
    api_key,
    max_tk,
    temp,
    top_p,
):
    """Send one chat turn to an OpenAI-compatible endpoint and return the reply.

    Args:
        message: The new user message.
        history: Iterable of ``(user, assistant)`` pairs from earlier turns.
        system_prompt: Text sent as the leading system message.
        model: Model identifier forwarded to the API.
        api_url: Base URL of the OpenAI-compatible service.
        api_key: API key; when empty or ``None`` no request is made.
        max_tk: Forwarded as ``max_tokens``.
        temp: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Returns:
        The content of the first choice's message, a hint string when no API
        key was supplied, or the text of any exception raised while creating
        the client or performing the request.
    """
    if not api_key:
        return "Please set valid api keys in settings first."

    msgs = _build_messages(system_prompt, history, message)

    try:
        client = OpenAI(api_key=api_key, base_url=api_url)
        completion = client.chat.completions.create(
            model=model,
            messages=msgs,
            max_tokens=max_tk,
            temperature=temp,
            top_p=top_p,
            stream=False,
        )
        response = completion.to_dict()["choices"][0]["message"]["content"]
    except Exception as e:
        # Best effort: surface the failure text in the chat window rather
        # than crashing the UI callback.
        response = f"{e}"

    return response
def deepseek(
    message,
    history,
    model,
    api_key,
    system_prompt,
    max_tk,
    temp,
    top_p,
):
    """Chat callback for the DeepSeek tab.

    Fetches the complete reply from ``predict`` against the DeepSeek base URL,
    then yields progressively longer prefixes of it, one more character per
    step, so the reply appears to be typed out.
    """
    reply = predict(
        message,
        history,
        system_prompt,
        model,
        "https://api.deepseek.com",
        api_key,
        max_tk,
        temp,
        top_p,
    )

    # The reply is already complete here; each step just reveals one more
    # character of it.
    for end, _ in enumerate(reply, start=1):
        yield reply[:end]
def kimi(
    message,
    history,
    model,
    api_key,
    system_prompt,
    max_tk,
    temp,
    top_p,
):
    """Chat callback for the Kimi tab.

    Fetches the complete reply from ``predict`` against the Moonshot base URL,
    then yields progressively longer prefixes of it, one more character per
    step, so the reply appears to be typed out.
    """
    reply = predict(
        message,
        history,
        system_prompt,
        model,
        "https://api.moonshot.cn/v1",
        api_key,
        max_tk,
        temp,
        top_p,
    )

    # The reply is already complete here; each step just reveals one more
    # character of it.
    for end, _ in enumerate(reply, start=1):
        yield reply[:end]
if __name__ == "__main__":
    # Default system prompt pre-filled in every provider tab.
    default_prompt = "You are a useful assistant. first recognize user request and then reply carfuly and thinking"

    def build_tab(title, chat_fn, model_choices, default_model, key_env):
        """Create one provider tab: collapsed settings plus a chat interface.

        The settings widgets are handed to ``gr.ChatInterface`` as
        ``additional_inputs`` in the order the chat callbacks expect them:
        model, API key, system prompt, max tokens, temperature, top-p.
        """
        with gr.Tab(title):
            with gr.Accordion(label="⚙️ Settings", open=False):
                model_box = gr.Dropdown(
                    choices=model_choices,
                    value=default_model,
                    label="Select a model",
                )
                # Pre-fill the key from the environment when it is set.
                key_box = gr.Textbox(
                    value=os.getenv(key_env),
                    type="password",
                    label="API key",
                )
                prompt_box = gr.Textbox(
                    value=default_prompt,
                    label="System prompt",
                )
                max_tokens_slider = gr.Slider(
                    minimum=0, maximum=32000, value=10000, label="Max new tokens"
                )
                temperature_slider = gr.Slider(
                    minimum=0, maximum=1, value=0.3, label="Temperature"
                )
                top_p_slider = gr.Slider(
                    minimum=0, maximum=1, value=0.95, label="Top P sampling"
                )

            gr.ChatInterface(
                fn=chat_fn,
                additional_inputs=[
                    model_box,
                    key_box,
                    prompt_box,
                    max_tokens_slider,
                    temperature_slider,
                    top_p_slider,
                ],
            )

    # Create Gradio interface
    with gr.Blocks() as demo:
        gr.Markdown("# LLM API Aggregation Deployment")
        build_tab(
            "DeepSeek",
            deepseek,
            ["deepseek-chat", "deepseek-reasoner"],
            "deepseek-chat",
            "ds_api_key",
        )
        build_tab(
            "Kimi",
            kimi,
            ["moonshot-v1-8k", "moonshot-v1-32k", "moonshot-v1-128k"],
            "moonshot-v1-32k",
            "kimi_api_key",
        )

    demo.queue().launch()