Spaces:
Running
Running
File size: 2,694 Bytes
038f313 445b96b c5a20a4 038f313 db00df1 445b96b c6bdd15 445b96b 038f313 27c8b8d 445b96b 0f1304a 544cdb6 a00ad13 038f313 445b96b 0f1304a 445b96b 27c8b8d 19532c8 445b96b 19532c8 e5700d7 19532c8 445b96b 7570a52 445b96b e737e5e 0ef95ea ca486cf a8fc89d 445b96b f572bef e5700d7 0f1304a 445b96b 245e0b7 5aff883 445b96b 769901b 77298b9 445b96b db8b55b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import gradio as gr
from openai import OpenAI
import os
# Hugging Face access token read from the environment; used to authenticate
# against the HF Inference API below.
# NOTE(review): os.getenv returns None when HF_TOKEN is unset, yet the print
# below claims success unconditionally — consider failing fast if None.
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")
# OpenAI-compatible client pointed at the Hugging Face Inference API
# (serverless endpoint exposing the OpenAI chat-completions protocol).
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model
):
    """Stream a chat completion for *message* given the prior *history*.

    Rebuilds the conversation as OpenAI-style chat messages, sends it to the
    HF Inference API via the module-level ``client``, and yields the
    accumulated response text after each streamed token (Gradio streaming
    convention: yield the full text-so-far, not the delta).

    Args:
        message: Latest user message.
        history: Prior (user, assistant) turn pairs; either side may be empty.
        system_message: System prompt placed first in the message list.
        max_tokens / temperature / top_p / frequency_penalty: Sampling
            parameters forwarded verbatim to the API.
        seed: Integer seed; -1 means "random" and is translated to None.
        custom_model: Model id override; blank falls back to the default.

    Yields:
        The response text accumulated so far (str).
    """
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")

    # The UI uses -1 as a "random seed" sentinel; the API expects the
    # parameter to be omitted (None) in that case.
    if seed == -1:
        seed = None

    # Flatten history into the OpenAI chat-message format, skipping empty
    # sides of a turn (e.g. a user message with no assistant reply yet).
    messages = [{"role": "system", "content": system_message}]
    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    # Strip once and rely on truthiness: blank box -> default model.
    model_to_use = custom_model.strip() or "Qwen/Qwen2.5-Coder-32B-Instruct"

    response = ""
    for message_chunk in client.chat.completions.create(
        model=model_to_use,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    ):
        for choice in message_chunk.choices:
            # delta.content is None on role-only and terminal stream chunks;
            # without this guard `response += None` raises TypeError.
            token_text = choice.delta.content or ""
            response += token_text
            yield response
# --- UI components fed to gr.ChatInterface below ---
# Chat transcript panel. "likeable" enables per-message thumbs up/down.
chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="ChatGPT is initializing...", likeable=True, layout="panel")
# NOTE(review): gr.Label is a display-only component, yet its value is passed
# to respond() as the system prompt — a gr.Textbox may have been intended so
# the user can actually edit the system message; confirm.
system_message_box = gr.Label(value="You can select Max Tokens, Temperature, Top-P, Seed")
# Sampling controls, wired positionally into respond() via additional_inputs.
max_tokens_slider = gr.Slider(1024, 2048, value=1024, step=100, label="Max new tokens")
temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
top_p_slider = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-P")
frequency_penalty_slider = gr.Slider(-2.0, 2.0, value=0.0, step=0.1, label="Frequency Penalty")
# -1 is the "random seed" sentinel handled inside respond().
seed_slider = gr.Slider(-1, 65535, value=-1, step=1, label="Seed (-1 for random)")
# Model id override; blank falls back to the default inside respond().
custom_model_box = gr.Textbox(value="Qwen/Qwen2.5-Coder-32B-Instruct", label="AI Mode is ")
# Assemble the chat UI. additional_inputs are passed to respond() positionally
# after (message, history), so this list's order must match respond()'s
# parameter order: system_message, max_tokens, temperature, top_p,
# frequency_penalty, seed, custom_model.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        system_message_box,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
        frequency_penalty_slider,
        seed_slider,
        custom_model_box,
    ],
    fill_height=True,
    chatbot=chatbot,
    theme="Nymbo/Nymbo_Theme",
)
# Launch the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    print("Launching the ChatGPT-Llama...")
    demo.launch()
|