File size: 2,694 Bytes
038f313
445b96b
c5a20a4
038f313
db00df1
445b96b
c6bdd15
445b96b
 
 
 
 
038f313
 
27c8b8d
445b96b
0f1304a
544cdb6
 
 
 
 
a00ad13
038f313
445b96b
 
 
 
0f1304a
 
 
 
445b96b
27c8b8d
19532c8
 
 
 
 
445b96b
19532c8
e5700d7
19532c8
445b96b
7570a52
445b96b
 
 
 
 
 
 
 
 
 
e737e5e
 
 
 
0ef95ea
ca486cf
a8fc89d
445b96b
 
 
f572bef
 
 
 
 
e5700d7
0f1304a
445b96b
 
 
 
 
 
 
 
 
 
 
 
 
245e0b7
5aff883
445b96b
769901b
77298b9
445b96b
db8b55b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import gradio as gr
from openai import OpenAI
import os

# Hugging Face inference token, read from the HF_TOKEN environment variable.
# NOTE(review): this is None when HF_TOKEN is unset — the print below still
# claims success; API calls would then fail to authenticate.
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")

# OpenAI-compatible client pointed at the Hugging Face Inference API, so the
# standard chat-completions interface can target HF-hosted models.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model
):
    """Stream a chat completion for *message*, yielding the growing reply.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list[tuple[str, str]]
        Prior (user, assistant) exchange pairs; falsy sides are skipped
        when rebuilding the conversation transcript.
    system_message : str
        Content of the leading ``system`` role message.
    max_tokens, temperature, top_p, frequency_penalty
        Sampling parameters forwarded verbatim to the completions API.
    seed : int
        RNG seed; ``-1`` is the UI's "random" sentinel and is translated
        to ``None`` for the API.
    custom_model : str
        Model id to use; a blank string falls back to the default model.

    Yields
    ------
    str
        The assistant response accumulated so far (one yield per streamed
        content chunk).
    """
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")

    # The API expects seed=None for non-deterministic sampling; the UI
    # slider encodes "random" as -1.
    if seed == -1:
        seed = None

    messages = [{"role": "system", "content": system_message}]

    # Rebuild the alternating user/assistant transcript, skipping empty
    # sides (e.g. a turn with no assistant reply yet).
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    # Strip once and reuse; blank input falls back to the default model.
    model_to_use = custom_model.strip() or "Qwen/Qwen2.5-Coder-32B-Instruct"

    response = ""

    for message_chunk in client.chat.completions.create(
        model=model_to_use,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    ):
        for choice in message_chunk.choices:
            token_text = choice.delta.content
            # Streaming deltas may carry content=None (role-only or final
            # chunks); the original `response += token_text` raised
            # TypeError on those. Only accumulate real text.
            if token_text:
                response += token_text
                yield response

# ---- UI components -----------------------------------------------------

# Chat transcript display.
# NOTE(review): `likeable` has been deprecated in newer Gradio releases —
# confirm against the pinned Gradio version.
chatbot = gr.Chatbot(
    height=600,
    show_copy_button=True,
    placeholder="ChatGPT is initializing...",
    likeable=True,
    layout="panel",
)

# NOTE(review): gr.Label is normally an *output* component; used as an
# additional input here it supplies its fixed value as the system prompt —
# confirm this is intended rather than a gr.Textbox.
system_message_box = gr.Label(value="You can select Max Tokens, Temperature, Top-P, Seed")

# Sampling controls exposed to the user; each maps positionally onto a
# parameter of respond().
max_tokens_slider = gr.Slider(
    minimum=1024, maximum=2048, value=1024, step=100, label="Max new tokens"
)
temperature_slider = gr.Slider(
    minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"
)
top_p_slider = gr.Slider(
    minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"
)
frequency_penalty_slider = gr.Slider(
    minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty"
)
seed_slider = gr.Slider(
    minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)"
)

# Free-text override for the model id; defaults to the same model that
# respond() falls back to on blank input.
custom_model_box = gr.Textbox(
    value="Qwen/Qwen2.5-Coder-32B-Instruct", label="AI Mode is "
)

# Wire the streaming handler and every parameter widget into one
# ChatInterface; additional_inputs are passed to respond() in this exact
# order after (message, history).
demo = gr.ChatInterface(
    respond,
    chatbot=chatbot,
    theme="Nymbo/Nymbo_Theme",
    fill_height=True,
    additional_inputs=[
        system_message_box,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
        frequency_penalty_slider,
        seed_slider,
        custom_model_box,
    ],
)

# Script entry point: launch the Gradio app when run directly (not imported).
if __name__ == "__main__":
    print("Launching the ChatGPT-Llama...")
    demo.launch()