import gradio as gr
from openai import OpenAI
import os

ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded." if ACCESS_TOKEN else "Warning: HF_TOKEN is not set.")

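# Hugging Face's Inference API exposes an OpenAI-compatible endpoint, so the
# standard OpenAI client can be pointed at it with an HF token as the API key.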
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model
):
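    """Stream a chat completion for `message`.

    `history` is a list of (user, assistant) tuples from the Gradio chatbot;
    the remaining arguments are the sampling controls exposed as additional
    inputs. Yields the accumulated response text as tokens arrive.
    """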
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    
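    # The slider uses -1 to mean "random"; the API expects None for an
    # unseeded (nondeterministic) run.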
    if seed == -1:
        seed = None
    
    messages = [{"role": "system", "content": system_message}]
    
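    # Convert Gradio's (user, assistant) tuple history into OpenAI-style
    # role/content messages.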
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    
    messages.append({"role": "user", "content": message})
    
    # Fall back to the default model when the custom-model box is empty.
    custom_model = custom_model.strip()
    model_to_use = custom_model if custom_model else "Qwen/Qwen2.5-72B-Instruct"
    
    response = ""
    
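    # Stream the completion, yielding the accumulated text so the UI updates
    # as tokens arrive.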
    for message_chunk in client.chat.completions.create(
        model=model_to_use,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    ):
        # Delta chunks can arrive with content=None (e.g. role-only or final
        # chunks), so guard before concatenating.
        token_text = message_chunk.choices[0].delta.content
        if token_text:
            response += token_text
        yield response

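# --- UI components (wired into ChatInterface below) ---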
chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Model is loading...", likeable=True, layout="panel")

system_message_box = gr.Textbox(value="You can select Max Tokens, Temperature, Top-P, Seed", label="System message")

max_tokens_slider = gr.Slider(1024, 2048, value=1024, step=100, label="Max new tokens")
temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
top_p_slider = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-P")
frequency_penalty_slider = gr.Slider(-2.0, 2.0, value=0.0, step=0.1, label="Frequency Penalty")
seed_slider = gr.Slider(-1, 65535, value=-1, step=1, label="Seed (-1 for random)")

custom_model_box = gr.Textbox(value="Qwen/Qwen2.5-72B-Instruct", label="Custom model")

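# The additional_inputs below must stay in the same order as the extra
# parameters of respond() after (message, history).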
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        system_message_box,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
        frequency_penalty_slider,
        seed_slider,
        custom_model_box,
    ],
    fill_height=True,
    chatbot=chatbot,
    theme="Nymbo/Nymbo_Theme",
)

if __name__ == "__main__":
    print("Launching the ChatGPT-Llama...")
    demo.launch()
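
# To run locally (assumes an HF_TOKEN with Inference API access):
#   export HF_TOKEN=hf_xxx
#   python app.py
# Gradio serves on http://127.0.0.1:7860 by default.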