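# Gradio chat app that streams completions from the Hugging Face Inference
# API through its OpenAI-compatible endpoint, with a user-selectable model.
# Requires the `gradio` and `openai` packages and an HF_TOKEN environment variable.
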
import gradio as gr
from openai import OpenAI
import os

# Read the Hugging Face API token from the environment (set HF_TOKEN before launching).
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded." if ACCESS_TOKEN else "Warning: HF_TOKEN is not set.")

# Point the OpenAI client at the Hugging Face Inference API's
# OpenAI-compatible endpoint.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model,
):
    """ChatInterface callback: rebuild the message list, call the model,
    and stream the growing reply back to the UI."""
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    
    # A seed of -1 means "random": pass None so the backend chooses one.
    if seed == -1:
        seed = None
    
    # Start with the system prompt, then replay the conversation history.
    messages = [{"role": "system", "content": system_message}]

    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})
    
    # Fall back to the default model when the custom-model box is empty.
    model_to_use = custom_model.strip() or "meta-llama/Llama-3.1-8B-Instruct"
    
    # Accumulate the streamed reply; each yield refreshes the chat display.
    response = ""
    
    for message_chunk in client.chat.completions.create(
        model=model_to_use,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    ):
        # delta.content can be None on some chunks (e.g. the final one),
        # so guard before appending.
        token_text = message_chunk.choices[0].delta.content
        if token_text:
            response += token_text
            yield response
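
# UI components; these become the additional inputs of the ChatInterface.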

chatbot = gr.Chatbot(
    height=600,
    show_copy_button=True,
    placeholder="ChatGPT is initializing...",
    likeable=True,
    layout="panel",
)

system_message_box = gr.Textbox(value="", label="System message")

max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max Tokens")
temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
frequency_penalty_slider = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
seed_slider = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
custom_model_box = gr.Textbox(value="meta-llama/Llama-3.2-3B-Instruct", label="Custom Model")

# The order of additional_inputs must match respond()'s parameters after
# (message, history).
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        system_message_box,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
        frequency_penalty_slider,
        seed_slider,
        custom_model_box,
    ],
    fill_height=True,
    chatbot=chatbot,
    theme="Nymbo/Nymbo_Theme",
)
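
# Running this file directly starts the Gradio server.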

if __name__ == "__main__":
    print("Launching the ChatGPT-Llama...")
    demo.launch()