import gradio as gr
from openai import OpenAI
import os

# Read the Hugging Face access token from the environment.
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")

# Point the OpenAI-compatible client at the Hugging Face Inference API.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
):
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")

    # Rebuild the conversation in OpenAI chat format: system prompt first,
    # then alternating user/assistant turns, then the new user message.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    model_to_use = "meta-llama/Llama-3.2-3B-Instruct"

    # Stream the completion, accumulating tokens so Gradio can render the
    # partial response as it grows.
    response = ""
    for message_chunk in client.chat.completions.create(
        model=model_to_use,
        max_tokens=2048,
        stream=True,
        temperature=0.7,
        top_p=0.95,
        frequency_penalty=0.0,
        seed=None,
        messages=messages,
    ):
        # Some stream chunks carry no content (e.g. the final chunk), so
        # guard against a None delta before concatenating.
        token_text = message_chunk.choices[0].delta.content or ""
        response += token_text
        yield response
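
# Quick sanity check outside Gradio (a sketch, not part of the app; it
# assumes HF_TOKEN is set and the model above is reachable through the
# Inference API):
#
#   for partial in respond("Hello!", history=[], system_message="You are helpful."):
#       pass
#   print(partial)  # the fully accumulated reply after the stream ends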
chatbot = gr.Chatbot(
    height=600,
    show_copy_button=True,
    placeholder="ChatGPT is initializing...",
    likeable=True,
    layout="panel",
)
demo = gr.ChatInterface(
    fn=respond,
    fill_height=True,
    chatbot=chatbot,
    theme="Nymbo/Nymbo_Theme",
)

if __name__ == "__main__":
    print("Launching the ChatGPT-Llama...")
    demo.launch()
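
# To run locally (a sketch; assumes this file is saved as app.py and you
# have a valid Hugging Face token with Inference API access):
#
#   export HF_TOKEN=hf_...   # your Hugging Face access token
#   python app.py
#
# demo.launch() serves the Gradio UI at http://127.0.0.1:7860 by default.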