Spaces:
Sleeping
Sleeping
import gradio as gr | |
from openai import OpenAI | |
import os | |
# API token taken from the HF_TOKEN environment variable. The lookup yields
# None when the variable is unset; the message below is printed either way.
ACCESS_TOKEN = os.environ.get("HF_TOKEN")
print("Access token loaded.")

# OpenAI client aimed at the Hugging Face inference endpoint instead of the
# library's default base URL.
client = OpenAI(
    api_key=ACCESS_TOKEN,
    base_url="https://api-inference.huggingface.co/v1/",
)
print("OpenAI client initialized.")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message=None,
):
    """Stream a chat completion for *message*, yielding the reply so far.

    Args:
        message: The latest user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
            Falsy entries in a pair are skipped.
        system_message: Optional system prompt. When ``None`` (the default,
            which applies when the caller passes only ``message`` and
            ``history``) no system message is sent; any other value is sent
            as the first message.

    Yields:
        The accumulated response text after each streamed chunk.
    """
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")

    messages = []
    if system_message is not None:
        messages.append({"role": "system", "content": system_message})
    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    model_to_use = "meta-llama/Llama-3.2-3B-Instruct"
    stream = client.chat.completions.create(
        model=model_to_use,
        max_tokens=2048,
        stream=True,
        temperature=0.7,
        top_p=0.95,
        frequency_penalty=0.0,
        seed=None,
        messages=messages,
    )

    response = ""
    for message_chunk in stream:
        # Some chunks carry no choices at all; there is nothing to append.
        if not message_chunk.choices:
            continue
        # delta.content may be None (e.g. on a role-only or final chunk);
        # treat it as an empty string so the concatenation cannot fail.
        token_text = message_chunk.choices[0].delta.content or ""
        response += token_text
        yield response
# Chat display widget handed to the interface below.
chatbot = gr.Chatbot(
    height=600,
    layout="panel",
    likeable=True,
    show_copy_button=True,
    placeholder="ChatGPT is initializing...",
)

# Chat UI that routes each user message through respond().
# NOTE(review): no additional_inputs are configured here, so the interface
# presumably calls fn with only (message, history) -- confirm that respond's
# third parameter can be omitted.
demo = gr.ChatInterface(
    fn=respond,
    chatbot=chatbot,
    fill_height=True,
    theme="Nymbo/Nymbo_Theme",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    print("Launching the ChatGPT-Llama...")
    demo.launch()