import gradio as gr
from openai import OpenAI
import os

# Read the Hugging Face access token from the environment.
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")

# Point the OpenAI-compatible client at the Hugging Face Inference API.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
):
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")

    # Rebuild the conversation in OpenAI chat format: system prompt first,
    # then alternating user/assistant turns, then the new user message.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    model_to_use = "meta-llama/Llama-3.2-3B-Instruct"

    # Stream the completion, accumulating tokens so Gradio can render the
    # partial response as it grows.
    response = ""
    for message_chunk in client.chat.completions.create(
        model=model_to_use,
        max_tokens=2048,
        stream=True,
        temperature=0.7,
        top_p=0.95,
        frequency_penalty=0.0,
        seed=None,
        messages=messages,
    ):
        # Some stream chunks carry no content (e.g. the final chunk), so
        # guard against a None delta before concatenating.
        token_text = message_chunk.choices[0].delta.content or ""
        response += token_text
        yield response
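
# Quick sanity check outside Gradio (a sketch, not part of the app; it
# assumes HF_TOKEN is set and the model above is reachable through the
# Inference API):
#
#   for partial in respond("Hello!", history=[], system_message="You are helpful."):
#       pass
#   print(partial)  # the fully accumulated reply after the stream ends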
chatbot = gr.Chatbot(
    height=600,
    show_copy_button=True,
    placeholder="ChatGPT is initializing...",
    likeable=True,
    layout="panel",
)
demo = gr.ChatInterface(
    fn=respond,
    fill_height=True,
    chatbot=chatbot,
    theme="Nymbo/Nymbo_Theme",
)

if __name__ == "__main__":
    print("Launching the ChatGPT-Llama...")
    demo.launch()
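
# To run locally (a sketch; assumes this file is saved as app.py and you
# have a valid Hugging Face token with Inference API access):
#
#   export HF_TOKEN=hf_...   # your Hugging Face access token
#   python app.py
#
# demo.launch() serves the Gradio UI at http://127.0.0.1:7860 by default.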