import gradio as gr
from llama_cpp import Llama

# Model file location (downloaded from the Hugging Face Hub)
MODEL_REPO_ID = "kimhyunwoo/KOONE"
MODEL_FILENAME = "KOONE-3.5-2.4B-Instruct-Q4_K_M.gguf"

# Create the Llama object with from_pretrained, which fetches the GGUF file
# from the Hub. Inference is CPU-only, so n_gpu_layers stays at its default of 0.
# Adjust n_threads to the machine's CPU core count (or omit it for automatic selection).
llm = Llama.from_pretrained(
    repo_id=MODEL_REPO_ID,
    filename=MODEL_FILENAME,
    n_ctx=2048,     # context length; set to match the model
    n_threads=8,    # CPU thread count (tune for your system)
    verbose=False,  # switch to True for llama.cpp load/inference logs
)
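# Alternative loading path (a sketch, assuming huggingface_hub is installed,
# which llama-cpp-python's from_pretrained itself depends on): download the
# GGUF file explicitly and hand the local path to the plain Llama constructor.
#
#     from huggingface_hub import hf_hub_download
#     model_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME)
#     llm = Llama(model_path=model_path, n_ctx=2048, n_threads=8, verbose=False)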
def generate_text(prompt, system_prompt, max_tokens, temperature, top_p):
    """Send the prompt to the model and return the generated text."""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    output = llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=False,  # return the full completion at once; chat completions never echo the prompt
    )
    generated_text = output["choices"][0]["message"]["content"]
    return generated_text
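
# Optional streaming variant (a sketch, not part of the original app):
# create_chat_completion(stream=True) yields incremental chunks whose "delta"
# dicts carry newly generated tokens, and Gradio treats a generator function
# as a streaming output, so this could be passed as fn to gr.Interface instead.
def generate_text_stream(prompt, system_prompt, max_tokens, temperature, top_p):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    partial = ""
    for chunk in llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            partial += delta["content"]
            yield partial  # Gradio re-renders the output Textbox on each yield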
# Define the Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(lines=5, label="Prompt"),
        # Default system prompt: "You are a helpful Korean-language assistant."
        gr.Textbox(
            lines=2,
            label="System Prompt (optional)",
            value="당신은 도움이 되는 한국어 어시스턴트입니다.",
        ),
        gr.Slider(minimum=16, maximum=512, step=16, label="Max Tokens", value=128),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, label="Temperature", value=0.7),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, label="Top P", value=0.9),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="KOONE Chatbot (CPU Only)",
    description="Enter a question and click Submit to generate an answer.",
)
iface.launch()
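
# On a Hugging Face Space the bare launch() above is sufficient. When running
# the script on your own server you would typically bind to all interfaces
# (illustrative values, not from the original app):
#
#     iface.launch(server_name="0.0.0.0", server_port=7860)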