import gradio as gr
from llama_cpp import Llama
# ๋ชจ๋ธ ํŒŒ์ผ ๊ฒฝ๋กœ (Hugging Face Hub์—์„œ ๋‹ค์šด๋กœ๋“œ)
MODEL_REPO_ID = "kimhyunwoo/KOONE"
MODEL_FILENAME = "KOONE-3.5-2.4B-Instruct-Q4_K_M.gguf"
# Llama ๊ฐ์ฒด ์ƒ์„ฑ (CPU๋งŒ ์‚ฌ์šฉํ•˜๋ฏ€๋กœ n_gpu_layers๋Š” 0 ๋˜๋Š” ์„ค์ •ํ•˜์ง€ ์•Š์Œ)
# n_threads๋ฅผ ์‹œ์Šคํ…œ CPU ์ฝ”์–ด ์ˆ˜์— ๋งž๊ฒŒ ์กฐ์ ˆ (๋˜๋Š” ์ƒ๋žตํ•˜์—ฌ ์ž๋™ ์„ค์ •)
llm = Llama(
model_path="", # model_path๋Š” ๋น„์›Œ๋‘๊ณ  from_pretrained ์‚ฌ์šฉ
repo_id=MODEL_REPO_ID,
filename=MODEL_FILENAME,
n_ctx=2048, # ์ปจํ…์ŠคํŠธ ๊ธธ์ด. ๋ชจ๋ธ์— ๋งž๊ฒŒ ์„ค์ •.
n_threads=8, # CPU ์“ฐ๋ ˆ๋“œ ์ˆ˜ (์‹œ์Šคํ…œ์— ๋งž๊ฒŒ ์กฐ์ ˆ)
verbose=False, # ํ•„์š”ํ•˜๋ฉด True๋กœ ๋ณ€๊ฒฝ
)
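
# Equivalent two-step alternative (a sketch): fetch the GGUF explicitly with
# huggingface_hub and hand Llama a local path, e.g. to manage the download and
# cache location yourself:
#
#   from huggingface_hub import hf_hub_download
#   model_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME)
#   llm = Llama(model_path=model_path, n_ctx=2048, n_threads=8, verbose=False)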

def generate_text(prompt, system_prompt, max_tokens, temperature, top_p):
    """Send the prompt to the model and return the generated text."""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    output = llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=False,  # Return the whole completion at once (no streaming).
    )
    generated_text = output["choices"][0]["message"]["content"]
    return generated_text
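
# Optional streaming variant (a sketch; not wired into the interface below).
# With stream=True, create_chat_completion yields incremental chunks; each
# chunk's new text sits under choices[0]["delta"]["content"]. A Gradio fn
# that yields strings is rendered progressively in the output textbox.
def generate_text_stream(prompt, system_prompt, max_tokens, temperature, top_p):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    partial = ""
    for chunk in llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        delta = chunk["choices"][0]["delta"]
        partial += delta.get("content", "") or ""  # first chunk carries only the role
        yield partial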

# Define the Gradio interface.
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(lines=5, label="Prompt (Question)"),
        gr.Textbox(
            lines=2,
            label="System Prompt (optional)",
            # Default system prompt: "You are a helpful Korean-language assistant."
            value="당신은 도움이 되는 한국어 어시스턴트입니다.",
        ),
        gr.Slider(minimum=16, maximum=512, step=16, label="Max Tokens", value=128),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, label="Temperature", value=0.7),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, label="Top P", value=0.9),
    ],
    outputs=gr.Textbox(label="Generated Text (Answer)"),
    title="KOONE Chatbot (CPU Only)",
    description="Enter a question and click Submit to generate an answer.",
)
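
# On a CPU-only Space each request can take a while; a hedged option is to call
# iface.queue() before launch() so simultaneous submissions wait in line
# instead of running concurrently.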
iface.launch()