Spaces:

sawac
/

llama_chat_test

Runtime error

llama_chat_test / app.py

Update app.py

abbe10d verified 5 months ago

1.41 kB

	import gradio as gr
	from huggingface_hub import hf_hub_download
	from llama_cpp import Llama
	import os

	# Context length handed to Llama as n_ctx below.
	CONTEXT_SIZE = 4096

	# Model to use, identified by its Hugging Face Hub repository and file name.
	repo_id = "ineair/llm-jp-3-3.7b-instruct-EZO-Humanities-gguf"
	filename = "llm-jp-3-3.7b-instruct-EZO-Humanities-f16.gguf"

	# Download the model file (the cached copy is used when one already exists).
	model_path = hf_hub_download(filename=filename, repo_id=repo_id)

	# Single module-level model instance shared by every request handler.
	llm = Llama(
	    n_ctx=CONTEXT_SIZE,
	    n_batch=32,
	    n_threads=os.cpu_count(),  # one thread per CPU reported by the OS
	    model_path=model_path,
	    verbose=False,
	)

	def get_llama_response(prompt):
	    """Run a streaming completion for *prompt* on the module-level ``llm``.

	    The result of calling ``llm`` with ``stream=True`` is returned unchanged,
	    so the caller is expected to iterate over it to receive the output
	    piece by piece.
	    """
	    # Fixed sampling configuration used for every request.
	    sampling_options = {
	        "max_tokens": 2048,
	        "temperature": 0.7,
	        "top_p": 0.95,
	        "repeat_penalty": 1.1,
	    }
	    return llm(prompt, stream=True, **sampling_options)

	def greet(prompt, intensity):
	    """Stream the model's answer to *prompt*, then append emphasis marks.

	    This is a generator: every yielded value is the complete text produced
	    so far, so the consumer can simply replace what it displays on each
	    update.

	    Args:
	        prompt: Text sent to the model via ``get_llama_response``.
	        intensity: Number of "！" characters appended to the final text;
	            converted with ``int()`` before use.

	    Yields:
	        The accumulated response after each received chunk, followed by one
	        last value consisting of the full response plus the "！" suffix.
	    """
	    full_response = ""
	    for output in get_llama_response(prompt):
	        choices = output['choices']
	        if not choices:
	            # Nothing to append for a chunk that carries no choices.
	            continue
	        full_response += choices[0]['text']
	        yield full_response

	    # The value of a ``return`` inside a generator is never delivered to the
	    # code iterating over it, so the decorated text has to be yielded for
	    # the intensity setting to have any visible effect.
	    yield full_response + "！" * int(intensity)

	# UI components, created in the order they appear in the interface.
	prompt_box = gr.Textbox(label="Enter your prompt")
	intensity_slider = gr.Slider(minimum=0, maximum=10, step=1, label="Intensity")
	response_box = gr.Textbox(label="Generated Response")

	# Wire the components to greet(); live=False is passed explicitly.
	demo = gr.Interface(
	    fn=greet,
	    inputs=[prompt_box, intensity_slider],
	    outputs=response_box,
	    title="Llama.cpp-python-sample (Streaming)",
	    description=f"MODEL: {filename} from {repo_id}",
	    live=False,
	)

	# Set up the request queue first, then start the app.
	demo.queue()
	demo.launch()