import llama_cpp
import llama_cpp.llama_tokenizer

import gradio as gr

# Download the quantized Qwen chat model from the Hugging Face Hub and pair it
# with the original Hugging Face tokenizer so the chat template is applied
# correctly.
llama = llama_cpp.Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    filename="*q8_0.gguf",
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
        "Qwen/Qwen1.5-0.5B"
    ),
    verbose=False,
)

# Placeholder model name for the OpenAI-compatible completion call below; the
# locally loaded model above is what actually generates the responses.
model = "gpt-3.5-turbo"

def predict(message, history):
    # Rebuild the conversation as OpenAI-style chat messages from Gradio's
    # (user, assistant) history pairs, then append the new user message.
    messages = []
    for user_message, assistant_message in history:
        messages.append({"role": "user", "content": user_message})
        messages.append({"role": "assistant", "content": assistant_message})
    messages.append({"role": "user", "content": message})

    # Stream the completion, yielding the accumulated text so the chat UI
    # updates as tokens arrive.
    response = llama.create_chat_completion_openai_v1(
        model=model, messages=messages, stream=True
    )

    text = ""
    for chunk in response:
        content = chunk.choices[0].delta.content
        if content:
            text += content
            yield text

js = """function () { | |
gradioURL = window.location.href | |
if (!gradioURL.endsWith('?__theme=dark')) { | |
window.location.replace(gradioURL + '?__theme=dark'); | |
} | |
}""" | |
css = """ | |
footer { | |
visibility: hidden; | |
} | |
full-height { | |
height: 100%; | |
} | |
""" | |
# Soft-themed Blocks page wrapping a streaming chat interface that fills the
# viewport height.
with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
    gr.ChatInterface(
        predict,
        fill_height=True,
        examples=[
            "What is the capital of France?",
            "Who was the first person on the moon?",
        ],
    )


if __name__ == "__main__":
    demo.launch()