import os

import gradio as gr
from llama_cpp import Llama

# Read the Hugging Face token from the environment (e.g. a Spaces secret);
# it is only required if the model repo is private or gated.
HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_PATH = "unsloth.Q4_K_M.gguf"

# Download the GGUF weights from the Hub if they are not present locally.
if not os.path.exists(MODEL_PATH):
    from huggingface_hub import hf_hub_download

    MODEL_PATH = hf_hub_download(
        repo_id="RCCsabi/model_q4_k_m",
        filename="unsloth.Q4_K_M.gguf",
        token=HF_TOKEN,  # `use_auth_token` is deprecated; use `token` instead
    )
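
# Note: hf_hub_download caches the file under the local Hugging Face cache
# (~/.cache/huggingface/hub by default) and returns the path to that copy,
# so repeated restarts do not re-download the weights.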

# Load the quantized GGUF model into llama.cpp.
llm = Llama(model_path=MODEL_PATH)
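# The call above relies on llama-cpp-python's defaults. For longer prompts
# or more CPU parallelism you could pass tuning parameters, e.g. (values
# below are illustrative, not taken from the original script):
#
#     llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=4)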


def generate_response(prompt):
    # Ask for a short completion and stop at the first newline.
    output = llm(prompt, max_tokens=50, stop=["\n"])
    response = output["choices"][0]["text"]
    return response.strip()
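
# For example, generate_response("The capital of France is") returns the
# model's first line of completion with surrounding whitespace stripped.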

demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs="text",
    title="unsloth.Q4_K_M.gguf Model Inference",
    description="Enter a prompt to generate a response.",
)
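
# When running locally, demo.launch(share=True) would also expose a temporary
# public URL; on Hugging Face Spaces the plain launch() below is sufficient.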

if __name__ == "__main__":
    demo.launch()