# iris / app.py
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Model identifier from Hugging Face
model_repo = "ID2223-Lab/llama_lora_merged_GGUF" # Hugging Face model ID
# Download the GGUF file from Hugging Face
model_path = hf_hub_download(repo_id=model_repo, filename="FineTune_Llama.gguf")
# Load the GGUF model using llama-cpp-python
print("Loading model...")
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=8) # Adjust threads as needed
print("Model loaded!")
# Function for inference
def chat_with_model(user_input, chat_history):
"""
Process user input and generate a response from the model.
:param user_input: User's input string
:param chat_history: List of [user_message, ai_response] pairs
:return: Updated chat history
"""
# Construct the prompt from chat history
prompt = ""
for user, ai in chat_history:
prompt += f"User: {user}\nAI: {ai}\n"
prompt += f"User: {user_input}\nAI:" # Add the latest user input
    # Generate a response; cap new tokens and stop before the model starts the next "User:" turn
    raw_response = llm(prompt, max_tokens=256, stop=["User:"])["choices"][0]["text"].strip()
    # Safeguard: drop any trailing "User:" continuation that slipped past the stop sequence
    response = raw_response.split("User:")[0].strip()
# Update chat history with the new turn
chat_history.append((user_input, response))
return chat_history, chat_history
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🦙 LLaMA Chatbot fine-tuned on FineTome-100k")
chatbot = gr.Chatbot(label="Chat with the Model")
with gr.Row():
with gr.Column(scale=4):
user_input = gr.Textbox(label="Your Message", placeholder="Type a message...")
with gr.Column(scale=1):
submit_btn = gr.Button("Send")
chat_history = gr.State([])
# Link components
submit_btn.click(
chat_with_model,
inputs=[user_input, chat_history],
outputs=[chatbot, chat_history],
show_progress=True,
)
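    # Optional convenience (an assumption, not part of the original wiring): let the
    # Enter key in the textbox trigger the same handler and outputs as the Send button.
    user_input.submit(
        chat_with_model,
        inputs=[user_input, chat_history],
        outputs=[chatbot, chat_history],
        show_progress=True,
    )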
# Launch the Gradio app
demo.launch()