import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import gradio as gr

# Base model and fine-tuned LoRA adapter
model_name = "ybelkada/falcon-7b-sharded-bf16"
fine_tuned_model = "mounseflit/falcon-7b-marrakech"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load base model (CPU-only, no quantization)
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",          # Let accelerate place modules (CPU here)
    offload_folder="offload",   # Offload large parts of the model to disk to save memory
    offload_state_dict=True,    # Offload the state dict while loading to reduce peak memory
)

# Load the fine-tuned LoRA adapter on top of the base model
model = PeftModel.from_pretrained(base_model, fine_tuned_model)

# Put the model in evaluation mode (disables dropout)
model.eval()
# Generate text from a prompt
def generate_text(prompt):
    # Truncate the prompt to keep CPU memory use low
    inputs = tokenizer(prompt, return_tensors="pt", max_length=50, truncation=True).to("cpu")
    with torch.no_grad():
        # Cap total length (prompt + completion); Falcon has no pad token,
        # so reuse the EOS token id to avoid a generation warning
        outputs = model.generate(**inputs, max_length=100, pad_token_id=tokenizer.eos_token_id)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
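
# Optional local smoke test before wiring up the UI. The prompt below is a
# hypothetical example; CPU generation with a 7B model is slow, so it stays
# commented out by default:
# print(generate_text("What are the best places to visit in Marrakech?"))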
# Create Gradio interface
iface = gr.Interface(fn=generate_text, inputs="text", outputs="text", title="Falcon 7B Lite (CPU)")

# Launch the app
iface.launch()
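
# When running outside Spaces, launch() can also expose a temporary public
# URL via Gradio's share flag, e.g.:
# iface.launch(share=True)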