import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import spaces

model_name = "hosseinhimself/ISANG-v1.0-8B"
base_model_name = "unsloth/Meta-Llama-3.1-8B"

# Load tokenizer globally
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
def load_model():
    try:
        # Load the base model in half precision
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
            low_cpu_mem_usage=True,
        )
        # Load the PEFT (LoRA) adapter on top of the base model
        model = PeftModel.from_pretrained(base_model, model_name)
        print(f"Model loaded successfully. Using device: {model.device}")
        return model
    except Exception as e:
        print(f"Error loading model: {e}")
        raise
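
# If the full fp16 weights (roughly 16 GB for an 8B model) do not fit on the
# Space's hardware, a 4-bit quantized load is a common fallback. A minimal
# sketch, assuming bitsandbytes is installed (not listed in this file):
#
#   from transformers import BitsAndBytesConfig
#
#   base_model = AutoModelForCausalLM.from_pretrained(
#       base_model_name,
#       quantization_config=BitsAndBytesConfig(
#           load_in_4bit=True,
#           bnb_4bit_compute_dtype=torch.float16,
#       ),
#       device_map="auto",
#       trust_remote_code=True,
#   )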
# Load the model once at startup; reloading it inside the handler would
# re-download and re-shard the weights on every request.
model = load_model()

@spaces.GPU  # allocates a GPU for this call if the Space runs on ZeroGPU; the spaces import was otherwise unused
def generate_text(prompt):
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            num_return_sequences=1,
            do_sample=True,  # required for temperature to have any effect
            temperature=0.7,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
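
# The prompt is passed to the model verbatim. If the ISANG fine-tune expects the
# Llama 3.1 chat format, wrapping the message in the tokenizer's chat template
# may improve responses; a sketch, assuming this repo defines a chat template:
#
#   prompt = tokenizer.apply_chat_template(
#       [{"role": "user", "content": prompt}],
#       tokenize=False,
#       add_generation_prompt=True,
#   )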
gradio_app = gr.Interface(
    generate_text,
    inputs=gr.Textbox(label="Enter your message", lines=3),
    outputs=gr.Textbox(label="Chatbot Response"),
    title="ISANG Chatbot",
    description=f"""This is a simple chatbot powered by the ISANG model, fine-tuned from the {base_model_name} model.
Enter your message and see how the chatbot responds!""",
    # Persian example prompts: "Hi, how are you?", "Tell me a story",
    # "What's the best book you've read?", "What do you do in your free time?",
    # "What do you think about artificial intelligence?"
    examples=[
        ["سلام، چطوری؟"],
        ["برام یه داستان تعریف کن"],
        ["بهترین کتابی که خوندی چی بوده؟"],
        ["توی اوقات فراغتت چی کار میکنی؟"],
        ["نظرت درباره هوش مصنوعی چیه؟"],
    ],
)
if __name__ == "__main__":
    gradio_app.launch()
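
# A minimal sketch of querying the running app from another process with
# gradio_client (the Space id below is an assumption inferred from the model
# repo name; "/predict" is the default endpoint for a gr.Interface):
#
#   from gradio_client import Client
#
#   client = Client("hosseinhimself/ISANG-v1.0-8B")
#   print(client.predict("سلام، چطوری؟", api_name="/predict"))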