# ISANG-1.0-8B / app.py
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import spaces

model_name = "hosseinhimself/ISANG-v1.0-8B"
base_model_name = "unsloth/Meta-Llama-3.1-8B"

# Load the tokenizer once at module import; it is CPU-only and cheap to keep global.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
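
# Llama tokenizers ship without a padding token; reusing EOS avoids the
# pad_token_id warning from generate(). Safe here because inputs are never padded.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token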


@spaces.GPU
def load_model():
    """Load the Llama 3.1 base model and attach the ISANG LoRA adapter."""
    try:
        # Load the base model in half precision; device_map="auto" lets
        # accelerate place the weights on the available GPU.
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
            low_cpu_mem_usage=True,
        )
        # Apply the PEFT (LoRA) adapter weights on top of the base model.
        model = PeftModel.from_pretrained(base_model, model_name)
        print(f"Model loaded successfully. Using device: {model.device}")
        return model
    except Exception as e:
        print(f"Error loading model: {e}")
        raise
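

# Optional: cache the loaded model so repeated requests can skip the reload.
# A minimal sketch, not part of the original app. It assumes module-level
# state survives between requests; on ZeroGPU hardware a CUDA-resident model
# may not remain valid after the GPU is released, so treat this as a starting
# point rather than a drop-in fix. get_model is a hypothetical helper name.
_model_cache = {}


def get_model():
    if "model" not in _model_cache:
        _model_cache["model"] = load_model()
    return _model_cache["model"]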


@spaces.GPU
def generate_text(prompt):
    # The model is reloaded on every request; the get_model() sketch above
    # shows one way to cache it instead.
    model = load_model()
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    with torch.no_grad():
        # do_sample=True makes the temperature setting actually affect decoding.
        outputs = model.generate(**inputs, max_new_tokens=200, num_return_sequences=1, do_sample=True, temperature=0.7)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
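

# Optional: stream partial output instead of returning one final string.
# A minimal sketch using transformers' TextIteratorStreamer; the name
# generate_text_stream is an assumption, not part of the original app, and it
# assumes the installed spaces version supports generator functions. To use
# it, pass generate_text_stream as the fn of gr.Interface below. (These
# imports would normally live at the top of the file.)
from threading import Thread

from transformers import TextIteratorStreamer


@spaces.GPU
def generate_text_stream(prompt):
    model = load_model()
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    # skip_prompt=True drops the echoed input from the stream.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    thread = Thread(target=model.generate,
                    kwargs=dict(**inputs, max_new_tokens=200, streamer=streamer))
    thread.start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # Gradio renders each yielded string incrementally
    thread.join()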


gradio_app = gr.Interface(
    generate_text,
    inputs=gr.Textbox(label="Enter your message", lines=3),
    outputs=gr.Textbox(label="Chatbot Response"),
    title="ISANG Chatbot",
    description=f"""This is a simple chatbot powered by the ISANG model, fine-tuned from {base_model_name}.
Enter your message and see how the chatbot responds!""",
    examples=[
        ["سلام، چطوری؟"],  # "Hi, how are you?"
        ["برام یه داستان تعریف کن"],  # "Tell me a story"
        ["بهترین کتابی که خوندی چی بوده؟"],  # "What is the best book you have read?"
        ["توی اوقات فراغتت چی کار می‌کنی؟"],  # "What do you do in your free time?"
        ["نظرت درباره هوش مصنوعی چیه؟"],  # "What do you think about artificial intelligence?"
    ],
)
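
# Optional: calling gradio_app.queue(max_size=16) before launch() would queue
# concurrent requests rather than running them in parallel; queue() is part of
# the standard Gradio API, and max_size=16 is an illustrative value, not from
# the original app.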

if __name__ == "__main__":
    gradio_app.launch()