import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import spaces

model_name = "hosseinhimself/ISANG-v1.0-8B"
base_model_name = "unsloth/Meta-Llama-3.1-8B"

# Load tokenizer globally
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
def load_model():
    try:
        # Load the base model in half precision
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
            low_cpu_mem_usage=True,
        )
        # Load the PEFT (LoRA) adapter on top of the base model
        model = PeftModel.from_pretrained(base_model, model_name)
        print(f"Model loaded successfully. Using device: {model.device}")
        return model
    except Exception as e:
        print(f"Error loading model: {e}")
        raise
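
# If the full fp16 weights (roughly 16 GB for an 8B model) do not fit on the
# Space's hardware, a 4-bit quantized load is a common fallback. A minimal
# sketch, assuming bitsandbytes is installed (not listed in this file):
#
#   from transformers import BitsAndBytesConfig
#
#   base_model = AutoModelForCausalLM.from_pretrained(
#       base_model_name,
#       quantization_config=BitsAndBytesConfig(
#           load_in_4bit=True,
#           bnb_4bit_compute_dtype=torch.float16,
#       ),
#       device_map="auto",
#       trust_remote_code=True,
#   )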
# Load the model once at startup; reloading it inside the handler would
# re-download and re-shard the weights on every request.
model = load_model()

@spaces.GPU  # allocates a GPU for this call if the Space runs on ZeroGPU; the spaces import was otherwise unused
def generate_text(prompt):
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            num_return_sequences=1,
            do_sample=True,  # required for temperature to have any effect
            temperature=0.7,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
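
# The prompt is passed to the model verbatim. If the ISANG fine-tune expects the
# Llama 3.1 chat format, wrapping the message in the tokenizer's chat template
# may improve responses; a sketch, assuming this repo defines a chat template:
#
#   prompt = tokenizer.apply_chat_template(
#       [{"role": "user", "content": prompt}],
#       tokenize=False,
#       add_generation_prompt=True,
#   )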
gradio_app = gr.Interface(
    generate_text,
    inputs=gr.Textbox(label="Enter your message", lines=3),
    outputs=gr.Textbox(label="Chatbot Response"),
    title="ISANG Chatbot",
    description=f"""This is a simple chatbot powered by the ISANG model, fine-tuned from the {base_model_name} model.
Enter your message and see how the chatbot responds!""",
    # Persian example prompts: "Hi, how are you?", "Tell me a story",
    # "What's the best book you've read?", "What do you do in your free time?",
    # "What do you think about artificial intelligence?"
    examples=[
        ["سلام، چطوری؟"],
        ["برام یه داستان تعریف کن"],
        ["بهترین کتابی که خوندی چی بوده؟"],
        ["توی اوقات فراغتت چی کار میکنی؟"],
        ["نظرت درباره هوش مصنوعی چیه؟"],
    ],
)
if __name__ == "__main__":
    gradio_app.launch()
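
# A minimal sketch of querying the running app from another process with
# gradio_client (the Space id below is an assumption inferred from the model
# repo name; "/predict" is the default endpoint for a gr.Interface):
#
#   from gradio_client import Client
#
#   client = Client("hosseinhimself/ISANG-v1.0-8B")
#   print(client.predict("سلام، چطوری؟", api_name="/predict"))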