Spaces:

Alkhalaf
/

lab2

Runtime error

File size: 1,534 Bytes

6a8f69a
 
6118439
6a8f69a
 
 
ddd165e
ed282fb
2ad030f
ddd165e
a90fba5
d43a866
6a8f69a
 
 
 
 
fc79e83
6a8f69a
 
fc79e83
6a8f69a
 
 
 
d43a866
6a8f69a
 
 
 
 
 
fc79e83
6a8f69a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6118439
 
 
6a8f69a

from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
import gradio as gr
from huggingface_hub import login
import torch
import os

# Hugging Face access token, read from the environment variable "llama".
# The value is a secret, so it is deliberately never printed or logged.
hf_token = os.getenv("llama")

# Fail fast with an explicit exception rather than an `assert`, because
# assertions are removed when Python runs with -O and the check would vanish.
if hf_token is None:
    raise RuntimeError(
        "Token is missing! Make sure 'llama' is set in the environment."
    )
# Hub repository ids for the base model and for the LoRA adapter
model_name = "unsloth/llama-3.2-1b-instruct-bnb-4bit"
adapter_name = "Alkhalaf/lora_model"

# The tokenizer is fetched from the base-model repository named above
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)

# The adapter config carries its own base-model path; that recorded path
# (not model_name) is what is loaded as the base model below
peft_config = PeftConfig.from_pretrained(adapter_name, token=hf_token)
base_model = AutoModelForCausalLM.from_pretrained(
    peft_config.base_model_name_or_path,
    token=hf_token,
)

# Wrap the base model with the LoRA adapter
model = PeftModel.from_pretrained(base_model, adapter_name, token=hf_token)

# Define prediction function
def predict(input_text):
    """Generate text from the LoRA-adapted model for a single prompt.

    The prompt is tokenized, passed to ``model.generate`` and the first
    returned sequence is decoded with special tokens removed. The decoded
    string is whatever ``generate`` returns for that sequence; no prompt
    stripping is done here.

    Args:
        input_text: Prompt text to feed to the model.

    Returns:
        The decoded text of the first generated sequence.
    """
    # Place the encoded prompt on the same device as the model so generation
    # does not fail on a CPU/GPU tensor mismatch.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Unpack the whole encoding as keyword arguments: this forwards the
    # attention mask along with the input ids, and avoids positional
    # arguments, which some PEFT releases' generate() wrapper does not accept.
    # max_new_tokens limits only the generated continuation; max_length also
    # counted the prompt tokens, so a long prompt left no room to generate.
    outputs = model.generate(**inputs, max_new_tokens=150)

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Gradio UI: one text input and one text output, served by predict()
interface = gr.Interface(
    title="Conversational AI with LoRA",
    description="Interact with a fine-tuned LoRA model for conversational AI.",
    fn=predict,
    inputs="text",
    outputs="text",
)

# Start the app only when this file is executed as a script;
# share=True asks Gradio for a public share link
if __name__ == "__main__":
    interface.launch(share=True)