from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Path to your local model directory
model_path = "C:/Users/YourName/Documents/fine_tuned_llama_3_2"  # Update this path accordingly

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load the model with optimized settings
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,  # Use float16 for efficiency
    device_map="auto"           # Automatically place the model on GPU if available
)

# device_map="auto" already dispatches the model to the GPU (or CPU) via accelerate,
# so an explicit model.to(device) is unnecessary and can raise an error for dispatched models
print("✅ Model loaded successfully!")

# Function to generate text
def generate_text(prompt, max_new_tokens=200):
    # Tokenize the prompt and move it to the same device as the model
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # max_new_tokens counts only generated tokens, unlike max_length, which includes the prompt
    output = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Example usage
prompt = "The future of AI is"
print("\nGenerated Output:\n", generate_text(prompt))