from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Path to your local model directory
model_path = "C:/Users/YourName/Documents/fine_tuned_llama_3_2"  # Update this path accordingly

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load the model with optimized settings
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,  # Use float16 for efficiency
    device_map="auto"           # Automatically place the model on GPU if available
)
# Note: with device_map="auto", the model is already dispatched to the right device,
# so we do NOT call model.to(device) here (doing so can raise an error).

print("✅ Model loaded successfully!")

# Function to generate text
def generate_text(prompt, max_new_tokens=200):
    # Move inputs to the same device the model was dispatched to
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # max_new_tokens counts only generated tokens, unlike max_length (prompt + output)
    output = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Example usage
prompt = "The future of AI is"
print("\nGenerated Output:\n", generate_text(prompt))
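
# Optional: a minimal sketch of sampling-based generation, reusing the model and
# tokenizer loaded above. The temperature/top_p values are illustrative defaults,
# not tuned settings; adjust them for your fine-tuned model.
def generate_text_sampled(prompt, max_new_tokens=200):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,   # sample instead of greedy decoding
        temperature=0.7,  # soften the token distribution
        top_p=0.9,        # nucleus sampling
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)

print("\nSampled Output:\n", generate_text_sampled(prompt))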