import torch
from torch.nn import functional as F

import tiktoken  # GPT-2 BPE tokenizer

from gpt_class import GPTConfig, GPT

# Set up the device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the checkpoint. weights_only=False is required on PyTorch >= 2.6, where
# torch.load defaults to weights_only=True and would refuse to unpickle the
# GPTConfig object stored under 'config'.
checkpoint = torch.load('model_51999.pt', map_location=device, weights_only=False)
config = checkpoint['config']
model = GPT(config)
model.load_state_dict(checkpoint['model'])
model.to(device)
model.eval()

# Seed the global RNGs for reproducibility
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)

# Get the GPT-2 tokenizer
tokenizer = tiktoken.get_encoding("gpt2")


def generate(model, tokenizer, example, num_return_sequences, max_length):
    model.eval()
    # Encode the prompt and replicate it once per requested sequence
    tokens = tokenizer.encode(example)
    tokens = torch.tensor(tokens, dtype=torch.long).unsqueeze(0).repeat(num_return_sequences, 1)
    xgen = tokens.to(device)

    # Dedicated generator so sampling is reproducible; note that
    # torch.manual_seed does not seed explicitly created generators
    sample_rng = torch.Generator(device=device)
    sample_rng.manual_seed(42)

    while xgen.size(1) < max_length:
        with torch.no_grad():
            with torch.autocast(device_type=device):
                logits, _ = model(xgen)  # Assumes model returns (logits, loss)
            logits = logits[:, -1, :]  # Logits for the last position only
            probs = F.softmax(logits, dim=-1)
            # Top-k sampling: keep the 50 most likely tokens, renormalize,
            # and draw one token per sequence
            topk_probs, topk_indices = torch.topk(probs, 50, dim=-1)
            ix = torch.multinomial(topk_probs, 1, generator=sample_rng)
            xcol = torch.gather(topk_indices, -1, ix)
            xgen = torch.cat((xgen, xcol), dim=1)

    # Decode and print each generated sequence
    for i in range(num_return_sequences):
        seq = xgen[i, :max_length].tolist()
        decoded = tokenizer.decode(seq)
        print(f"Sample {i+1}: {decoded}")


# Generate text
generate(model, tokenizer,
         example="It is raining outside and",
         num_return_sequences=4,
         max_length=64)
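
# For reference, this script assumes the checkpoint was written on the training
# side with matching keys. A minimal sketch of what that save might look like
# (hypothetical; only the 'model' and 'config' key names are confirmed by the
# loading code above):
#
#     checkpoint = {'model': model.state_dict(), 'config': config}
#     torch.save(checkpoint, 'model_51999.pt')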