"""Ask a locally fine-tuned GPT-2 model the same question many times.

The script loads a GPT-2 checkpoint from disk, reads one question from
standard input and prints a freshly sampled answer for every iteration,
together with the running elapsed time in minutes.
"""

import time

import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Checkpoint directory used when ``main`` is called without arguments.
DEFAULT_MODEL_DIR = "/Users/migueldeguzman/Desktop/gpt2xl_algos/RLLMv3-Assistant-AI/layer10/"


class GPT2Assistant:
    """Thin wrapper around a GPT-2 language model and its tokenizer."""

    def __init__(self, model_dir):
        """Load the model and tokenizer stored in *model_dir*.

        Args:
            model_dir: Directory (or identifier) accepted by
                ``from_pretrained`` for both the model and the tokenizer.
        """
        self.model = GPT2LMHeadModel.from_pretrained(model_dir)
        self.tokenizer = GPT2Tokenizer.from_pretrained(model_dir)
        # Fall back to EOS as the padding token when the tokenizer defines
        # none. Done once here instead of on every generate_answer() call.
        if self.tokenizer.pad_token_id is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def generate_answer(self, prompt, max_length=1024):
        """Sample one continuation of *prompt* and return only the new text.

        Args:
            prompt: Text the model should continue.
            max_length: Upper bound passed to ``generate`` as ``max_length``
                (prompt tokens plus generated tokens).

        Returns:
            The decoded continuation, without the prompt and without
            special tokens.
        """
        input_ids = self.tokenizer.encode(prompt, return_tensors="pt")
        # A single prompt is never padded, so every position is attended to.
        # Deriving the mask from ``input_ids != pad_token_id`` would wrongly
        # mask genuine EOS tokens in the prompt, because pad may alias EOS.
        attention_mask = torch.ones_like(input_ids)

        output = self.model.generate(
            input_ids,
            attention_mask=attention_mask,
            pad_token_id=self.tokenizer.pad_token_id,
            max_length=max_length,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.70,
        )

        # The returned sequence starts with the prompt tokens. Drop them at
        # token level: slicing the decoded string by len(prompt) misaligns
        # whenever decoding does not reproduce the prompt character for
        # character (whitespace clean-up, skipped special tokens).
        prompt_length = input_ids.shape[-1]
        new_tokens = output[0][prompt_length:]
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True)

    def query(self, prompt):
        """Return a generated answer for *prompt* using default settings."""
        return self.generate_answer(prompt)


def main(model_dir=DEFAULT_MODEL_DIR, num_iterations=200):
    """Load the model, read one question and answer it repeatedly.

    Args:
        model_dir: Checkpoint directory to load the assistant from.
        num_iterations: How many answers to sample for the question.
    """
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments during a long run.
    start_time = time.perf_counter()

    assistant = GPT2Assistant(model_dir)
    prompt = input(
        f"Enter your question to ask the model {num_iterations} times: "
    )

    for iteration in range(1, num_iterations + 1):
        print(f"Answering question {iteration}/{num_iterations}...")
        response = assistant.query(prompt)
        print(f"Response {iteration}: {response}\n")

        # Running elapsed time since start (includes model loading and the
        # time spent waiting for the question), converted to minutes.
        elapsed_time = (time.perf_counter() - start_time) / 60
        print(f"Time-stamp: {elapsed_time:.2f} minutes")

    elapsed_time = (time.perf_counter() - start_time) / 60  # Convert to minutes
    print(f"Time taken to complete the task: {elapsed_time:.2f} minutes")


if __name__ == "__main__":
    main()