import json
from dataclasses import dataclass

import gradio as gr
import torch
import torch.nn as nn
from torch.nn import functional as F
from safetensors.torch import load_file
from transformers import GPT2Tokenizer


# Model configuration, keeping only the fields this script actually uses
@dataclass
class GPTConfig:
    n_embd: int
    n_head: int
    n_layer: int
    vocab_size: int

    @classmethod
    def from_dict(cls, config_dict):
        # Filter out keys the config file may contain but this class does not use
        expected_keys = {"n_embd", "n_head", "n_layer", "vocab_size"}
        filtered_dict = {key: value for key, value in config_dict.items() if key in expected_keys}
        return cls(**filtered_dict)


# Define the GPT model
class GPT(nn.Module):
    def __init__(self, config):
        super().__init__()
        # Token embedding layer
        self.embedding = nn.Embedding(config.vocab_size, config.n_embd)
        # Transformer decoder stack
        decoder_layer = nn.TransformerDecoderLayer(
            d_model=config.n_embd,
            nhead=config.n_head,
            dim_feedforward=config.n_embd,
            dropout=0.1,
        )
        self.transformer = nn.TransformerDecoder(decoder_layer, num_layers=config.n_layer)
        # Language model head projecting back to the vocabulary
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size)

    def forward(self, input_ids):
        # Embed the input tokens
        input_embeddings = self.embedding(input_ids)
        # TransformerDecoder expects (seq_len, batch, n_embd)
        input_embeddings = input_embeddings.transpose(0, 1)
        # Use the embeddings both as target and memory
        transformer_output = self.transformer(input_embeddings, input_embeddings)
        # Transpose back to (batch, seq_len, n_embd)
        transformer_output = transformer_output.transpose(0, 1)
        # Project to vocabulary logits
        logits = self.lm_head(transformer_output)
        return logits

    def generate(self, input_ids, max_new_tokens, temperature, top_k):
        """Autoregressive sampling with temperature scaling and top-k filtering."""
        output_ids = input_ids
        for _ in range(max_new_tokens):
            # Run the full sequence through the model and keep only the logits
            # for the last position (the next-token distribution).
            logits = self.forward(output_ids)[:, -1, :]
            logits = logits / temperature
            probs = F.softmax(logits, dim=-1)
            # Restrict sampling to the k most likely tokens
            top_k_probs, top_k_indices = torch.topk(probs, k=top_k)
            next_token = torch.multinomial(top_k_probs, num_samples=1)
            # Map the sampled position back to a vocabulary index
            next_token = top_k_indices.gather(-1, next_token)
            output_ids = torch.cat([output_ids, next_token], dim=1)
        return output_ids


# Initialize global variables
model = None
tokenizer = None
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def load_model():
    """Load the Leap0 model and tokenizer."""
    global model, tokenizer
    try:
        # Paths to config and model files
        config_path = "config.json"
        model_path = "model.safetensors"

        print(f"Loading configuration from {config_path}...")
        # Load the configuration
        with open(config_path, "r") as f:
            config_dict = json.load(f)

        print("Configuration loaded. Creating model config...")
        config = GPTConfig.from_dict(config_dict)
        print(f"Model config created: {config}")

        print(f"Loading model weights from {model_path}...")
        # Load the model weights
        tensors = load_file(model_path)

        print("Instantiating model...")
        # Instantiate the model with the loaded config
        model = GPT(config)

        print("Loading weights into model...")
        model.load_state_dict(tensors, strict=False)
        model.to(device)
        model.eval()

        print("Loading tokenizer...")
        # Load the tokenizer
        tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

        print("Model and tokenizer loaded successfully")
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        raise


def generate_text(prompt, max_length=50, temperature=0.7, top_k=40):
    """Generate text based on the provided prompt."""
    if model is None or tokenizer is None:
        load_model()

    # Tokenize the input text
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

    # Generate text
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=max_length,
            temperature=temperature,
            top_k=top_k,
        )

    # Decode the output
    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return output_text


# Create the Gradio interface
def create_interface():
    with gr.Blocks(css="footer {visibility: hidden}") as demo:
        gr.Markdown("# Leap0 Language Model")
        gr.Markdown("A GPT-2 based model trained on the Tiny Stories dataset")

        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(
                    label="Enter your prompt",
                    placeholder="once upon a time in the village of",
                    lines=3,
                )
                with gr.Row():
                    max_length = gr.Slider(
                        minimum=1, maximum=200, value=50, step=1, label="Max Length"
                    )
                    temperature = gr.Slider(
                        minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"
                    )
                    top_k = gr.Slider(
                        minimum=1, maximum=100, value=40, step=1, label="Top K"
                    )
                generate_btn = gr.Button("Generate Text")

            with gr.Column():
                output = gr.Textbox(
                    label="Generated Output",
                    lines=10,
                    placeholder="Your generated text will appear here...",
                )

        generate_btn.click(
            fn=generate_text,
            inputs=[prompt, max_length, temperature, top_k],
            outputs=output,
        )

    return demo


# Load the model when the script is run
load_model()

# Create and launch the interface
demo = create_interface()

if __name__ == "__main__":
    demo.launch()
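
# Optional usage sketch (not part of the app): querying the running demo
# programmatically with gradio_client. This assumes the server is up on the
# default local URL and that the click handler is exposed under the default
# api_name "/generate_text"; adjust both if your setup differs.
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860")
# result = client.predict(
#     "once upon a time in the village of",  # prompt
#     50,                                     # max_length
#     0.7,                                    # temperature
#     40,                                     # top_k
#     api_name="/generate_text",
# )
# print(result)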