Ronakparmar committed on
Commit 500f774 · verified
1 Parent(s): 4969c5b

Upload 4 files

Files changed (4)
  1. __init__.py +5 -0
  2. config.json +18 -0
  3. configuration_gpt.py +15 -0
  4. modeling_gpt.py +151 -0
__init__.py ADDED
@@ -0,0 +1,5 @@
+ from .configuration_gpt import CustomGPTConfig
+ from .modeling_gpt import CustomGPT
+
+ CustomGPTConfig.register_for_auto_class()
+ CustomGPT.register_for_auto_class("AutoModelForCausalLM")
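Note: because __init__.py registers both classes for the auto classes (and config.json below carries the matching auto_map entries), the model can be loaded straight from the Hub with remote code enabled. A minimal loading sketch; the repo id "Ronakparmar/custom-gpt" is a placeholder, not the actual repository name:

    from transformers import AutoConfig, AutoModelForCausalLM

    # trust_remote_code=True is required because CustomGPTConfig / CustomGPT
    # live in this repository rather than inside the transformers library
    config = AutoConfig.from_pretrained("Ronakparmar/custom-gpt", trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained("Ronakparmar/custom-gpt", trust_remote_code=True)
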
config.json ADDED
@@ -0,0 +1,18 @@
+ {
+   "architectures": [
+     "CustomGPT"
+   ],
+   "auto_map": {
+     "AutoConfig": "configuration_gpt.CustomGPTConfig",
+     "AutoModelForCausalLM": "modeling_gpt.CustomGPT"
+   },
+   "block_size": 768,
+   "dropout": 0.1,
+   "model_type": "custom_gpt",
+   "n_embd": 768,
+   "n_head": 8,
+   "n_layer": 8,
+   "torch_dtype": "float32",
+   "transformers_version": "4.48.3",
+   "vocab_size": 50257
+ }
configuration_gpt.py ADDED
@@ -0,0 +1,15 @@
+ from transformers import PretrainedConfig
+
+ class CustomGPTConfig(PretrainedConfig):
+     """Configuration class for the custom GPT model."""
+     model_type = "custom_gpt"
+
+     def __init__(self, block_size=768, vocab_size=50257, n_layer=8,
+                  n_head=8, n_embd=768, dropout=0.1, **kwargs):
+         self.block_size = block_size
+         self.vocab_size = vocab_size
+         self.n_layer = n_layer
+         self.n_head = n_head
+         self.n_embd = n_embd
+         self.dropout = dropout
+         super().__init__(**kwargs)  # accept and forward extra config.json keys (architectures, auto_map, ...)
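A short sketch of exercising the config on its own (importing the file directly from the working directory; the output directory name is illustrative):

    from configuration_gpt import CustomGPTConfig

    # override one field, leave the rest at their defaults
    cfg = CustomGPTConfig(block_size=256)
    print(cfg.n_layer, cfg.n_head, cfg.n_embd)   # 8 8 768
    cfg.save_pretrained("custom_gpt_config")     # writes a config.json with model_type "custom_gpt"
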
modeling_gpt.py ADDED
@@ -0,0 +1,151 @@
+ import os
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+ from transformers import PreTrainedModel
+ from .configuration_gpt import CustomGPTConfig  # relative import so this module resolves inside the repo package
+ from huggingface_hub import HfApi
+
+ # Causal self-attention with a fused qkv projection across all heads
+ class CausalSelfAttention(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         assert config.n_embd % config.n_head == 0
+         self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd)
+         self.c_proj = nn.Linear(config.n_embd, config.n_embd)
+         self.n_head = config.n_head
+         self.n_embd = config.n_embd
+
+     def forward(self, x):
+         B, T, C = x.size()
+         qkv = self.c_attn(x)
+         q, k, v = qkv.split(self.n_embd, dim=2)
+         k = k.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)  # (B, n_head, T, head_dim)
+         q = q.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
+         v = v.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
+         y = F.scaled_dot_product_attention(q, k, v, is_causal=True)
+         y = y.transpose(1, 2).contiguous().view(B, T, C)
+         y = self.c_proj(y)
+         return y
+
+ # Position-wise feed-forward network (MLP)
+ class MLP(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         self.c_fc = nn.Linear(config.n_embd, 4 * config.n_embd)
+         self.gelu = nn.GELU(approximate='tanh')
+         self.c_proj = nn.Linear(4 * config.n_embd, config.n_embd)
+
+     def forward(self, x):
+         x = self.c_fc(x)
+         x = self.gelu(x)
+         x = self.c_proj(x)
+         return x
+
+ # Transformer block: pre-norm attention and MLP, each with a residual connection
+ class Block(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         self.ln_1 = nn.LayerNorm(config.n_embd)
+         self.attn = CausalSelfAttention(config)
+         self.ln_2 = nn.LayerNorm(config.n_embd)
+         self.mlp = MLP(config)
+
+     def forward(self, x):
+         x = x + self.attn(self.ln_1(x))
+         x = x + self.mlp(self.ln_2(x))
+         return x
+
+ # GPT-style decoder-only language model wrapped as a Hugging Face PreTrainedModel
+ class CustomGPT(PreTrainedModel):
+     config_class = CustomGPTConfig
+
+     def __init__(self, config):
+         super().__init__(config)
+         self.config = config
+         self.transformer = nn.ModuleDict(dict(
+             wte=nn.Embedding(config.vocab_size, config.n_embd),
+             wpe=nn.Embedding(config.block_size, config.n_embd),
+             h=nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
+             ln_f=nn.LayerNorm(config.n_embd),
+         ))
+         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
+         self.transformer.wte.weight = self.lm_head.weight  # tie input embedding and output head weights
+         self.apply(self._init_weights)
+
+     def _init_weights(self, module):
+         if isinstance(module, nn.Linear):
+             std = 0.02
+             torch.nn.init.normal_(module.weight, mean=0.0, std=std)
+             if module.bias is not None:
+                 torch.nn.init.zeros_(module.bias)
+         elif isinstance(module, nn.Embedding):
+             torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+
+     def forward(self, idx, targets=None):
+         B, T = idx.size()
+         assert T <= self.config.block_size, f"Cannot forward sequence of length {T}, block size is only {self.config.block_size}"
+         pos = torch.arange(0, T, dtype=torch.long, device=idx.device)
+         pos_emb = self.transformer.wpe(pos)
+         tok_emb = self.transformer.wte(idx)
+         x = tok_emb + pos_emb
+         for block in self.transformer.h:
+             x = block(x)
+         x = self.transformer.ln_f(x)
+         logits = self.lm_head(x)
+         loss = None
+         if targets is not None:
+             loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))
+         return logits, loss
+
+     def save_pretrained(self, save_directory, safe_serialization=False):
+         # Ensure the save directory exists
+         if not os.path.exists(save_directory):
+             os.makedirs(save_directory)
+             print(f"Created directory: {save_directory}")
+         else:
+             print(f"Directory already exists: {save_directory}")
+
+         # Save the model configuration
+         config_path = os.path.join(save_directory, "config.json")
+         self.config.save_pretrained(save_directory)
+         print(f"Saved configuration to: {config_path}")
+
+         # Save the model weights
+         model_path = os.path.join(save_directory, "pytorch_model.bin")
+         torch.save(self.state_dict(), model_path)
+         print(f"Saved model weights to: {model_path}")
+
+         # If safe_serialization is False, also call the base class method
+         if not safe_serialization:
+             super().save_pretrained(save_directory, safe_serialization=False)
+
+     def push_to_hub(self, repo_id, commit_message="Push model to hub"):
+         try:
+             # Save the model locally
+             self.save_pretrained(repo_id)
+             print(f"Model saved locally to {repo_id}")
+
+             # Create the repository with the desired privacy settings
+             api = HfApi()
+             api.create_repo(repo_id=repo_id, private=False, exist_ok=True)
+             print(f"Repository created (or already exists) with ID: {repo_id}")
+
+             # Use HfApi to push the model to the Hugging Face Hub
+             api.upload_folder(
+                 folder_path=repo_id,
+                 repo_id=repo_id,
+                 repo_type="model",
+                 commit_message=commit_message
+             )
+             print(f"Model uploaded successfully to {repo_id}")
+         except Exception as e:
+             print(f"Failed to upload model: {e}")
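
A minimal end-to-end sketch of how the four files fit together locally, assuming they sit in a package directory named custom_gpt/ (hypothetical name) and using random token ids rather than trained weights:

    import torch
    from custom_gpt import CustomGPT, CustomGPTConfig

    config = CustomGPTConfig()                       # defaults mirror config.json
    model = CustomGPT(config)

    # batch of 2 sequences, 16 tokens each, drawn from the GPT-2 vocabulary range
    idx = torch.randint(0, config.vocab_size, (2, 16))
    logits, loss = model(idx, targets=idx)
    print(logits.shape)                              # torch.Size([2, 16, 50257])
    print(loss.item())                               # cross-entropy against the dummy targets

    model.save_pretrained("custom_gpt_local")        # writes config.json and pytorch_model.bin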