Padlex committed on
Commit 4de9f05
1 Parent(s): d4e773b

Training in progress, epoch 1

config.json CHANGED
@@ -5,9 +5,9 @@
     "GPT2LMHeadModel"
   ],
   "attn_pdrop": 0.1,
-  "bos_token_id": 13,
+  "bos_token_id": 103,
   "embd_pdrop": 0.1,
-  "eos_token_id": 14,
+  "eos_token_id": 104,
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",
@@ -17,7 +17,7 @@
   "n_inner": null,
   "n_layer": 12,
   "n_positions": 1024,
-  "pad_token_id": 15,
+  "pad_token_id": 105,
   "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,
   "scale_attn_by_inverse_layer_idx": false,
@@ -34,7 +34,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.40.1",
+  "transformers_version": "4.41.2",
   "use_cache": true,
-  "vocab_size": 16
+  "vocab_size": 109
 }
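The new special-token IDs and vocab size in config.json have to line up with the tokenizer files further down in this commit. A minimal sanity check, assuming "./checkpoint" is a hypothetical local checkout of this revision:

```python
from transformers import GPT2Config

# Hypothetical local path to a checkout of this commit. The special-token
# IDs in config.json must match the tokenizer's added tokens
# ([BOS]=103, [EOS]=104, [PAD]=105) and the enlarged vocab.
config = GPT2Config.from_pretrained("./checkpoint")
assert (config.bos_token_id, config.eos_token_id, config.pad_token_id) == (103, 104, 105)
assert config.vocab_size == 109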
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e8192b65d651ae379d926ab2ac5689f2b700f0349201837692f97c7a5ac3166
-size 343433856
+oid sha256:c611a34d822851c990b4056427b66d5d320373b4208f3001fa3ed40f385719fb
+size 343719552
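The 285,696-byte growth is exactly what the larger vocabulary predicts, assuming GPT-2 base dimensions (n_embd = 768, consistent with n_layer = 12 above) and tied input/output embeddings:

```python
# The safetensors size delta equals the new embedding rows alone:
# (109 - 16) added tokens * 768 dims * 4 bytes (float32) = 285,696 bytes.
# Assumes n_embd = 768 (GPT-2 base) and tied wte/lm_head weights, so the
# enlarged embedding matrix is stored only once.
old_size, new_size = 343_433_856, 343_719_552
added_bytes = (109 - 16) * 768 * 4
assert new_size - old_size == added_bytes == 285_696
```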
tokenizer.json ADDED
@@ -0,0 +1,226 @@
+{
+  "version": "1.0",
+  "truncation": null,
+  "padding": null,
+  "added_tokens": [
+    {
+      "id": 103,
+      "content": "[BOS]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 104,
+      "content": "[EOS]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 105,
+      "content": "[PAD]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    }
+  ],
+  "normalizer": null,
+  "pre_tokenizer": {
+    "type": "Whitespace"
+  },
+  "post_processor": {
+    "type": "TemplateProcessing",
+    "single": [
+      {
+        "SpecialToken": {
+          "id": "[BOS]",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[EOS]",
+          "type_id": 0
+        }
+      }
+    ],
+    "pair": [
+      {
+        "SpecialToken": {
+          "id": "[BOS]",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[EOS]",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "B",
+          "type_id": 1
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[EOS]",
+          "type_id": 1
+        }
+      }
+    ],
+    "special_tokens": {
+      "[BOS]": {
+        "id": "[BOS]",
+        "ids": [
+          103
+        ],
+        "tokens": [
+          "[BOS]"
+        ]
+      },
+      "[EOS]": {
+        "id": "[EOS]",
+        "ids": [
+          104
+        ],
+        "tokens": [
+          "[EOS]"
+        ]
+      }
+    }
+  },
+  "decoder": null,
+  "model": {
+    "type": "WordLevel",
+    "vocab": {
+      "0": 0,
+      "1": 1,
+      "2": 2,
+      "3": 3,
+      "4": 4,
+      "5": 5,
+      "6": 6,
+      "7": 7,
+      "8": 8,
+      "9": 9,
+      "10": 10,
+      "11": 11,
+      "12": 12,
+      "13": 13,
+      "14": 14,
+      "15": 15,
+      "16": 16,
+      "17": 17,
+      "18": 18,
+      "19": 19,
+      "20": 20,
+      "21": 21,
+      "22": 22,
+      "23": 23,
+      "24": 24,
+      "25": 25,
+      "26": 26,
+      "27": 27,
+      "28": 28,
+      "29": 29,
+      "30": 30,
+      "31": 31,
+      "32": 32,
+      "33": 33,
+      "34": 34,
+      "35": 35,
+      "36": 36,
+      "37": 37,
+      "38": 38,
+      "39": 39,
+      "40": 40,
+      "41": 41,
+      "42": 42,
+      "43": 43,
+      "44": 44,
+      "45": 45,
+      "46": 46,
+      "47": 47,
+      "48": 48,
+      "49": 49,
+      "50": 50,
+      "51": 51,
+      "52": 52,
+      "53": 53,
+      "54": 54,
+      "55": 55,
+      "56": 56,
+      "57": 57,
+      "58": 58,
+      "59": 59,
+      "60": 60,
+      "61": 61,
+      "62": 62,
+      "63": 63,
+      "64": 64,
+      "65": 65,
+      "66": 66,
+      "67": 67,
+      "68": 68,
+      "69": 69,
+      "70": 70,
+      "71": 71,
+      "72": 72,
+      "73": 73,
+      "74": 74,
+      "75": 75,
+      "76": 76,
+      "77": 77,
+      "78": 78,
+      "79": 79,
+      "80": 80,
+      "81": 81,
+      "82": 82,
+      "83": 83,
+      "84": 84,
+      "85": 85,
+      "86": 86,
+      "87": 87,
+      "88": 88,
+      "89": 89,
+      "90": 90,
+      "91": 91,
+      "92": 92,
+      "93": 93,
+      "94": 94,
+      "95": 95,
+      "96": 96,
+      "97": 97,
+      "98": 98,
+      "99": 99,
+      "+": 100,
+      "*": 101,
+      "=": 102,
+      "[BOS]": 103,
+      "[EOS]": 104,
+      "[PAD]": 105
+    },
+    "unk_token": "[UNK]"
+  }
+}
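This file is a standard `tokenizers` serialization. A sketch of how an equivalent tokenizer could be built with the `tokenizers` library: a WordLevel vocab over the tokens "0" through "99" plus the arithmetic symbols, Whitespace pre-tokenization, and a TemplateProcessing post-processor that wraps sequences in [BOS]/[EOS]. This is a reconstruction from the JSON above, not the author's confirmed build script:

```python
from tokenizers import Tokenizer
from tokenizers.models import WordLevel
from tokenizers.pre_tokenizers import Whitespace
from tokenizers.processors import TemplateProcessing

# Vocab mirrors the file above: "0".."99" -> 0..99, then +, *, = and the
# three special tokens.
vocab = {str(i): i for i in range(100)}
vocab.update({"+": 100, "*": 101, "=": 102, "[BOS]": 103, "[EOS]": 104, "[PAD]": 105})

tokenizer = Tokenizer(WordLevel(vocab, unk_token="[UNK]"))
tokenizer.pre_tokenizer = Whitespace()
# Register the specials so they appear in the added_tokens block; they keep
# their existing vocab IDs.
tokenizer.add_special_tokens(["[BOS]", "[EOS]", "[PAD]"])
# Template matches the post_processor above: single = [BOS] A [EOS],
# pair = [BOS] A [EOS] B:1 [EOS]:1.
tokenizer.post_processor = TemplateProcessing(
    single="[BOS] $A [EOS]",
    pair="[BOS] $A [EOS] $B:1 [EOS]:1",
    special_tokens=[("[BOS]", 103), ("[EOS]", 104)],
)
tokenizer.save("tokenizer.json")
```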
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "added_tokens_decoder": {
-    "13": {
+    "103": {
       "content": "[BOS]",
       "lstrip": false,
       "normalized": false,
@@ -8,7 +8,7 @@
       "single_word": false,
       "special": true
     },
-    "14": {
+    "104": {
       "content": "[EOS]",
       "lstrip": false,
       "normalized": false,
@@ -16,7 +16,7 @@
       "single_word": false,
       "special": true
     },
-    "15": {
+    "105": {
       "content": "[PAD]",
       "lstrip": false,
       "normalized": false,
@@ -28,7 +28,7 @@
   "bos_token": "[BOS]",
   "clean_up_tokenization_spaces": true,
   "eos_token": "[EOS]",
-  "model_max_length": 64,
+  "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
-  "tokenizer_class": "CharacterTokenizer"
+  "tokenizer_class": "PreTrainedTokenizerFast"
 }
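With tokenizer_class switched to PreTrainedTokenizerFast, the checkpoint loads through the standard fast-tokenizer path backed by tokenizer.json. A usage sketch, again assuming a hypothetical local checkout; the expected IDs follow from the vocab and template above:

```python
from transformers import AutoTokenizer

# Hypothetical local path. The TemplateProcessing post-processor adds
# [BOS]/[EOS] automatically around each encoded sequence.
tok = AutoTokenizer.from_pretrained("./checkpoint")
ids = tok("12 + 34 = 46")["input_ids"]
print(ids)              # expected: [103, 12, 100, 34, 102, 46, 104]
print(tok.decode(ids))  # "[BOS] 12 + 34 = 46 [EOS]"
```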
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6d001b64fce175c741ad3c4f6c7c67a4d2e67bbc43d57fcbb8c9eec926b19f8
-size 4527
+oid sha256:83e1798f23cb48ad2a135c1f84e94622430ecc4956d0bafaf20524754cabd041
+size 4655
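Unlike the safetensors file, training_args.bin is a small pickled TrainingArguments object, so it can be inspected directly. A hedged sketch, assuming a local copy of the file:

```python
import torch

# training_args.bin holds a pickled transformers.TrainingArguments object;
# weights_only=False is required on newer torch versions (it became the
# default for torch.load in 2.6) because this is an arbitrary pickle,
# not tensor data.
args = torch.load("training_args.bin", weights_only=False)
print(args.num_train_epochs, args.learning_rate)
```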