Padlex committed on
Commit 73fb26e
1 Parent(s): b1f7c7e

Training in progress, epoch 1

added_tokens.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "[BOS]": 13,
+   "[EOS]": 14,
+   "[PAD]": 15
+ }
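
A note on these ids: added_tokens.json maps the specials to 13–15, while config.json and tokenizer.json below place them at 103–105, so the two files appear to come from saves with different vocabulary sizes. A minimal sketch of how the added tokens surface after loading; the local path is illustrative, not part of this commit:

```python
from transformers import AutoTokenizer

# Hypothetical local checkout of this repo.
tok = AutoTokenizer.from_pretrained("./checkpoint")

# added_tokens.json records tokens layered on top of the base vocabulary,
# keyed to their ids in the tokenizer's own numbering.
print(tok.get_added_vocab())
```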
config.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "_name_or_path": "gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 103,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 104,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 64,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "pad_token_id": 105,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.41.2",
+   "use_cache": true,
+   "vocab_size": 109
+ }
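
The config is a stock GPT-2 base (12 layers, 12 heads, n_embd 768) with the vocabulary cut down to 109 entries and the special-token ids pointed at 103–105. A quick sanity check, assuming a local checkout (path illustrative):

```python
from transformers import GPT2Config, GPT2LMHeadModel

config = GPT2Config.from_pretrained("./checkpoint")  # illustrative path
model = GPT2LMHeadModel(config)

# ~85.9M parameters: GPT-2 base minus most of its 50257 x 768 embedding.
# At 4 bytes each plus the safetensors header, this is consistent with the
# 343,719,552-byte float32 checkpoint below.
print(sum(p.numel() for p in model.parameters()))
```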
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3001f7713c41ad3e592d94abaa5bb5f4bd4cb0752a0777c8c115691394fc29fb
+ size 343719552
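
This entry is a Git LFS pointer, not the weights themselves; the blob is fetched separately and can be checked against the pointer's oid and size. A sketch:

```python
import hashlib

# Verify a downloaded model.safetensors against the LFS pointer above.
with open("model.safetensors", "rb") as f:
    data = f.read()

assert len(data) == 343719552, "size mismatch"
digest = hashlib.sha256(data).hexdigest()
assert digest == "3001f7713c41ad3e592d94abaa5bb5f4bd4cb0752a0777c8c115691394fc29fb"
```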
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "bos_token": "[BOS]",
+   "eos_token": "[EOS]",
+   "pad_token": "[PAD]"
+ }
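
These names bind to the tokenizer's special-token attributes on load; a minimal check (path illustrative):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkpoint")  # illustrative path
print(tok.bos_token, tok.eos_token, tok.pad_token)   # [BOS] [EOS] [PAD]
```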
tokenizer.json ADDED
@@ -0,0 +1,226 @@
+ {
+   "version": "1.0",
+   "truncation": null,
+   "padding": null,
+   "added_tokens": [
+     {
+       "id": 103,
+       "content": "[BOS]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 104,
+       "content": "[EOS]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 105,
+       "content": "[PAD]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     }
+   ],
+   "normalizer": null,
+   "pre_tokenizer": {
+     "type": "Whitespace"
+   },
+   "post_processor": {
+     "type": "TemplateProcessing",
+     "single": [
+       {
+         "SpecialToken": {
+           "id": "[BOS]",
+           "type_id": 0
+         }
+       },
+       {
+         "Sequence": {
+           "id": "A",
+           "type_id": 0
+         }
+       },
+       {
+         "SpecialToken": {
+           "id": "[EOS]",
+           "type_id": 0
+         }
+       }
+     ],
+     "pair": [
+       {
+         "SpecialToken": {
+           "id": "[BOS]",
+           "type_id": 0
+         }
+       },
+       {
+         "Sequence": {
+           "id": "A",
+           "type_id": 0
+         }
+       },
+       {
+         "SpecialToken": {
+           "id": "[EOS]",
+           "type_id": 0
+         }
+       },
+       {
+         "Sequence": {
+           "id": "B",
+           "type_id": 1
+         }
+       },
+       {
+         "SpecialToken": {
+           "id": "[EOS]",
+           "type_id": 1
+         }
+       }
+     ],
+     "special_tokens": {
+       "[BOS]": {
+         "id": "[BOS]",
+         "ids": [
+           103
+         ],
+         "tokens": [
+           "[BOS]"
+         ]
+       },
+       "[EOS]": {
+         "id": "[EOS]",
+         "ids": [
+           104
+         ],
+         "tokens": [
+           "[EOS]"
+         ]
+       }
+     }
+   },
+   "decoder": null,
+   "model": {
+     "type": "WordLevel",
+     "vocab": {
+       "0": 0,
+       "1": 1,
+       "2": 2,
+       "3": 3,
+       "4": 4,
+       "5": 5,
+       "6": 6,
+       "7": 7,
+       "8": 8,
+       "9": 9,
+       "10": 10,
+       "11": 11,
+       "12": 12,
+       "13": 13,
+       "14": 14,
+       "15": 15,
+       "16": 16,
+       "17": 17,
+       "18": 18,
+       "19": 19,
+       "20": 20,
+       "21": 21,
+       "22": 22,
+       "23": 23,
+       "24": 24,
+       "25": 25,
+       "26": 26,
+       "27": 27,
+       "28": 28,
+       "29": 29,
+       "30": 30,
+       "31": 31,
+       "32": 32,
+       "33": 33,
+       "34": 34,
+       "35": 35,
+       "36": 36,
+       "37": 37,
+       "38": 38,
+       "39": 39,
+       "40": 40,
+       "41": 41,
+       "42": 42,
+       "43": 43,
+       "44": 44,
+       "45": 45,
+       "46": 46,
+       "47": 47,
+       "48": 48,
+       "49": 49,
+       "50": 50,
+       "51": 51,
+       "52": 52,
+       "53": 53,
+       "54": 54,
+       "55": 55,
+       "56": 56,
+       "57": 57,
+       "58": 58,
+       "59": 59,
+       "60": 60,
+       "61": 61,
+       "62": 62,
+       "63": 63,
+       "64": 64,
+       "65": 65,
+       "66": 66,
+       "67": 67,
+       "68": 68,
+       "69": 69,
+       "70": 70,
+       "71": 71,
+       "72": 72,
+       "73": 73,
+       "74": 74,
+       "75": 75,
+       "76": 76,
+       "77": 77,
+       "78": 78,
+       "79": 79,
+       "80": 80,
+       "81": 81,
+       "82": 82,
+       "83": 83,
+       "84": 84,
+       "85": 85,
+       "86": 86,
+       "87": 87,
+       "88": 88,
+       "89": 89,
+       "90": 90,
+       "91": 91,
+       "92": 92,
+       "93": 93,
+       "94": 94,
+       "95": 95,
+       "96": 96,
+       "97": 97,
+       "98": 98,
+       "99": 99,
+       "+": 100,
+       "*": 101,
+       "=": 102,
+       "[BOS]": 103,
+       "[EOS]": 104,
+       "[PAD]": 105
+     },
+     "unk_token": "[UNK]"
+   }
+ }
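
The tokenizer is small enough to rebuild by hand: a WordLevel vocabulary over the numbers 0–99 and the symbols +, * and =, Whitespace pre-tokenization, and a TemplateProcessing post-processor that wraps every sequence in [BOS] … [EOS]. A sketch with the `tokenizers` library (the sample string is illustrative):

```python
from tokenizers import Tokenizer
from tokenizers.models import WordLevel
from tokenizers.pre_tokenizers import Whitespace
from tokenizers.processors import TemplateProcessing

# Vocabulary as in the "model.vocab" section above: "0".."99", then +, *, =,
# then the three specials. Note the file names unk_token "[UNK]" even though
# it has no id in the vocab.
vocab = {str(i): i for i in range(100)}
vocab.update({"+": 100, "*": 101, "=": 102,
              "[BOS]": 103, "[EOS]": 104, "[PAD]": 105})

tok = Tokenizer(WordLevel(vocab, unk_token="[UNK]"))
tok.pre_tokenizer = Whitespace()
tok.post_processor = TemplateProcessing(
    single="[BOS] $A [EOS]",
    pair="[BOS] $A [EOS] $B:1 [EOS]:1",
    special_tokens=[("[BOS]", 103), ("[EOS]", 104)],
)

print(tok.encode("12 + 7 = 19").ids)
# [103, 12, 100, 7, 102, 19, 104]
```

Each of 0–99 is a single token, which together with n_ctx = 64 in config.json suggests short arithmetic sequences as the training data.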
tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "added_tokens_decoder": {
+     "103": {
+       "content": "[BOS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "104": {
+       "content": "[EOS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "105": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "[BOS]",
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "[EOS]",
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "[PAD]",
+   "tokenizer_class": "PreTrainedTokenizerFast"
+ }
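
The huge model_max_length is the sentinel transformers writes when no explicit limit was set (int(1e30)), so any truncation bound has to be passed at call time. For example (path illustrative):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkpoint")  # illustrative path

# No usable model_max_length is stored, so cap to the model's n_ctx of 64
# explicitly when batching.
batch = tok(["12 + 7 =", "3 * 4 ="],
            padding=True, truncation=True, max_length=64,
            return_tensors="pt")
print(batch["input_ids"])
```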
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fa2e40b7cbb762970ece1645561337cf93fc42c6767b6b24f57040bc204a5cd4
+ size 4655
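
training_args.bin is a pickled TrainingArguments object rather than a tensor file, so it loads via torch.load. A sketch (only unpickle files you trust):

```python
import torch

# weights_only=False is needed on recent PyTorch because this is an
# arbitrary pickled object, not a pure tensor archive.
args = torch.load("training_args.bin", weights_only=False)
print(args.num_train_epochs, args.learning_rate)
```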
vocab.json ADDED
@@ -0,0 +1 @@
+ {"0": 0, "1": 1, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6, "7": 7, "8": 8, "9": 9, "+": 10, "*": 11, "=": 12}