choprahetarth committed on
Commit b74ca27 · verified · 1 Parent(s): 1597380

Upload folder using huggingface_hub
README.md ADDED
@@ -0,0 +1,367 @@
+ ---
+ base_model:
+ - TechxGenus/starcoder2-3b-instruct
+ tags:
+ - merge
+ - mergekit
+ - lazymergekit
+ - TechxGenus/starcoder2-3b-instruct
+ ---
+
+ # they_not_like_us
+
+ they_not_like_us is a TIES merge of the following model into the base model [bigcode/starcoder2-3b](https://huggingface.co/bigcode/starcoder2-3b), built with [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
+ * [TechxGenus/starcoder2-3b-instruct](https://huggingface.co/TechxGenus/starcoder2-3b-instruct)
+
+ ## 🧩 Configuration
+
+ ```yaml
+ models:
+   - model: bigcode/starcoder2-3b
+   - model: TechxGenus/starcoder2-3b-instruct
+     parameters:
+       density:
+         - filter: self_attn.q_proj.0
+           value: 0.01384
+         - filter: self_attn.q_proj.1
+           value: 0.00657
+         - filter: self_attn.q_proj.2
+           value: 0.02731
+         - filter: self_attn.q_proj.3
+           value: 0.03261
+         - filter: self_attn.q_proj.4
+           value: 0.05056
+         - filter: self_attn.q_proj.5
+           value: 0.05539
+         - filter: self_attn.q_proj.6
+           value: 0.05796
+         - filter: self_attn.q_proj.7
+           value: 0.06573
+         - filter: self_attn.q_proj.8
+           value: 0.09558
+         - filter: self_attn.q_proj.9
+           value: 0.11226
+         - filter: self_attn.q_proj.10
+           value: 0.1053
+         - filter: self_attn.q_proj.11
+           value: 0.07023
+         - filter: self_attn.q_proj.12
+           value: 0.04345
+         - filter: self_attn.q_proj.13
+           value: 0.02855
+         - filter: self_attn.q_proj.14
+           value: 0.04248
+         - filter: self_attn.q_proj.15
+           value: 0.00938
+         - filter: self_attn.q_proj.16
+           value: 0.04849
+         - filter: self_attn.q_proj.17
+           value: 0.0569
+         - filter: self_attn.q_proj.18
+           value: 0.02524
+         - filter: self_attn.q_proj.19
+           value: 0.03765
+         - filter: self_attn.q_proj.20
+           value: 0.00301
+         - filter: self_attn.q_proj.21
+           value: 0.01149
+         - filter: self_attn.k_proj.0
+           value: 0.01448
+         - filter: self_attn.k_proj.1
+           value: 0.01995
+         - filter: self_attn.k_proj.2
+           value: 0.02198
+         - filter: self_attn.k_proj.3
+           value: 0.02643
+         - filter: self_attn.k_proj.4
+           value: 0.04244
+         - filter: self_attn.k_proj.5
+           value: 0.03983
+         - filter: self_attn.k_proj.6
+           value: 0.03963
+         - filter: self_attn.k_proj.7
+           value: 0.04097
+         - filter: self_attn.k_proj.8
+           value: 0.10344
+         - filter: self_attn.k_proj.9
+           value: 0.12206
+         - filter: self_attn.k_proj.10
+           value: 0.10247
+         - filter: self_attn.k_proj.11
+           value: 0.08638
+         - filter: self_attn.k_proj.12
+           value: 0.06374
+         - filter: self_attn.k_proj.13
+           value: 0.05815
+         - filter: self_attn.k_proj.14
+           value: 0.0267
+         - filter: self_attn.k_proj.15
+           value: 0.0105
+         - filter: self_attn.k_proj.16
+           value: 0.03183
+         - filter: self_attn.k_proj.17
+           value: 0.0173
+         - filter: self_attn.k_proj.18
+           value: 0.06724
+         - filter: self_attn.k_proj.19
+           value: 0.01356
+         - filter: self_attn.k_proj.20
+           value: 0.03054
+         - filter: self_attn.k_proj.21
+           value: 0.02039
+         - filter: self_attn.v_proj.0
+           value: 0.00132
+         - filter: self_attn.v_proj.1
+           value: 0.00024
+         - filter: self_attn.v_proj.2
+           value: 0.00149
+         - filter: self_attn.v_proj.3
+           value: 0.00292
+         - filter: self_attn.v_proj.4
+           value: 0.00208
+         - filter: self_attn.v_proj.5
+           value: 0.00715
+         - filter: self_attn.v_proj.6
+           value: 0.00725
+         - filter: self_attn.v_proj.7
+           value: 0.00891
+         - filter: self_attn.v_proj.8
+           value: 0.0271
+         - filter: self_attn.v_proj.9
+           value: 0.03499
+         - filter: self_attn.v_proj.10
+           value: 0.04242
+         - filter: self_attn.v_proj.11
+           value: 0.05088
+         - filter: self_attn.v_proj.12
+           value: 0.0481
+         - filter: self_attn.v_proj.13
+           value: 0.04341
+         - filter: self_attn.v_proj.14
+           value: 0.05882
+         - filter: self_attn.v_proj.15
+           value: 0.0702
+         - filter: self_attn.v_proj.16
+           value: 0.07306
+         - filter: self_attn.v_proj.17
+           value: 0.08186
+         - filter: self_attn.v_proj.18
+           value: 0.08394
+         - filter: self_attn.v_proj.19
+           value: 0.07471
+         - filter: self_attn.v_proj.20
+           value: 0.16654
+         - filter: self_attn.v_proj.21
+           value: 0.11261
+         - filter: self_attn.o_proj.0
+           value: 0.00157
+         - filter: self_attn.o_proj.1
+           value: 0.00103
+         - filter: self_attn.o_proj.2
+           value: 0.0019
+         - filter: self_attn.o_proj.3
+           value: 0.00336
+         - filter: self_attn.o_proj.4
+           value: 0.00236
+         - filter: self_attn.o_proj.5
+           value: 0.0061
+         - filter: self_attn.o_proj.6
+           value: 0.00104
+         - filter: self_attn.o_proj.7
+           value: 0.0095
+         - filter: self_attn.o_proj.8
+           value: 0.00767
+         - filter: self_attn.o_proj.9
+           value: 0.01618
+         - filter: self_attn.o_proj.10
+           value: 0.01477
+         - filter: self_attn.o_proj.11
+           value: 0.00161
+         - filter: self_attn.o_proj.12
+           value: 0.00596
+         - filter: self_attn.o_proj.13
+           value: 0.01282
+         - filter: self_attn.o_proj.14
+           value: 0.05706
+         - filter: self_attn.o_proj.15
+           value: 0.04798
+         - filter: self_attn.o_proj.16
+           value: 0.02363
+         - filter: self_attn.o_proj.17
+           value: 0.04288
+         - filter: self_attn.o_proj.18
+           value: 0.12266
+         - filter: self_attn.o_proj.19
+           value: 0.03632
+         - filter: self_attn.o_proj.20
+           value: 0.32836
+         - filter: self_attn.o_proj.21
+           value: 0.25524
+         - filter: mlp.gate_proj.0
+           value: 0.0008
+         - filter: mlp.gate_proj.1
+           value: 0.00325
+         - filter: mlp.gate_proj.2
+           value: 0.0023
+         - filter: mlp.gate_proj.3
+           value: 0.00033
+         - filter: mlp.gate_proj.4
+           value: 0.00163
+         - filter: mlp.gate_proj.5
+           value: 0.00365
+         - filter: mlp.gate_proj.6
+           value: 0.0042
+         - filter: mlp.gate_proj.7
+           value: 0.01307
+         - filter: mlp.gate_proj.8
+           value: 0.01445
+         - filter: mlp.gate_proj.9
+           value: 0.02852
+         - filter: mlp.gate_proj.10
+           value: 0.03948
+         - filter: mlp.gate_proj.11
+           value: 0.04786
+         - filter: mlp.gate_proj.12
+           value: 0.0536
+         - filter: mlp.gate_proj.13
+           value: 0.05585
+         - filter: mlp.gate_proj.14
+           value: 0.0648
+         - filter: mlp.gate_proj.15
+           value: 0.06929
+         - filter: mlp.gate_proj.16
+           value: 0.0744
+         - filter: mlp.gate_proj.17
+           value: 0.08906
+         - filter: mlp.gate_proj.18
+           value: 0.104
+         - filter: mlp.gate_proj.19
+           value: 0.09914
+         - filter: mlp.gate_proj.20
+           value: 0.10889
+         - filter: mlp.gate_proj.21
+           value: 0.12143
+         - filter: mlp.up_proj.0
+           value: 0.00118
+         - filter: mlp.up_proj.1
+           value: 0.00377
+         - filter: mlp.up_proj.2
+           value: 0.00249
+         - filter: mlp.up_proj.3
+           value: 0.00103
+         - filter: mlp.up_proj.4
+           value: 0.00085
+         - filter: mlp.up_proj.5
+           value: 0.00298
+         - filter: mlp.up_proj.6
+           value: 0.00318
+         - filter: mlp.up_proj.7
+           value: 0.01108
+         - filter: mlp.up_proj.8
+           value: 0.0145
+         - filter: mlp.up_proj.9
+           value: 0.02919
+         - filter: mlp.up_proj.10
+           value: 0.03808
+         - filter: mlp.up_proj.11
+           value: 0.04536
+         - filter: mlp.up_proj.12
+           value: 0.05076
+         - filter: mlp.up_proj.13
+           value: 0.05593
+         - filter: mlp.up_proj.14
+           value: 0.06894
+         - filter: mlp.up_proj.15
+           value: 0.07535
+         - filter: mlp.up_proj.16
+           value: 0.07777
+         - filter: mlp.up_proj.17
+           value: 0.08961
+         - filter: mlp.up_proj.18
+           value: 0.1045
+         - filter: mlp.up_proj.19
+           value: 0.10045
+         - filter: mlp.up_proj.20
+           value: 0.11479
+         - filter: mlp.up_proj.21
+           value: 0.10822
+         - filter: mlp.down_proj.0
+           value: 0.07073
+         - filter: mlp.down_proj.1
+           value: 0.04763
+         - filter: mlp.down_proj.2
+           value: 0.01994
+         - filter: mlp.down_proj.3
+           value: 0.00011
+         - filter: mlp.down_proj.4
+           value: 0.00291
+         - filter: mlp.down_proj.5
+           value: 0.00115
+         - filter: mlp.down_proj.6
+           value: 0.00396
+         - filter: mlp.down_proj.7
+           value: 0.02167
+         - filter: mlp.down_proj.8
+           value: 0.00561
+         - filter: mlp.down_proj.9
+           value: 0.00899
+         - filter: mlp.down_proj.10
+           value: 0.01681
+         - filter: mlp.down_proj.11
+           value: 0.02464
+         - filter: mlp.down_proj.12
+           value: 0.02988
+         - filter: mlp.down_proj.13
+           value: 0.03701
+         - filter: mlp.down_proj.14
+           value: 0.04521
+         - filter: mlp.down_proj.15
+           value: 0.07515
+         - filter: mlp.down_proj.16
+           value: 0.05471
+         - filter: mlp.down_proj.17
+           value: 0.05965
+         - filter: mlp.down_proj.18
+           value: 0.06833
+         - filter: mlp.down_proj.19
+           value: 0.07838
+         - filter: mlp.down_proj.20
+           value: 0.11912
+         - filter: mlp.down_proj.21
+           value: 0.20841
+         - value: 1
+       weight:
+         - value: 1
+ merge_method: ties
+ base_model: bigcode/starcoder2-3b
+ parameters:
+   normalize: true
+   int8_mask: true
+ dtype: bfloat16
+ tokenizer_source: union
+ ```
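+
+ In mergekit's TIES method, each `density` entry sets the fraction of the instruct model's weight deltas (relative to the base model) retained for the matching tensor, so the per-layer values above act as layer-wise sparsification knobs, while `weight: 1` keeps the surviving deltas at full strength. To reproduce the merge from this config, mergekit's documented Python entry point can be driven with the YAML above; a minimal sketch, assuming it is saved as `mergekit_config.yml` (the exact `MergeOptions` fields may vary by mergekit version):
+
+ ```python
+ import yaml
+ import torch
+ from mergekit.config import MergeConfiguration
+ from mergekit.merge import MergeOptions, run_merge
+
+ # Parse the merge recipe shown above
+ with open("mergekit_config.yml", encoding="utf-8") as fp:
+     merge_config = MergeConfiguration.model_validate(yaml.safe_load(fp))
+
+ # Write the merged checkpoint to ./merged, using CUDA when available
+ run_merge(
+     merge_config,
+     out_path="./merged",
+     options=MergeOptions(cuda=torch.cuda.is_available(), copy_tokenizer=True),
+ )
+ ```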
+
+ ## 💻 Usage
+
+ ```python
+ # Install dependencies first (in a notebook: !pip install -qU transformers accelerate)
+ from transformers import AutoTokenizer
+ import transformers
+ import torch
+
+ model = "choprahetarth/they_not_like_us"
+ messages = [{"role": "user", "content": "What is a large language model?"}]
+
+ tokenizer = AutoTokenizer.from_pretrained(model)
+ prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+ pipeline = transformers.pipeline(
+     "text-generation",
+     model=model,
+     torch_dtype=torch.float16,
+     device_map="auto",
+ )
+
+ outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
+ print(outputs[0]["generated_text"])
+ ```
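+
+ Because both parents are StarCoder2-family code models, the tokenizer also ships the StarCoder fill-in-the-middle (FIM) special tokens (see `special_tokens_map.json` below). A hypothetical prefix-suffix-middle prompt, reusing the `pipeline` from the snippet above and assuming the merge preserves the base model's FIM behavior (untested):
+
+ ```python
+ # StarCoder-style FIM prompt: the model is asked to generate the missing middle.
+ prefix = "def fibonacci(n):\n    "
+ suffix = "\n    return result\n"
+ fim_prompt = f"<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>"
+
+ outputs = pipeline(fim_prompt, max_new_tokens=64, do_sample=False)
+ print(outputs[0]["generated_text"])
+ ```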
config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "_name_or_path": "bigcode/starcoder2-3b",
+   "architectures": [
+     "Starcoder2ForCausalLM"
+   ],
+   "attention_dropout": 0.1,
+   "bos_token_id": 0,
+   "embedding_dropout": 0.1,
+   "eos_token_id": 0,
+   "hidden_act": "gelu_pytorch_tanh",
+   "hidden_size": 3072,
+   "initializer_range": 0.018042,
+   "intermediate_size": 12288,
+   "max_position_embeddings": 16384,
+   "mlp_type": "default",
+   "model_type": "starcoder2",
+   "norm_epsilon": 1e-05,
+   "norm_type": "layer_norm",
+   "num_attention_heads": 24,
+   "num_hidden_layers": 30,
+   "num_key_value_heads": 2,
+   "residual_dropout": 0.1,
+   "rope_theta": 999999.4420358813,
+   "sliding_window": 4096,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.44.2",
+   "use_bias": true,
+   "use_cache": true,
+   "vocab_size": 49152
+ }
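
The merged model keeps the base Starcoder2-3B architecture unchanged. A quick sanity check against the values above (repo id taken from the README; a sketch, not part of this upload):

```python
from transformers import AutoConfig

# Verify the uploaded config matches the Starcoder2-3B shape shown above
cfg = AutoConfig.from_pretrained("choprahetarth/they_not_like_us")
assert cfg.model_type == "starcoder2"
assert (cfg.hidden_size, cfg.num_hidden_layers, cfg.num_key_value_heads) == (3072, 30, 2)
```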
mergekit_config.yml ADDED
@@ -0,0 +1,324 @@
+
+ models:
+   - model: bigcode/starcoder2-3b
+   - model: TechxGenus/starcoder2-3b-instruct
+     parameters:
+       density:
+         - filter: self_attn.q_proj.0
+           value: 0.01384
+         - filter: self_attn.q_proj.1
+           value: 0.00657
+         - filter: self_attn.q_proj.2
+           value: 0.02731
+         - filter: self_attn.q_proj.3
+           value: 0.03261
+         - filter: self_attn.q_proj.4
+           value: 0.05056
+         - filter: self_attn.q_proj.5
+           value: 0.05539
+         - filter: self_attn.q_proj.6
+           value: 0.05796
+         - filter: self_attn.q_proj.7
+           value: 0.06573
+         - filter: self_attn.q_proj.8
+           value: 0.09558
+         - filter: self_attn.q_proj.9
+           value: 0.11226
+         - filter: self_attn.q_proj.10
+           value: 0.1053
+         - filter: self_attn.q_proj.11
+           value: 0.07023
+         - filter: self_attn.q_proj.12
+           value: 0.04345
+         - filter: self_attn.q_proj.13
+           value: 0.02855
+         - filter: self_attn.q_proj.14
+           value: 0.04248
+         - filter: self_attn.q_proj.15
+           value: 0.00938
+         - filter: self_attn.q_proj.16
+           value: 0.04849
+         - filter: self_attn.q_proj.17
+           value: 0.0569
+         - filter: self_attn.q_proj.18
+           value: 0.02524
+         - filter: self_attn.q_proj.19
+           value: 0.03765
+         - filter: self_attn.q_proj.20
+           value: 0.00301
+         - filter: self_attn.q_proj.21
+           value: 0.01149
+         - filter: self_attn.k_proj.0
+           value: 0.01448
+         - filter: self_attn.k_proj.1
+           value: 0.01995
+         - filter: self_attn.k_proj.2
+           value: 0.02198
+         - filter: self_attn.k_proj.3
+           value: 0.02643
+         - filter: self_attn.k_proj.4
+           value: 0.04244
+         - filter: self_attn.k_proj.5
+           value: 0.03983
+         - filter: self_attn.k_proj.6
+           value: 0.03963
+         - filter: self_attn.k_proj.7
+           value: 0.04097
+         - filter: self_attn.k_proj.8
+           value: 0.10344
+         - filter: self_attn.k_proj.9
+           value: 0.12206
+         - filter: self_attn.k_proj.10
+           value: 0.10247
+         - filter: self_attn.k_proj.11
+           value: 0.08638
+         - filter: self_attn.k_proj.12
+           value: 0.06374
+         - filter: self_attn.k_proj.13
+           value: 0.05815
+         - filter: self_attn.k_proj.14
+           value: 0.0267
+         - filter: self_attn.k_proj.15
+           value: 0.0105
+         - filter: self_attn.k_proj.16
+           value: 0.03183
+         - filter: self_attn.k_proj.17
+           value: 0.0173
+         - filter: self_attn.k_proj.18
+           value: 0.06724
+         - filter: self_attn.k_proj.19
+           value: 0.01356
+         - filter: self_attn.k_proj.20
+           value: 0.03054
+         - filter: self_attn.k_proj.21
+           value: 0.02039
+         - filter: self_attn.v_proj.0
+           value: 0.00132
+         - filter: self_attn.v_proj.1
+           value: 0.00024
+         - filter: self_attn.v_proj.2
+           value: 0.00149
+         - filter: self_attn.v_proj.3
+           value: 0.00292
+         - filter: self_attn.v_proj.4
+           value: 0.00208
+         - filter: self_attn.v_proj.5
+           value: 0.00715
+         - filter: self_attn.v_proj.6
+           value: 0.00725
+         - filter: self_attn.v_proj.7
+           value: 0.00891
+         - filter: self_attn.v_proj.8
+           value: 0.0271
+         - filter: self_attn.v_proj.9
+           value: 0.03499
+         - filter: self_attn.v_proj.10
+           value: 0.04242
+         - filter: self_attn.v_proj.11
+           value: 0.05088
+         - filter: self_attn.v_proj.12
+           value: 0.0481
+         - filter: self_attn.v_proj.13
+           value: 0.04341
+         - filter: self_attn.v_proj.14
+           value: 0.05882
+         - filter: self_attn.v_proj.15
+           value: 0.0702
+         - filter: self_attn.v_proj.16
+           value: 0.07306
+         - filter: self_attn.v_proj.17
+           value: 0.08186
+         - filter: self_attn.v_proj.18
+           value: 0.08394
+         - filter: self_attn.v_proj.19
+           value: 0.07471
+         - filter: self_attn.v_proj.20
+           value: 0.16654
+         - filter: self_attn.v_proj.21
+           value: 0.11261
+         - filter: self_attn.o_proj.0
+           value: 0.00157
+         - filter: self_attn.o_proj.1
+           value: 0.00103
+         - filter: self_attn.o_proj.2
+           value: 0.0019
+         - filter: self_attn.o_proj.3
+           value: 0.00336
+         - filter: self_attn.o_proj.4
+           value: 0.00236
+         - filter: self_attn.o_proj.5
+           value: 0.0061
+         - filter: self_attn.o_proj.6
+           value: 0.00104
+         - filter: self_attn.o_proj.7
+           value: 0.0095
+         - filter: self_attn.o_proj.8
+           value: 0.00767
+         - filter: self_attn.o_proj.9
+           value: 0.01618
+         - filter: self_attn.o_proj.10
+           value: 0.01477
+         - filter: self_attn.o_proj.11
+           value: 0.00161
+         - filter: self_attn.o_proj.12
+           value: 0.00596
+         - filter: self_attn.o_proj.13
+           value: 0.01282
+         - filter: self_attn.o_proj.14
+           value: 0.05706
+         - filter: self_attn.o_proj.15
+           value: 0.04798
+         - filter: self_attn.o_proj.16
+           value: 0.02363
+         - filter: self_attn.o_proj.17
+           value: 0.04288
+         - filter: self_attn.o_proj.18
+           value: 0.12266
+         - filter: self_attn.o_proj.19
+           value: 0.03632
+         - filter: self_attn.o_proj.20
+           value: 0.32836
+         - filter: self_attn.o_proj.21
+           value: 0.25524
+         - filter: mlp.gate_proj.0
+           value: 0.0008
+         - filter: mlp.gate_proj.1
+           value: 0.00325
+         - filter: mlp.gate_proj.2
+           value: 0.0023
+         - filter: mlp.gate_proj.3
+           value: 0.00033
+         - filter: mlp.gate_proj.4
+           value: 0.00163
+         - filter: mlp.gate_proj.5
+           value: 0.00365
+         - filter: mlp.gate_proj.6
+           value: 0.0042
+         - filter: mlp.gate_proj.7
+           value: 0.01307
+         - filter: mlp.gate_proj.8
+           value: 0.01445
+         - filter: mlp.gate_proj.9
+           value: 0.02852
+         - filter: mlp.gate_proj.10
+           value: 0.03948
+         - filter: mlp.gate_proj.11
+           value: 0.04786
+         - filter: mlp.gate_proj.12
+           value: 0.0536
+         - filter: mlp.gate_proj.13
+           value: 0.05585
+         - filter: mlp.gate_proj.14
+           value: 0.0648
+         - filter: mlp.gate_proj.15
+           value: 0.06929
+         - filter: mlp.gate_proj.16
+           value: 0.0744
+         - filter: mlp.gate_proj.17
+           value: 0.08906
+         - filter: mlp.gate_proj.18
+           value: 0.104
+         - filter: mlp.gate_proj.19
+           value: 0.09914
+         - filter: mlp.gate_proj.20
+           value: 0.10889
+         - filter: mlp.gate_proj.21
+           value: 0.12143
+         - filter: mlp.up_proj.0
+           value: 0.00118
+         - filter: mlp.up_proj.1
+           value: 0.00377
+         - filter: mlp.up_proj.2
+           value: 0.00249
+         - filter: mlp.up_proj.3
+           value: 0.00103
+         - filter: mlp.up_proj.4
+           value: 0.00085
+         - filter: mlp.up_proj.5
+           value: 0.00298
+         - filter: mlp.up_proj.6
+           value: 0.00318
+         - filter: mlp.up_proj.7
+           value: 0.01108
+         - filter: mlp.up_proj.8
+           value: 0.0145
+         - filter: mlp.up_proj.9
+           value: 0.02919
+         - filter: mlp.up_proj.10
+           value: 0.03808
+         - filter: mlp.up_proj.11
+           value: 0.04536
+         - filter: mlp.up_proj.12
+           value: 0.05076
+         - filter: mlp.up_proj.13
+           value: 0.05593
+         - filter: mlp.up_proj.14
+           value: 0.06894
+         - filter: mlp.up_proj.15
+           value: 0.07535
+         - filter: mlp.up_proj.16
+           value: 0.07777
+         - filter: mlp.up_proj.17
+           value: 0.08961
+         - filter: mlp.up_proj.18
+           value: 0.1045
+         - filter: mlp.up_proj.19
+           value: 0.10045
+         - filter: mlp.up_proj.20
+           value: 0.11479
+         - filter: mlp.up_proj.21
+           value: 0.10822
+         - filter: mlp.down_proj.0
+           value: 0.07073
+         - filter: mlp.down_proj.1
+           value: 0.04763
+         - filter: mlp.down_proj.2
+           value: 0.01994
+         - filter: mlp.down_proj.3
+           value: 0.00011
+         - filter: mlp.down_proj.4
+           value: 0.00291
+         - filter: mlp.down_proj.5
+           value: 0.00115
+         - filter: mlp.down_proj.6
+           value: 0.00396
+         - filter: mlp.down_proj.7
+           value: 0.02167
+         - filter: mlp.down_proj.8
+           value: 0.00561
+         - filter: mlp.down_proj.9
+           value: 0.00899
+         - filter: mlp.down_proj.10
+           value: 0.01681
+         - filter: mlp.down_proj.11
+           value: 0.02464
+         - filter: mlp.down_proj.12
+           value: 0.02988
+         - filter: mlp.down_proj.13
+           value: 0.03701
+         - filter: mlp.down_proj.14
+           value: 0.04521
+         - filter: mlp.down_proj.15
+           value: 0.07515
+         - filter: mlp.down_proj.16
+           value: 0.05471
+         - filter: mlp.down_proj.17
+           value: 0.05965
+         - filter: mlp.down_proj.18
+           value: 0.06833
+         - filter: mlp.down_proj.19
+           value: 0.07838
+         - filter: mlp.down_proj.20
+           value: 0.11912
+         - filter: mlp.down_proj.21
+           value: 0.20841
+         - value: 1
+       weight:
+         - value: 1
+ merge_method: ties
+ base_model: bigcode/starcoder2-3b
+ parameters:
+   normalize: true
+   int8_mask: true
+ dtype: bfloat16
+ tokenizer_source: union
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ee54e35dd348ce64f6fe4465a34bd21b50bf59404f4b454233489560df254405
+ size 4998607728
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:36d5dbce653b191dcd0273cc281ccdc5ccaf33debb158a3da05cf0b398b4ac96
+ size 1364179056
model.safetensors.index.json ADDED
@@ -0,0 +1 @@
+ {"metadata": {"mergekit_version": "0.0.4.4", "total_size": 6362732544}, "weight_map": {"lm_head.weight": "model-00001-of-00002.safetensors", "model.embed_tokens.weight": "model-00001-of-00002.safetensors", "model.layers.0.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.0.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.0.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.0.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.0.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.0.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.0.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.1.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.1.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.1.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.1.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.1.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.1.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.1.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.10.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.10.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.10.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.10.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.10.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.10.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.10.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.10.self_attn.o_proj.weight": 
"model-00001-of-00002.safetensors", "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.11.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.11.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.11.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.11.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.11.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.11.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.11.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.12.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.12.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.12.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.12.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.12.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.12.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.12.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.13.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.13.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.13.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.13.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.13.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.13.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.13.self_attn.k_proj.weight": 
"model-00001-of-00002.safetensors", "model.layers.13.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.14.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.14.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.14.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.14.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.14.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.14.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.14.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.15.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.15.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.15.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.15.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.15.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.15.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.15.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.16.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.16.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.16.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.16.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.16.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.16.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.16.post_attention_layernorm.weight": 
"model-00001-of-00002.safetensors", "model.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.16.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.17.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.17.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.17.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.17.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.17.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.17.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.17.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.18.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.18.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.18.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.18.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.18.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.18.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.18.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.19.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.19.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.19.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.19.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.19.mlp.c_proj.weight": 
"model-00001-of-00002.safetensors", "model.layers.19.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.19.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.2.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.2.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.2.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.2.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.2.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.2.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.2.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.20.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.20.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.20.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.20.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.20.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.20.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.20.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.21.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.21.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.21.mlp.c_fc.weight": 
"model-00001-of-00002.safetensors", "model.layers.21.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.21.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.21.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.21.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.22.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.22.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.22.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.22.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.22.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.22.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.22.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.22.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.23.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.23.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.23.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.23.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.23.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.23.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.23.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.23.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.24.input_layernorm.bias": "model-00001-of-00002.safetensors", 
"model.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.24.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.24.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.24.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.24.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.24.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.24.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.24.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.24.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.25.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.25.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.25.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.25.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.25.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.25.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.25.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.25.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.25.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.26.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.26.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.26.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.26.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.26.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.26.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.26.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.26.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.26.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", 
"model.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.27.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.27.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.27.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.27.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.27.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.27.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.27.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.27.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.27.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.27.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.27.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.28.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.28.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.28.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.28.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.28.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.28.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.28.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.28.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.28.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.28.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.28.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", "model.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", "model.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.29.input_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.29.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.29.mlp.c_fc.bias": "model-00001-of-00002.safetensors", "model.layers.29.mlp.c_fc.weight": "model-00001-of-00002.safetensors", "model.layers.29.mlp.c_proj.bias": "model-00001-of-00002.safetensors", "model.layers.29.mlp.c_proj.weight": "model-00001-of-00002.safetensors", "model.layers.29.post_attention_layernorm.bias": "model-00001-of-00002.safetensors", "model.layers.29.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.29.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", "model.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.29.self_attn.o_proj.bias": "model-00001-of-00002.safetensors", "model.layers.29.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", 
"model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.3.input_layernorm.bias": "model-00002-of-00002.safetensors", "model.layers.3.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.3.mlp.c_fc.bias": "model-00002-of-00002.safetensors", "model.layers.3.mlp.c_fc.weight": "model-00002-of-00002.safetensors", "model.layers.3.mlp.c_proj.bias": "model-00002-of-00002.safetensors", "model.layers.3.mlp.c_proj.weight": "model-00002-of-00002.safetensors", "model.layers.3.post_attention_layernorm.bias": "model-00002-of-00002.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.3.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.3.self_attn.o_proj.bias": "model-00002-of-00002.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.3.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.3.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.4.input_layernorm.bias": "model-00002-of-00002.safetensors", "model.layers.4.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.4.mlp.c_fc.bias": "model-00002-of-00002.safetensors", "model.layers.4.mlp.c_fc.weight": "model-00002-of-00002.safetensors", "model.layers.4.mlp.c_proj.bias": "model-00002-of-00002.safetensors", "model.layers.4.mlp.c_proj.weight": "model-00002-of-00002.safetensors", "model.layers.4.post_attention_layernorm.bias": "model-00002-of-00002.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.4.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.4.self_attn.o_proj.bias": "model-00002-of-00002.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.4.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.4.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.5.input_layernorm.bias": "model-00002-of-00002.safetensors", "model.layers.5.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.5.mlp.c_fc.bias": "model-00002-of-00002.safetensors", "model.layers.5.mlp.c_fc.weight": "model-00002-of-00002.safetensors", "model.layers.5.mlp.c_proj.bias": "model-00002-of-00002.safetensors", "model.layers.5.mlp.c_proj.weight": "model-00002-of-00002.safetensors", "model.layers.5.post_attention_layernorm.bias": "model-00002-of-00002.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.5.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.5.self_attn.o_proj.bias": "model-00002-of-00002.safetensors", "model.layers.5.self_attn.o_proj.weight": 
"model-00002-of-00002.safetensors", "model.layers.5.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.5.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.6.input_layernorm.bias": "model-00002-of-00002.safetensors", "model.layers.6.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.6.mlp.c_fc.bias": "model-00002-of-00002.safetensors", "model.layers.6.mlp.c_fc.weight": "model-00002-of-00002.safetensors", "model.layers.6.mlp.c_proj.bias": "model-00002-of-00002.safetensors", "model.layers.6.mlp.c_proj.weight": "model-00002-of-00002.safetensors", "model.layers.6.post_attention_layernorm.bias": "model-00002-of-00002.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.6.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.6.self_attn.o_proj.bias": "model-00002-of-00002.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.6.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.6.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.7.input_layernorm.bias": "model-00002-of-00002.safetensors", "model.layers.7.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.7.mlp.c_fc.bias": "model-00002-of-00002.safetensors", "model.layers.7.mlp.c_fc.weight": "model-00002-of-00002.safetensors", "model.layers.7.mlp.c_proj.bias": "model-00002-of-00002.safetensors", "model.layers.7.mlp.c_proj.weight": "model-00002-of-00002.safetensors", "model.layers.7.post_attention_layernorm.bias": "model-00002-of-00002.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.7.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.7.self_attn.o_proj.bias": "model-00002-of-00002.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.7.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.7.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.8.input_layernorm.bias": "model-00002-of-00002.safetensors", "model.layers.8.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.8.mlp.c_fc.bias": "model-00002-of-00002.safetensors", "model.layers.8.mlp.c_fc.weight": "model-00002-of-00002.safetensors", "model.layers.8.mlp.c_proj.bias": "model-00002-of-00002.safetensors", "model.layers.8.mlp.c_proj.weight": "model-00002-of-00002.safetensors", "model.layers.8.post_attention_layernorm.bias": "model-00002-of-00002.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.8.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", 
"model.layers.8.self_attn.o_proj.bias": "model-00002-of-00002.safetensors", "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.8.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.8.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.9.input_layernorm.bias": "model-00002-of-00002.safetensors", "model.layers.9.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.9.mlp.c_fc.bias": "model-00002-of-00002.safetensors", "model.layers.9.mlp.c_fc.weight": "model-00002-of-00002.safetensors", "model.layers.9.mlp.c_proj.bias": "model-00002-of-00002.safetensors", "model.layers.9.mlp.c_proj.weight": "model-00002-of-00002.safetensors", "model.layers.9.post_attention_layernorm.bias": "model-00002-of-00002.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.9.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.9.self_attn.o_proj.bias": "model-00002-of-00002.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.9.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.9.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.norm.bias": "model-00002-of-00002.safetensors", "model.norm.weight": "model-00002-of-00002.safetensors"}}
special_tokens_map.json ADDED
@@ -0,0 +1,63 @@
+ {
+   "additional_special_tokens": [
+     "<|endoftext|>",
+     "<fim_prefix>",
+     "<fim_middle>",
+     "<fim_suffix>",
+     "<fim_pad>",
+     "<repo_name>",
+     "<file_sep>",
+     "<issue_start>",
+     "<issue_comment>",
+     "<issue_closed>",
+     "<jupyter_start>",
+     "<jupyter_text>",
+     "<jupyter_code>",
+     "<jupyter_output>",
+     "<jupyter_script>",
+     "<empty_output>",
+     "<code_to_intermediate>",
+     "<intermediate_to_code>",
+     "<pr>",
+     "<pr_status>",
+     "<pr_is_merged>",
+     "<pr_base>",
+     "<pr_file>",
+     "<pr_base_code>",
+     "<pr_diff>",
+     "<pr_diff_hunk>",
+     "<pr_comment>",
+     "<pr_event_id>",
+     "<pr_review>",
+     "<pr_review_state>",
+     "<pr_review_comment>",
+     "<pr_in_reply_to_review_id>",
+     "<pr_in_reply_to_comment_id>",
+     "<pr_diff_hunk_comment_line>",
+     "<NAME>",
+     "<EMAIL>",
+     "<KEY>",
+     "<PASSWORD>"
+   ],
+   "bos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
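
`special_tokens_map.json` declares the StarCoder2-style special-token set: `<|endoftext|>` doubles as BOS, EOS, and UNK, and the FIM / repository-structure markers ride along as `additional_special_tokens`. A minimal sketch of verifying this after loading (the repo id below is an assumption; substitute the actual upload path):

```python
from transformers import AutoTokenizer

# Hypothetical repo id for this upload; replace with the real path.
tok = AutoTokenizer.from_pretrained("choprahetarth/they_not_like_us")

# <|endoftext|> serves as BOS, EOS, and UNK simultaneously.
assert tok.bos_token == tok.eos_token == tok.unk_token == "<|endoftext|>"
print(tok.additional_special_tokens[:5])  # FIM and repo-structure markers
```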
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,356 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "1": {
+       "content": "<fim_prefix>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "2": {
+       "content": "<fim_middle>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "3": {
+       "content": "<fim_suffix>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "4": {
+       "content": "<fim_pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "5": {
+       "content": "<repo_name>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "6": {
+       "content": "<file_sep>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "7": {
+       "content": "<issue_start>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "8": {
+       "content": "<issue_comment>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "9": {
+       "content": "<issue_closed>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "10": {
+       "content": "<jupyter_start>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "11": {
+       "content": "<jupyter_text>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "12": {
+       "content": "<jupyter_code>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "13": {
+       "content": "<jupyter_output>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "14": {
+       "content": "<jupyter_script>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "15": {
+       "content": "<empty_output>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "16": {
+       "content": "<code_to_intermediate>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "17": {
+       "content": "<intermediate_to_code>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "18": {
+       "content": "<pr>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "19": {
+       "content": "<pr_status>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "20": {
+       "content": "<pr_is_merged>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "21": {
+       "content": "<pr_base>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "22": {
+       "content": "<pr_file>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "23": {
+       "content": "<pr_base_code>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "24": {
+       "content": "<pr_diff>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "25": {
+       "content": "<pr_diff_hunk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "26": {
+       "content": "<pr_comment>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "27": {
+       "content": "<pr_event_id>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "28": {
+       "content": "<pr_review>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "29": {
+       "content": "<pr_review_state>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "30": {
+       "content": "<pr_review_comment>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "31": {
+       "content": "<pr_in_reply_to_review_id>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "32": {
+       "content": "<pr_in_reply_to_comment_id>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "33": {
+       "content": "<pr_diff_hunk_comment_line>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "34": {
+       "content": "<NAME>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "35": {
+       "content": "<EMAIL>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "36": {
+       "content": "<KEY>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "37": {
+       "content": "<PASSWORD>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     }
+   },
+   "additional_special_tokens": [
+     "<|endoftext|>",
+     "<fim_prefix>",
+     "<fim_middle>",
+     "<fim_suffix>",
+     "<fim_pad>",
+     "<repo_name>",
+     "<file_sep>",
+     "<issue_start>",
+     "<issue_comment>",
+     "<issue_closed>",
+     "<jupyter_start>",
+     "<jupyter_text>",
+     "<jupyter_code>",
+     "<jupyter_output>",
+     "<jupyter_script>",
+     "<empty_output>",
+     "<code_to_intermediate>",
+     "<intermediate_to_code>",
+     "<pr>",
+     "<pr_status>",
+     "<pr_is_merged>",
+     "<pr_base>",
+     "<pr_file>",
+     "<pr_base_code>",
+     "<pr_diff>",
+     "<pr_diff_hunk>",
+     "<pr_comment>",
+     "<pr_event_id>",
+     "<pr_review>",
+     "<pr_review_state>",
+     "<pr_review_comment>",
+     "<pr_in_reply_to_review_id>",
+     "<pr_in_reply_to_comment_id>",
+     "<pr_diff_hunk_comment_line>",
+     "<NAME>",
+     "<EMAIL>",
+     "<KEY>",
+     "<PASSWORD>"
+   ],
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "<|endoftext|>",
+   "model_max_length": 1000000000000000019884624838656,
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>",
+   "vocab_size": 49152
+ }
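
`tokenizer_config.json` registers the same 38 markers in `added_tokens_decoder` (ids 0–37 of the 49,152-entry GPT2-style vocabulary) and leaves `model_max_length` at the transformers "unset" sentinel. Because the FIM markers are in the vocabulary, fill-in-the-middle prompting should follow the usual StarCoder2 convention; a hedged sketch (repo id and generation settings are illustrative assumptions):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "choprahetarth/they_not_like_us"  # assumed upload path
tok = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo)

# Standard FIM layout: <fim_prefix>PREFIX<fim_suffix>SUFFIX<fim_middle>
prefix = "def fibonacci(n):\n    "
suffix = "\n    return result"
prompt = f"<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>"

ids = tok(prompt, return_tensors="pt")
out = model.generate(**ids, max_new_tokens=64, pad_token_id=tok.eos_token_id)
# Decode only the newly generated middle span.
print(tok.decode(out[0][ids["input_ids"].shape[1]:], skip_special_tokens=True))
```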
vocab.json ADDED
The diff for this file is too large to render. See raw diff