elvircrn committed on
Commit
38440d3
·
verified ·
1 Parent(s): 23a1f8e

Upload 6 files

Browse files
config.json ADDED
@@ -0,0 +1,1001 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vocab_size": 32000,
3
+ "max_position_embeddings": 4096,
4
+ "hidden_size": 4096,
5
+ "intermediate_size": 11008,
6
+ "num_hidden_layers": 32,
7
+ "num_attention_heads": 32,
8
+ "num_key_value_heads": 32,
9
+ "hidden_act": "silu",
10
+ "initializer_range": 0.02,
11
+ "rms_norm_eps": 1e-05,
12
+ "pretraining_tp": 1,
13
+ "use_cache": true,
14
+ "rope_theta": 10000.0,
15
+ "rope_scaling": null,
16
+ "attention_bias": false,
17
+ "attention_dropout": 0.0,
18
+ "mlp_bias": false,
19
+ "torch_dtype": null,
20
+ "tie_word_embeddings": false,
21
+ "architectures": [],
22
+ "bos_token_id": 1,
23
+ "eos_token_id": 2,
24
+ "_name_or_path": "/mnt/6e3c126c-c6bb-43eb-9d82-1e59b2111688/ecrncevi/Llama-2-7b-hf",
25
+ "transformers_version": "4.44.2",
26
+ "model_type": "llama",
27
+ "quantization_config": {
28
+ "quant_method": "spqr",
29
+ "beta1": 16,
30
+ "beta2": 16,
31
+ "bits": 3,
32
+ "linear_weights_not_to_quantize": [
33
+ "model.embed_tokens.weight",
34
+ "model.layers.0.input_layernorm.weight",
35
+ "model.layers.0.post_attention_layernorm.weight",
36
+ "model.layers.1.input_layernorm.weight",
37
+ "model.layers.1.post_attention_layernorm.weight",
38
+ "model.layers.2.input_layernorm.weight",
39
+ "model.layers.2.post_attention_layernorm.weight",
40
+ "model.layers.3.input_layernorm.weight",
41
+ "model.layers.3.post_attention_layernorm.weight",
42
+ "model.layers.4.input_layernorm.weight",
43
+ "model.layers.4.post_attention_layernorm.weight",
44
+ "model.layers.5.input_layernorm.weight",
45
+ "model.layers.5.post_attention_layernorm.weight",
46
+ "model.layers.6.input_layernorm.weight",
47
+ "model.layers.6.post_attention_layernorm.weight",
48
+ "model.layers.7.input_layernorm.weight",
49
+ "model.layers.7.post_attention_layernorm.weight",
50
+ "model.layers.8.input_layernorm.weight",
51
+ "model.layers.8.post_attention_layernorm.weight",
52
+ "model.layers.9.input_layernorm.weight",
53
+ "model.layers.9.post_attention_layernorm.weight",
54
+ "model.layers.10.input_layernorm.weight",
55
+ "model.layers.10.post_attention_layernorm.weight",
56
+ "model.layers.11.input_layernorm.weight",
57
+ "model.layers.11.post_attention_layernorm.weight",
58
+ "model.layers.12.input_layernorm.weight",
59
+ "model.layers.12.post_attention_layernorm.weight",
60
+ "model.layers.13.input_layernorm.weight",
61
+ "model.layers.13.post_attention_layernorm.weight",
62
+ "model.layers.14.input_layernorm.weight",
63
+ "model.layers.14.post_attention_layernorm.weight",
64
+ "model.layers.15.input_layernorm.weight",
65
+ "model.layers.15.post_attention_layernorm.weight",
66
+ "model.layers.16.input_layernorm.weight",
67
+ "model.layers.16.post_attention_layernorm.weight",
68
+ "model.layers.17.input_layernorm.weight",
69
+ "model.layers.17.post_attention_layernorm.weight",
70
+ "model.layers.18.input_layernorm.weight",
71
+ "model.layers.18.post_attention_layernorm.weight",
72
+ "model.layers.19.input_layernorm.weight",
73
+ "model.layers.19.post_attention_layernorm.weight",
74
+ "model.layers.20.input_layernorm.weight",
75
+ "model.layers.20.post_attention_layernorm.weight",
76
+ "model.layers.21.input_layernorm.weight",
77
+ "model.layers.21.post_attention_layernorm.weight",
78
+ "model.layers.22.input_layernorm.weight",
79
+ "model.layers.22.post_attention_layernorm.weight",
80
+ "model.layers.23.input_layernorm.weight",
81
+ "model.layers.23.post_attention_layernorm.weight",
82
+ "model.layers.24.input_layernorm.weight",
83
+ "model.layers.24.post_attention_layernorm.weight",
84
+ "model.layers.25.input_layernorm.weight",
85
+ "model.layers.25.post_attention_layernorm.weight",
86
+ "model.layers.26.input_layernorm.weight",
87
+ "model.layers.26.post_attention_layernorm.weight",
88
+ "model.layers.27.input_layernorm.weight",
89
+ "model.layers.27.post_attention_layernorm.weight",
90
+ "model.layers.28.input_layernorm.weight",
91
+ "model.layers.28.post_attention_layernorm.weight",
92
+ "model.layers.29.input_layernorm.weight",
93
+ "model.layers.29.post_attention_layernorm.weight",
94
+ "model.layers.30.input_layernorm.weight",
95
+ "model.layers.30.post_attention_layernorm.weight",
96
+ "model.layers.31.input_layernorm.weight",
97
+ "model.layers.31.post_attention_layernorm.weight",
98
+ "model.norm.weight",
99
+ "lm_head.weight"
100
+ ],
101
+ "shapes": {
102
+ "model.layers.0.self_attn.q_proj.dense_weights.shape": 1048576,
103
+ "model.layers.0.self_attn.q_proj.row_offsets.shape": 4097,
104
+ "model.layers.0.self_attn.q_proj.col_vals.shape": 44646,
105
+ "model.layers.0.self_attn.q_proj.in_perm.shape": 2048,
106
+ "model.layers.0.self_attn.k_proj.dense_weights.shape": 1048576,
107
+ "model.layers.0.self_attn.k_proj.row_offsets.shape": 4097,
108
+ "model.layers.0.self_attn.k_proj.col_vals.shape": 54665,
109
+ "model.layers.0.self_attn.k_proj.in_perm.shape": 2048,
110
+ "model.layers.0.self_attn.v_proj.dense_weights.shape": 1048576,
111
+ "model.layers.0.self_attn.v_proj.row_offsets.shape": 4097,
112
+ "model.layers.0.self_attn.v_proj.col_vals.shape": 344580,
113
+ "model.layers.0.self_attn.v_proj.in_perm.shape": 2048,
114
+ "model.layers.0.self_attn.o_proj.dense_weights.shape": 1048576,
115
+ "model.layers.0.self_attn.o_proj.row_offsets.shape": 4097,
116
+ "model.layers.0.self_attn.o_proj.col_vals.shape": 326823,
117
+ "model.layers.0.self_attn.o_proj.in_perm.shape": 2048,
118
+ "model.layers.0.mlp.gate_proj.dense_weights.shape": 2818048,
119
+ "model.layers.0.mlp.gate_proj.row_offsets.shape": 11009,
120
+ "model.layers.0.mlp.gate_proj.col_vals.shape": 767703,
121
+ "model.layers.0.mlp.gate_proj.in_perm.shape": 2048,
122
+ "model.layers.0.mlp.up_proj.dense_weights.shape": 2818048,
123
+ "model.layers.0.mlp.up_proj.row_offsets.shape": 11009,
124
+ "model.layers.0.mlp.up_proj.col_vals.shape": 781135,
125
+ "model.layers.0.mlp.up_proj.in_perm.shape": 2048,
126
+ "model.layers.0.mlp.down_proj.dense_weights.shape": 2818048,
127
+ "model.layers.0.mlp.down_proj.row_offsets.shape": 4097,
128
+ "model.layers.0.mlp.down_proj.col_vals.shape": 807092,
129
+ "model.layers.0.mlp.down_proj.in_perm.shape": 5504,
130
+ "model.layers.1.self_attn.q_proj.dense_weights.shape": 1048576,
131
+ "model.layers.1.self_attn.q_proj.row_offsets.shape": 4097,
132
+ "model.layers.1.self_attn.q_proj.col_vals.shape": 247251,
133
+ "model.layers.1.self_attn.q_proj.in_perm.shape": 2048,
134
+ "model.layers.1.self_attn.k_proj.dense_weights.shape": 1048576,
135
+ "model.layers.1.self_attn.k_proj.row_offsets.shape": 4097,
136
+ "model.layers.1.self_attn.k_proj.col_vals.shape": 239754,
137
+ "model.layers.1.self_attn.k_proj.in_perm.shape": 2048,
138
+ "model.layers.1.self_attn.v_proj.dense_weights.shape": 1048576,
139
+ "model.layers.1.self_attn.v_proj.row_offsets.shape": 4097,
140
+ "model.layers.1.self_attn.v_proj.col_vals.shape": 360082,
141
+ "model.layers.1.self_attn.v_proj.in_perm.shape": 2048,
142
+ "model.layers.1.self_attn.o_proj.dense_weights.shape": 1048576,
143
+ "model.layers.1.self_attn.o_proj.row_offsets.shape": 4097,
144
+ "model.layers.1.self_attn.o_proj.col_vals.shape": 312684,
145
+ "model.layers.1.self_attn.o_proj.in_perm.shape": 2048,
146
+ "model.layers.1.mlp.gate_proj.dense_weights.shape": 2818048,
147
+ "model.layers.1.mlp.gate_proj.row_offsets.shape": 11009,
148
+ "model.layers.1.mlp.gate_proj.col_vals.shape": 766401,
149
+ "model.layers.1.mlp.gate_proj.in_perm.shape": 2048,
150
+ "model.layers.1.mlp.up_proj.dense_weights.shape": 2818048,
151
+ "model.layers.1.mlp.up_proj.row_offsets.shape": 11009,
152
+ "model.layers.1.mlp.up_proj.col_vals.shape": 770241,
153
+ "model.layers.1.mlp.up_proj.in_perm.shape": 2048,
154
+ "model.layers.1.mlp.down_proj.dense_weights.shape": 2818048,
155
+ "model.layers.1.mlp.down_proj.row_offsets.shape": 4097,
156
+ "model.layers.1.mlp.down_proj.col_vals.shape": 65901,
157
+ "model.layers.1.mlp.down_proj.in_perm.shape": 5504,
158
+ "model.layers.2.self_attn.q_proj.dense_weights.shape": 1048576,
159
+ "model.layers.2.self_attn.q_proj.row_offsets.shape": 4097,
160
+ "model.layers.2.self_attn.q_proj.col_vals.shape": 277378,
161
+ "model.layers.2.self_attn.q_proj.in_perm.shape": 2048,
162
+ "model.layers.2.self_attn.k_proj.dense_weights.shape": 1048576,
163
+ "model.layers.2.self_attn.k_proj.row_offsets.shape": 4097,
164
+ "model.layers.2.self_attn.k_proj.col_vals.shape": 275528,
165
+ "model.layers.2.self_attn.k_proj.in_perm.shape": 2048,
166
+ "model.layers.2.self_attn.v_proj.dense_weights.shape": 1048576,
167
+ "model.layers.2.self_attn.v_proj.row_offsets.shape": 4097,
168
+ "model.layers.2.self_attn.v_proj.col_vals.shape": 316985,
169
+ "model.layers.2.self_attn.v_proj.in_perm.shape": 2048,
170
+ "model.layers.2.self_attn.o_proj.dense_weights.shape": 1048576,
171
+ "model.layers.2.self_attn.o_proj.row_offsets.shape": 4097,
172
+ "model.layers.2.self_attn.o_proj.col_vals.shape": 275900,
173
+ "model.layers.2.self_attn.o_proj.in_perm.shape": 2048,
174
+ "model.layers.2.mlp.gate_proj.dense_weights.shape": 2818048,
175
+ "model.layers.2.mlp.gate_proj.row_offsets.shape": 11009,
176
+ "model.layers.2.mlp.gate_proj.col_vals.shape": 766027,
177
+ "model.layers.2.mlp.gate_proj.in_perm.shape": 2048,
178
+ "model.layers.2.mlp.up_proj.dense_weights.shape": 2818048,
179
+ "model.layers.2.mlp.up_proj.row_offsets.shape": 11009,
180
+ "model.layers.2.mlp.up_proj.col_vals.shape": 764280,
181
+ "model.layers.2.mlp.up_proj.in_perm.shape": 2048,
182
+ "model.layers.2.mlp.down_proj.dense_weights.shape": 2818048,
183
+ "model.layers.2.mlp.down_proj.row_offsets.shape": 4097,
184
+ "model.layers.2.mlp.down_proj.col_vals.shape": 776893,
185
+ "model.layers.2.mlp.down_proj.in_perm.shape": 5504,
186
+ "model.layers.3.self_attn.q_proj.dense_weights.shape": 1048576,
187
+ "model.layers.3.self_attn.q_proj.row_offsets.shape": 4097,
188
+ "model.layers.3.self_attn.q_proj.col_vals.shape": 290549,
189
+ "model.layers.3.self_attn.q_proj.in_perm.shape": 2048,
190
+ "model.layers.3.self_attn.k_proj.dense_weights.shape": 1048576,
191
+ "model.layers.3.self_attn.k_proj.row_offsets.shape": 4097,
192
+ "model.layers.3.self_attn.k_proj.col_vals.shape": 286015,
193
+ "model.layers.3.self_attn.k_proj.in_perm.shape": 2048,
194
+ "model.layers.3.self_attn.v_proj.dense_weights.shape": 1048576,
195
+ "model.layers.3.self_attn.v_proj.row_offsets.shape": 4097,
196
+ "model.layers.3.self_attn.v_proj.col_vals.shape": 315620,
197
+ "model.layers.3.self_attn.v_proj.in_perm.shape": 2048,
198
+ "model.layers.3.self_attn.o_proj.dense_weights.shape": 1048576,
199
+ "model.layers.3.self_attn.o_proj.row_offsets.shape": 4097,
200
+ "model.layers.3.self_attn.o_proj.col_vals.shape": 262559,
201
+ "model.layers.3.self_attn.o_proj.in_perm.shape": 2048,
202
+ "model.layers.3.mlp.gate_proj.dense_weights.shape": 2818048,
203
+ "model.layers.3.mlp.gate_proj.row_offsets.shape": 11009,
204
+ "model.layers.3.mlp.gate_proj.col_vals.shape": 761413,
205
+ "model.layers.3.mlp.gate_proj.in_perm.shape": 2048,
206
+ "model.layers.3.mlp.up_proj.dense_weights.shape": 2818048,
207
+ "model.layers.3.mlp.up_proj.row_offsets.shape": 11009,
208
+ "model.layers.3.mlp.up_proj.col_vals.shape": 765370,
209
+ "model.layers.3.mlp.up_proj.in_perm.shape": 2048,
210
+ "model.layers.3.mlp.down_proj.dense_weights.shape": 2818048,
211
+ "model.layers.3.mlp.down_proj.row_offsets.shape": 4097,
212
+ "model.layers.3.mlp.down_proj.col_vals.shape": 790568,
213
+ "model.layers.3.mlp.down_proj.in_perm.shape": 5504,
214
+ "model.layers.4.self_attn.q_proj.dense_weights.shape": 1048576,
215
+ "model.layers.4.self_attn.q_proj.row_offsets.shape": 4097,
216
+ "model.layers.4.self_attn.q_proj.col_vals.shape": 279570,
217
+ "model.layers.4.self_attn.q_proj.in_perm.shape": 2048,
218
+ "model.layers.4.self_attn.k_proj.dense_weights.shape": 1048576,
219
+ "model.layers.4.self_attn.k_proj.row_offsets.shape": 4097,
220
+ "model.layers.4.self_attn.k_proj.col_vals.shape": 270454,
221
+ "model.layers.4.self_attn.k_proj.in_perm.shape": 2048,
222
+ "model.layers.4.self_attn.v_proj.dense_weights.shape": 1048576,
223
+ "model.layers.4.self_attn.v_proj.row_offsets.shape": 4097,
224
+ "model.layers.4.self_attn.v_proj.col_vals.shape": 314846,
225
+ "model.layers.4.self_attn.v_proj.in_perm.shape": 2048,
226
+ "model.layers.4.self_attn.o_proj.dense_weights.shape": 1048576,
227
+ "model.layers.4.self_attn.o_proj.row_offsets.shape": 4097,
228
+ "model.layers.4.self_attn.o_proj.col_vals.shape": 278091,
229
+ "model.layers.4.self_attn.o_proj.in_perm.shape": 2048,
230
+ "model.layers.4.mlp.gate_proj.dense_weights.shape": 2818048,
231
+ "model.layers.4.mlp.gate_proj.row_offsets.shape": 11009,
232
+ "model.layers.4.mlp.gate_proj.col_vals.shape": 761084,
233
+ "model.layers.4.mlp.gate_proj.in_perm.shape": 2048,
234
+ "model.layers.4.mlp.up_proj.dense_weights.shape": 2818048,
235
+ "model.layers.4.mlp.up_proj.row_offsets.shape": 11009,
236
+ "model.layers.4.mlp.up_proj.col_vals.shape": 771623,
237
+ "model.layers.4.mlp.up_proj.in_perm.shape": 2048,
238
+ "model.layers.4.mlp.down_proj.dense_weights.shape": 2818048,
239
+ "model.layers.4.mlp.down_proj.row_offsets.shape": 4097,
240
+ "model.layers.4.mlp.down_proj.col_vals.shape": 789374,
241
+ "model.layers.4.mlp.down_proj.in_perm.shape": 5504,
242
+ "model.layers.5.self_attn.q_proj.dense_weights.shape": 1048576,
243
+ "model.layers.5.self_attn.q_proj.row_offsets.shape": 4097,
244
+ "model.layers.5.self_attn.q_proj.col_vals.shape": 280057,
245
+ "model.layers.5.self_attn.q_proj.in_perm.shape": 2048,
246
+ "model.layers.5.self_attn.k_proj.dense_weights.shape": 1048576,
247
+ "model.layers.5.self_attn.k_proj.row_offsets.shape": 4097,
248
+ "model.layers.5.self_attn.k_proj.col_vals.shape": 268852,
249
+ "model.layers.5.self_attn.k_proj.in_perm.shape": 2048,
250
+ "model.layers.5.self_attn.v_proj.dense_weights.shape": 1048576,
251
+ "model.layers.5.self_attn.v_proj.row_offsets.shape": 4097,
252
+ "model.layers.5.self_attn.v_proj.col_vals.shape": 318024,
253
+ "model.layers.5.self_attn.v_proj.in_perm.shape": 2048,
254
+ "model.layers.5.self_attn.o_proj.dense_weights.shape": 1048576,
255
+ "model.layers.5.self_attn.o_proj.row_offsets.shape": 4097,
256
+ "model.layers.5.self_attn.o_proj.col_vals.shape": 273265,
257
+ "model.layers.5.self_attn.o_proj.in_perm.shape": 2048,
258
+ "model.layers.5.mlp.gate_proj.dense_weights.shape": 2818048,
259
+ "model.layers.5.mlp.gate_proj.row_offsets.shape": 11009,
260
+ "model.layers.5.mlp.gate_proj.col_vals.shape": 759031,
261
+ "model.layers.5.mlp.gate_proj.in_perm.shape": 2048,
262
+ "model.layers.5.mlp.up_proj.dense_weights.shape": 2818048,
263
+ "model.layers.5.mlp.up_proj.row_offsets.shape": 11009,
264
+ "model.layers.5.mlp.up_proj.col_vals.shape": 770071,
265
+ "model.layers.5.mlp.up_proj.in_perm.shape": 2048,
266
+ "model.layers.5.mlp.down_proj.dense_weights.shape": 2818048,
267
+ "model.layers.5.mlp.down_proj.row_offsets.shape": 4097,
268
+ "model.layers.5.mlp.down_proj.col_vals.shape": 786653,
269
+ "model.layers.5.mlp.down_proj.in_perm.shape": 5504,
270
+ "model.layers.6.self_attn.q_proj.dense_weights.shape": 1048576,
271
+ "model.layers.6.self_attn.q_proj.row_offsets.shape": 4097,
272
+ "model.layers.6.self_attn.q_proj.col_vals.shape": 284527,
273
+ "model.layers.6.self_attn.q_proj.in_perm.shape": 2048,
274
+ "model.layers.6.self_attn.k_proj.dense_weights.shape": 1048576,
275
+ "model.layers.6.self_attn.k_proj.row_offsets.shape": 4097,
276
+ "model.layers.6.self_attn.k_proj.col_vals.shape": 280859,
277
+ "model.layers.6.self_attn.k_proj.in_perm.shape": 2048,
278
+ "model.layers.6.self_attn.v_proj.dense_weights.shape": 1048576,
279
+ "model.layers.6.self_attn.v_proj.row_offsets.shape": 4097,
280
+ "model.layers.6.self_attn.v_proj.col_vals.shape": 314947,
281
+ "model.layers.6.self_attn.v_proj.in_perm.shape": 2048,
282
+ "model.layers.6.self_attn.o_proj.dense_weights.shape": 1048576,
283
+ "model.layers.6.self_attn.o_proj.row_offsets.shape": 4097,
284
+ "model.layers.6.self_attn.o_proj.col_vals.shape": 275025,
285
+ "model.layers.6.self_attn.o_proj.in_perm.shape": 2048,
286
+ "model.layers.6.mlp.gate_proj.dense_weights.shape": 2818048,
287
+ "model.layers.6.mlp.gate_proj.row_offsets.shape": 11009,
288
+ "model.layers.6.mlp.gate_proj.col_vals.shape": 759168,
289
+ "model.layers.6.mlp.gate_proj.in_perm.shape": 2048,
290
+ "model.layers.6.mlp.up_proj.dense_weights.shape": 2818048,
291
+ "model.layers.6.mlp.up_proj.row_offsets.shape": 11009,
292
+ "model.layers.6.mlp.up_proj.col_vals.shape": 770914,
293
+ "model.layers.6.mlp.up_proj.in_perm.shape": 2048,
294
+ "model.layers.6.mlp.down_proj.dense_weights.shape": 2818048,
295
+ "model.layers.6.mlp.down_proj.row_offsets.shape": 4097,
296
+ "model.layers.6.mlp.down_proj.col_vals.shape": 785700,
297
+ "model.layers.6.mlp.down_proj.in_perm.shape": 5504,
298
+ "model.layers.7.self_attn.q_proj.dense_weights.shape": 1048576,
299
+ "model.layers.7.self_attn.q_proj.row_offsets.shape": 4097,
300
+ "model.layers.7.self_attn.q_proj.col_vals.shape": 287757,
301
+ "model.layers.7.self_attn.q_proj.in_perm.shape": 2048,
302
+ "model.layers.7.self_attn.k_proj.dense_weights.shape": 1048576,
303
+ "model.layers.7.self_attn.k_proj.row_offsets.shape": 4097,
304
+ "model.layers.7.self_attn.k_proj.col_vals.shape": 281927,
305
+ "model.layers.7.self_attn.k_proj.in_perm.shape": 2048,
306
+ "model.layers.7.self_attn.v_proj.dense_weights.shape": 1048576,
307
+ "model.layers.7.self_attn.v_proj.row_offsets.shape": 4097,
308
+ "model.layers.7.self_attn.v_proj.col_vals.shape": 314672,
309
+ "model.layers.7.self_attn.v_proj.in_perm.shape": 2048,
310
+ "model.layers.7.self_attn.o_proj.dense_weights.shape": 1048576,
311
+ "model.layers.7.self_attn.o_proj.row_offsets.shape": 4097,
312
+ "model.layers.7.self_attn.o_proj.col_vals.shape": 274752,
313
+ "model.layers.7.self_attn.o_proj.in_perm.shape": 2048,
314
+ "model.layers.7.mlp.gate_proj.dense_weights.shape": 2818048,
315
+ "model.layers.7.mlp.gate_proj.row_offsets.shape": 11009,
316
+ "model.layers.7.mlp.gate_proj.col_vals.shape": 760209,
317
+ "model.layers.7.mlp.gate_proj.in_perm.shape": 2048,
318
+ "model.layers.7.mlp.up_proj.dense_weights.shape": 2818048,
319
+ "model.layers.7.mlp.up_proj.row_offsets.shape": 11009,
320
+ "model.layers.7.mlp.up_proj.col_vals.shape": 772503,
321
+ "model.layers.7.mlp.up_proj.in_perm.shape": 2048,
322
+ "model.layers.7.mlp.down_proj.dense_weights.shape": 2818048,
323
+ "model.layers.7.mlp.down_proj.row_offsets.shape": 4097,
324
+ "model.layers.7.mlp.down_proj.col_vals.shape": 783181,
325
+ "model.layers.7.mlp.down_proj.in_perm.shape": 5504,
326
+ "model.layers.8.self_attn.q_proj.dense_weights.shape": 1048576,
327
+ "model.layers.8.self_attn.q_proj.row_offsets.shape": 4097,
328
+ "model.layers.8.self_attn.q_proj.col_vals.shape": 286402,
329
+ "model.layers.8.self_attn.q_proj.in_perm.shape": 2048,
330
+ "model.layers.8.self_attn.k_proj.dense_weights.shape": 1048576,
331
+ "model.layers.8.self_attn.k_proj.row_offsets.shape": 4097,
332
+ "model.layers.8.self_attn.k_proj.col_vals.shape": 274369,
333
+ "model.layers.8.self_attn.k_proj.in_perm.shape": 2048,
334
+ "model.layers.8.self_attn.v_proj.dense_weights.shape": 1048576,
335
+ "model.layers.8.self_attn.v_proj.row_offsets.shape": 4097,
336
+ "model.layers.8.self_attn.v_proj.col_vals.shape": 316146,
337
+ "model.layers.8.self_attn.v_proj.in_perm.shape": 2048,
338
+ "model.layers.8.self_attn.o_proj.dense_weights.shape": 1048576,
339
+ "model.layers.8.self_attn.o_proj.row_offsets.shape": 4097,
340
+ "model.layers.8.self_attn.o_proj.col_vals.shape": 276760,
341
+ "model.layers.8.self_attn.o_proj.in_perm.shape": 2048,
342
+ "model.layers.8.mlp.gate_proj.dense_weights.shape": 2818048,
343
+ "model.layers.8.mlp.gate_proj.row_offsets.shape": 11009,
344
+ "model.layers.8.mlp.gate_proj.col_vals.shape": 757944,
345
+ "model.layers.8.mlp.gate_proj.in_perm.shape": 2048,
346
+ "model.layers.8.mlp.up_proj.dense_weights.shape": 2818048,
347
+ "model.layers.8.mlp.up_proj.row_offsets.shape": 11009,
348
+ "model.layers.8.mlp.up_proj.col_vals.shape": 774371,
349
+ "model.layers.8.mlp.up_proj.in_perm.shape": 2048,
350
+ "model.layers.8.mlp.down_proj.dense_weights.shape": 2818048,
351
+ "model.layers.8.mlp.down_proj.row_offsets.shape": 4097,
352
+ "model.layers.8.mlp.down_proj.col_vals.shape": 783639,
353
+ "model.layers.8.mlp.down_proj.in_perm.shape": 5504,
354
+ "model.layers.9.self_attn.q_proj.dense_weights.shape": 1048576,
355
+ "model.layers.9.self_attn.q_proj.row_offsets.shape": 4097,
356
+ "model.layers.9.self_attn.q_proj.col_vals.shape": 287016,
357
+ "model.layers.9.self_attn.q_proj.in_perm.shape": 2048,
358
+ "model.layers.9.self_attn.k_proj.dense_weights.shape": 1048576,
359
+ "model.layers.9.self_attn.k_proj.row_offsets.shape": 4097,
360
+ "model.layers.9.self_attn.k_proj.col_vals.shape": 272500,
361
+ "model.layers.9.self_attn.k_proj.in_perm.shape": 2048,
362
+ "model.layers.9.self_attn.v_proj.dense_weights.shape": 1048576,
363
+ "model.layers.9.self_attn.v_proj.row_offsets.shape": 4097,
364
+ "model.layers.9.self_attn.v_proj.col_vals.shape": 311792,
365
+ "model.layers.9.self_attn.v_proj.in_perm.shape": 2048,
366
+ "model.layers.9.self_attn.o_proj.dense_weights.shape": 1048576,
367
+ "model.layers.9.self_attn.o_proj.row_offsets.shape": 4097,
368
+ "model.layers.9.self_attn.o_proj.col_vals.shape": 277506,
369
+ "model.layers.9.self_attn.o_proj.in_perm.shape": 2048,
370
+ "model.layers.9.mlp.gate_proj.dense_weights.shape": 2818048,
371
+ "model.layers.9.mlp.gate_proj.row_offsets.shape": 11009,
372
+ "model.layers.9.mlp.gate_proj.col_vals.shape": 758413,
373
+ "model.layers.9.mlp.gate_proj.in_perm.shape": 2048,
374
+ "model.layers.9.mlp.up_proj.dense_weights.shape": 2818048,
375
+ "model.layers.9.mlp.up_proj.row_offsets.shape": 11009,
376
+ "model.layers.9.mlp.up_proj.col_vals.shape": 778305,
377
+ "model.layers.9.mlp.up_proj.in_perm.shape": 2048,
378
+ "model.layers.9.mlp.down_proj.dense_weights.shape": 2818048,
379
+ "model.layers.9.mlp.down_proj.row_offsets.shape": 4097,
380
+ "model.layers.9.mlp.down_proj.col_vals.shape": 788558,
381
+ "model.layers.9.mlp.down_proj.in_perm.shape": 5504,
382
+ "model.layers.10.self_attn.q_proj.dense_weights.shape": 1048576,
383
+ "model.layers.10.self_attn.q_proj.row_offsets.shape": 4097,
384
+ "model.layers.10.self_attn.q_proj.col_vals.shape": 287968,
385
+ "model.layers.10.self_attn.q_proj.in_perm.shape": 2048,
386
+ "model.layers.10.self_attn.k_proj.dense_weights.shape": 1048576,
387
+ "model.layers.10.self_attn.k_proj.row_offsets.shape": 4097,
388
+ "model.layers.10.self_attn.k_proj.col_vals.shape": 272194,
389
+ "model.layers.10.self_attn.k_proj.in_perm.shape": 2048,
390
+ "model.layers.10.self_attn.v_proj.dense_weights.shape": 1048576,
391
+ "model.layers.10.self_attn.v_proj.row_offsets.shape": 4097,
392
+ "model.layers.10.self_attn.v_proj.col_vals.shape": 308825,
393
+ "model.layers.10.self_attn.v_proj.in_perm.shape": 2048,
394
+ "model.layers.10.self_attn.o_proj.dense_weights.shape": 1048576,
395
+ "model.layers.10.self_attn.o_proj.row_offsets.shape": 4097,
396
+ "model.layers.10.self_attn.o_proj.col_vals.shape": 276573,
397
+ "model.layers.10.self_attn.o_proj.in_perm.shape": 2048,
398
+ "model.layers.10.mlp.gate_proj.dense_weights.shape": 2818048,
399
+ "model.layers.10.mlp.gate_proj.row_offsets.shape": 11009,
400
+ "model.layers.10.mlp.gate_proj.col_vals.shape": 754727,
401
+ "model.layers.10.mlp.gate_proj.in_perm.shape": 2048,
402
+ "model.layers.10.mlp.up_proj.dense_weights.shape": 2818048,
403
+ "model.layers.10.mlp.up_proj.row_offsets.shape": 11009,
404
+ "model.layers.10.mlp.up_proj.col_vals.shape": 776777,
405
+ "model.layers.10.mlp.up_proj.in_perm.shape": 2048,
406
+ "model.layers.10.mlp.down_proj.dense_weights.shape": 2818048,
407
+ "model.layers.10.mlp.down_proj.row_offsets.shape": 4097,
408
+ "model.layers.10.mlp.down_proj.col_vals.shape": 795947,
409
+ "model.layers.10.mlp.down_proj.in_perm.shape": 5504,
410
+ "model.layers.11.self_attn.q_proj.dense_weights.shape": 1048576,
411
+ "model.layers.11.self_attn.q_proj.row_offsets.shape": 4097,
412
+ "model.layers.11.self_attn.q_proj.col_vals.shape": 283876,
413
+ "model.layers.11.self_attn.q_proj.in_perm.shape": 2048,
414
+ "model.layers.11.self_attn.k_proj.dense_weights.shape": 1048576,
415
+ "model.layers.11.self_attn.k_proj.row_offsets.shape": 4097,
416
+ "model.layers.11.self_attn.k_proj.col_vals.shape": 267497,
417
+ "model.layers.11.self_attn.k_proj.in_perm.shape": 2048,
418
+ "model.layers.11.self_attn.v_proj.dense_weights.shape": 1048576,
419
+ "model.layers.11.self_attn.v_proj.row_offsets.shape": 4097,
420
+ "model.layers.11.self_attn.v_proj.col_vals.shape": 311473,
421
+ "model.layers.11.self_attn.v_proj.in_perm.shape": 2048,
422
+ "model.layers.11.self_attn.o_proj.dense_weights.shape": 1048576,
423
+ "model.layers.11.self_attn.o_proj.row_offsets.shape": 4097,
424
+ "model.layers.11.self_attn.o_proj.col_vals.shape": 278478,
425
+ "model.layers.11.self_attn.o_proj.in_perm.shape": 2048,
426
+ "model.layers.11.mlp.gate_proj.dense_weights.shape": 2818048,
427
+ "model.layers.11.mlp.gate_proj.row_offsets.shape": 11009,
428
+ "model.layers.11.mlp.gate_proj.col_vals.shape": 755255,
429
+ "model.layers.11.mlp.gate_proj.in_perm.shape": 2048,
430
+ "model.layers.11.mlp.up_proj.dense_weights.shape": 2818048,
431
+ "model.layers.11.mlp.up_proj.row_offsets.shape": 11009,
432
+ "model.layers.11.mlp.up_proj.col_vals.shape": 774866,
433
+ "model.layers.11.mlp.up_proj.in_perm.shape": 2048,
434
+ "model.layers.11.mlp.down_proj.dense_weights.shape": 2818048,
435
+ "model.layers.11.mlp.down_proj.row_offsets.shape": 4097,
436
+ "model.layers.11.mlp.down_proj.col_vals.shape": 785771,
437
+ "model.layers.11.mlp.down_proj.in_perm.shape": 5504,
438
+ "model.layers.12.self_attn.q_proj.dense_weights.shape": 1048576,
439
+ "model.layers.12.self_attn.q_proj.row_offsets.shape": 4097,
440
+ "model.layers.12.self_attn.q_proj.col_vals.shape": 286613,
441
+ "model.layers.12.self_attn.q_proj.in_perm.shape": 2048,
442
+ "model.layers.12.self_attn.k_proj.dense_weights.shape": 1048576,
443
+ "model.layers.12.self_attn.k_proj.row_offsets.shape": 4097,
444
+ "model.layers.12.self_attn.k_proj.col_vals.shape": 271141,
445
+ "model.layers.12.self_attn.k_proj.in_perm.shape": 2048,
446
+ "model.layers.12.self_attn.v_proj.dense_weights.shape": 1048576,
447
+ "model.layers.12.self_attn.v_proj.row_offsets.shape": 4097,
448
+ "model.layers.12.self_attn.v_proj.col_vals.shape": 307220,
449
+ "model.layers.12.self_attn.v_proj.in_perm.shape": 2048,
450
+ "model.layers.12.self_attn.o_proj.dense_weights.shape": 1048576,
451
+ "model.layers.12.self_attn.o_proj.row_offsets.shape": 4097,
452
+ "model.layers.12.self_attn.o_proj.col_vals.shape": 278019,
453
+ "model.layers.12.self_attn.o_proj.in_perm.shape": 2048,
454
+ "model.layers.12.mlp.gate_proj.dense_weights.shape": 2818048,
455
+ "model.layers.12.mlp.gate_proj.row_offsets.shape": 11009,
456
+ "model.layers.12.mlp.gate_proj.col_vals.shape": 753547,
457
+ "model.layers.12.mlp.gate_proj.in_perm.shape": 2048,
458
+ "model.layers.12.mlp.up_proj.dense_weights.shape": 2818048,
459
+ "model.layers.12.mlp.up_proj.row_offsets.shape": 11009,
460
+ "model.layers.12.mlp.up_proj.col_vals.shape": 774577,
461
+ "model.layers.12.mlp.up_proj.in_perm.shape": 2048,
462
+ "model.layers.12.mlp.down_proj.dense_weights.shape": 2818048,
463
+ "model.layers.12.mlp.down_proj.row_offsets.shape": 4097,
464
+ "model.layers.12.mlp.down_proj.col_vals.shape": 785966,
465
+ "model.layers.12.mlp.down_proj.in_perm.shape": 5504,
466
+ "model.layers.13.self_attn.q_proj.dense_weights.shape": 1048576,
467
+ "model.layers.13.self_attn.q_proj.row_offsets.shape": 4097,
468
+ "model.layers.13.self_attn.q_proj.col_vals.shape": 284102,
469
+ "model.layers.13.self_attn.q_proj.in_perm.shape": 2048,
470
+ "model.layers.13.self_attn.k_proj.dense_weights.shape": 1048576,
471
+ "model.layers.13.self_attn.k_proj.row_offsets.shape": 4097,
472
+ "model.layers.13.self_attn.k_proj.col_vals.shape": 271166,
473
+ "model.layers.13.self_attn.k_proj.in_perm.shape": 2048,
474
+ "model.layers.13.self_attn.v_proj.dense_weights.shape": 1048576,
475
+ "model.layers.13.self_attn.v_proj.row_offsets.shape": 4097,
476
+ "model.layers.13.self_attn.v_proj.col_vals.shape": 307926,
477
+ "model.layers.13.self_attn.v_proj.in_perm.shape": 2048,
478
+ "model.layers.13.self_attn.o_proj.dense_weights.shape": 1048576,
479
+ "model.layers.13.self_attn.o_proj.row_offsets.shape": 4097,
480
+ "model.layers.13.self_attn.o_proj.col_vals.shape": 278409,
481
+ "model.layers.13.self_attn.o_proj.in_perm.shape": 2048,
482
+ "model.layers.13.mlp.gate_proj.dense_weights.shape": 2818048,
483
+ "model.layers.13.mlp.gate_proj.row_offsets.shape": 11009,
484
+ "model.layers.13.mlp.gate_proj.col_vals.shape": 754940,
485
+ "model.layers.13.mlp.gate_proj.in_perm.shape": 2048,
486
+ "model.layers.13.mlp.up_proj.dense_weights.shape": 2818048,
487
+ "model.layers.13.mlp.up_proj.row_offsets.shape": 11009,
488
+ "model.layers.13.mlp.up_proj.col_vals.shape": 773648,
489
+ "model.layers.13.mlp.up_proj.in_perm.shape": 2048,
490
+ "model.layers.13.mlp.down_proj.dense_weights.shape": 2818048,
491
+ "model.layers.13.mlp.down_proj.row_offsets.shape": 4097,
492
+ "model.layers.13.mlp.down_proj.col_vals.shape": 795711,
493
+ "model.layers.13.mlp.down_proj.in_perm.shape": 5504,
494
+ "model.layers.14.self_attn.q_proj.dense_weights.shape": 1048576,
495
+ "model.layers.14.self_attn.q_proj.row_offsets.shape": 4097,
496
+ "model.layers.14.self_attn.q_proj.col_vals.shape": 286971,
497
+ "model.layers.14.self_attn.q_proj.in_perm.shape": 2048,
498
+ "model.layers.14.self_attn.k_proj.dense_weights.shape": 1048576,
499
+ "model.layers.14.self_attn.k_proj.row_offsets.shape": 4097,
500
+ "model.layers.14.self_attn.k_proj.col_vals.shape": 267434,
501
+ "model.layers.14.self_attn.k_proj.in_perm.shape": 2048,
502
+ "model.layers.14.self_attn.v_proj.dense_weights.shape": 1048576,
503
+ "model.layers.14.self_attn.v_proj.row_offsets.shape": 4097,
504
+ "model.layers.14.self_attn.v_proj.col_vals.shape": 310037,
505
+ "model.layers.14.self_attn.v_proj.in_perm.shape": 2048,
506
+ "model.layers.14.self_attn.o_proj.dense_weights.shape": 1048576,
507
+ "model.layers.14.self_attn.o_proj.row_offsets.shape": 4097,
508
+ "model.layers.14.self_attn.o_proj.col_vals.shape": 278587,
509
+ "model.layers.14.self_attn.o_proj.in_perm.shape": 2048,
510
+ "model.layers.14.mlp.gate_proj.dense_weights.shape": 2818048,
511
+ "model.layers.14.mlp.gate_proj.row_offsets.shape": 11009,
512
+ "model.layers.14.mlp.gate_proj.col_vals.shape": 755528,
513
+ "model.layers.14.mlp.gate_proj.in_perm.shape": 2048,
514
+ "model.layers.14.mlp.up_proj.dense_weights.shape": 2818048,
515
+ "model.layers.14.mlp.up_proj.row_offsets.shape": 11009,
516
+ "model.layers.14.mlp.up_proj.col_vals.shape": 772483,
517
+ "model.layers.14.mlp.up_proj.in_perm.shape": 2048,
518
+ "model.layers.14.mlp.down_proj.dense_weights.shape": 2818048,
519
+ "model.layers.14.mlp.down_proj.row_offsets.shape": 4097,
520
+ "model.layers.14.mlp.down_proj.col_vals.shape": 794709,
521
+ "model.layers.14.mlp.down_proj.in_perm.shape": 5504,
522
+ "model.layers.15.self_attn.q_proj.dense_weights.shape": 1048576,
523
+ "model.layers.15.self_attn.q_proj.row_offsets.shape": 4097,
524
+ "model.layers.15.self_attn.q_proj.col_vals.shape": 286611,
525
+ "model.layers.15.self_attn.q_proj.in_perm.shape": 2048,
526
+ "model.layers.15.self_attn.k_proj.dense_weights.shape": 1048576,
527
+ "model.layers.15.self_attn.k_proj.row_offsets.shape": 4097,
528
+ "model.layers.15.self_attn.k_proj.col_vals.shape": 270554,
529
+ "model.layers.15.self_attn.k_proj.in_perm.shape": 2048,
530
+ "model.layers.15.self_attn.v_proj.dense_weights.shape": 1048576,
531
+ "model.layers.15.self_attn.v_proj.row_offsets.shape": 4097,
532
+ "model.layers.15.self_attn.v_proj.col_vals.shape": 312763,
533
+ "model.layers.15.self_attn.v_proj.in_perm.shape": 2048,
534
+ "model.layers.15.self_attn.o_proj.dense_weights.shape": 1048576,
535
+ "model.layers.15.self_attn.o_proj.row_offsets.shape": 4097,
536
+ "model.layers.15.self_attn.o_proj.col_vals.shape": 278818,
537
+ "model.layers.15.self_attn.o_proj.in_perm.shape": 2048,
538
+ "model.layers.15.mlp.gate_proj.dense_weights.shape": 2818048,
539
+ "model.layers.15.mlp.gate_proj.row_offsets.shape": 11009,
540
+ "model.layers.15.mlp.gate_proj.col_vals.shape": 755604,
541
+ "model.layers.15.mlp.gate_proj.in_perm.shape": 2048,
542
+ "model.layers.15.mlp.up_proj.dense_weights.shape": 2818048,
543
+ "model.layers.15.mlp.up_proj.row_offsets.shape": 11009,
544
+ "model.layers.15.mlp.up_proj.col_vals.shape": 770147,
545
+ "model.layers.15.mlp.up_proj.in_perm.shape": 2048,
546
+ "model.layers.15.mlp.down_proj.dense_weights.shape": 2818048,
547
+ "model.layers.15.mlp.down_proj.row_offsets.shape": 4097,
548
+ "model.layers.15.mlp.down_proj.col_vals.shape": 800737,
549
+ "model.layers.15.mlp.down_proj.in_perm.shape": 5504,
550
+ "model.layers.16.self_attn.q_proj.dense_weights.shape": 1048576,
551
+ "model.layers.16.self_attn.q_proj.row_offsets.shape": 4097,
552
+ "model.layers.16.self_attn.q_proj.col_vals.shape": 285161,
553
+ "model.layers.16.self_attn.q_proj.in_perm.shape": 2048,
554
+ "model.layers.16.self_attn.k_proj.dense_weights.shape": 1048576,
555
+ "model.layers.16.self_attn.k_proj.row_offsets.shape": 4097,
556
+ "model.layers.16.self_attn.k_proj.col_vals.shape": 264469,
557
+ "model.layers.16.self_attn.k_proj.in_perm.shape": 2048,
558
+ "model.layers.16.self_attn.v_proj.dense_weights.shape": 1048576,
559
+ "model.layers.16.self_attn.v_proj.row_offsets.shape": 4097,
560
+ "model.layers.16.self_attn.v_proj.col_vals.shape": 310837,
561
+ "model.layers.16.self_attn.v_proj.in_perm.shape": 2048,
562
+ "model.layers.16.self_attn.o_proj.dense_weights.shape": 1048576,
563
+ "model.layers.16.self_attn.o_proj.row_offsets.shape": 4097,
564
+ "model.layers.16.self_attn.o_proj.col_vals.shape": 280326,
565
+ "model.layers.16.self_attn.o_proj.in_perm.shape": 2048,
566
+ "model.layers.16.mlp.gate_proj.dense_weights.shape": 2818048,
567
+ "model.layers.16.mlp.gate_proj.row_offsets.shape": 11009,
568
+ "model.layers.16.mlp.gate_proj.col_vals.shape": 751402,
569
+ "model.layers.16.mlp.gate_proj.in_perm.shape": 2048,
570
+ "model.layers.16.mlp.up_proj.dense_weights.shape": 2818048,
571
+ "model.layers.16.mlp.up_proj.row_offsets.shape": 11009,
572
+ "model.layers.16.mlp.up_proj.col_vals.shape": 765291,
573
+ "model.layers.16.mlp.up_proj.in_perm.shape": 2048,
574
+ "model.layers.16.mlp.down_proj.dense_weights.shape": 2818048,
575
+ "model.layers.16.mlp.down_proj.row_offsets.shape": 4097,
576
+ "model.layers.16.mlp.down_proj.col_vals.shape": 796628,
577
+ "model.layers.16.mlp.down_proj.in_perm.shape": 5504,
578
+ "model.layers.17.self_attn.q_proj.dense_weights.shape": 1048576,
579
+ "model.layers.17.self_attn.q_proj.row_offsets.shape": 4097,
580
+ "model.layers.17.self_attn.q_proj.col_vals.shape": 285905,
581
+ "model.layers.17.self_attn.q_proj.in_perm.shape": 2048,
582
+ "model.layers.17.self_attn.k_proj.dense_weights.shape": 1048576,
583
+ "model.layers.17.self_attn.k_proj.row_offsets.shape": 4097,
584
+ "model.layers.17.self_attn.k_proj.col_vals.shape": 270381,
585
+ "model.layers.17.self_attn.k_proj.in_perm.shape": 2048,
586
+ "model.layers.17.self_attn.v_proj.dense_weights.shape": 1048576,
587
+ "model.layers.17.self_attn.v_proj.row_offsets.shape": 4097,
588
+ "model.layers.17.self_attn.v_proj.col_vals.shape": 307352,
589
+ "model.layers.17.self_attn.v_proj.in_perm.shape": 2048,
590
+ "model.layers.17.self_attn.o_proj.dense_weights.shape": 1048576,
591
+ "model.layers.17.self_attn.o_proj.row_offsets.shape": 4097,
592
+ "model.layers.17.self_attn.o_proj.col_vals.shape": 281243,
593
+ "model.layers.17.self_attn.o_proj.in_perm.shape": 2048,
594
+ "model.layers.17.mlp.gate_proj.dense_weights.shape": 2818048,
595
+ "model.layers.17.mlp.gate_proj.row_offsets.shape": 11009,
596
+ "model.layers.17.mlp.gate_proj.col_vals.shape": 750543,
597
+ "model.layers.17.mlp.gate_proj.in_perm.shape": 2048,
598
+ "model.layers.17.mlp.up_proj.dense_weights.shape": 2818048,
599
+ "model.layers.17.mlp.up_proj.row_offsets.shape": 11009,
600
+ "model.layers.17.mlp.up_proj.col_vals.shape": 761966,
601
+ "model.layers.17.mlp.up_proj.in_perm.shape": 2048,
602
+ "model.layers.17.mlp.down_proj.dense_weights.shape": 2818048,
603
+ "model.layers.17.mlp.down_proj.row_offsets.shape": 4097,
604
+ "model.layers.17.mlp.down_proj.col_vals.shape": 786876,
605
+ "model.layers.17.mlp.down_proj.in_perm.shape": 5504,
606
+ "model.layers.18.self_attn.q_proj.dense_weights.shape": 1048576,
607
+ "model.layers.18.self_attn.q_proj.row_offsets.shape": 4097,
608
+ "model.layers.18.self_attn.q_proj.col_vals.shape": 287515,
609
+ "model.layers.18.self_attn.q_proj.in_perm.shape": 2048,
610
+ "model.layers.18.self_attn.k_proj.dense_weights.shape": 1048576,
611
+ "model.layers.18.self_attn.k_proj.row_offsets.shape": 4097,
612
+ "model.layers.18.self_attn.k_proj.col_vals.shape": 275355,
613
+ "model.layers.18.self_attn.k_proj.in_perm.shape": 2048,
614
+ "model.layers.18.self_attn.v_proj.dense_weights.shape": 1048576,
615
+ "model.layers.18.self_attn.v_proj.row_offsets.shape": 4097,
616
+ "model.layers.18.self_attn.v_proj.col_vals.shape": 305207,
617
+ "model.layers.18.self_attn.v_proj.in_perm.shape": 2048,
618
+ "model.layers.18.self_attn.o_proj.dense_weights.shape": 1048576,
619
+ "model.layers.18.self_attn.o_proj.row_offsets.shape": 4097,
620
+ "model.layers.18.self_attn.o_proj.col_vals.shape": 280193,
621
+ "model.layers.18.self_attn.o_proj.in_perm.shape": 2048,
622
+ "model.layers.18.mlp.gate_proj.dense_weights.shape": 2818048,
623
+ "model.layers.18.mlp.gate_proj.row_offsets.shape": 11009,
624
+ "model.layers.18.mlp.gate_proj.col_vals.shape": 750879,
625
+ "model.layers.18.mlp.gate_proj.in_perm.shape": 2048,
626
+ "model.layers.18.mlp.up_proj.dense_weights.shape": 2818048,
627
+ "model.layers.18.mlp.up_proj.row_offsets.shape": 11009,
628
+ "model.layers.18.mlp.up_proj.col_vals.shape": 761380,
629
+ "model.layers.18.mlp.up_proj.in_perm.shape": 2048,
630
+ "model.layers.18.mlp.down_proj.dense_weights.shape": 2818048,
631
+ "model.layers.18.mlp.down_proj.row_offsets.shape": 4097,
632
+ "model.layers.18.mlp.down_proj.col_vals.shape": 791345,
633
+ "model.layers.18.mlp.down_proj.in_perm.shape": 5504,
634
+ "model.layers.19.self_attn.q_proj.dense_weights.shape": 1048576,
635
+ "model.layers.19.self_attn.q_proj.row_offsets.shape": 4097,
636
+ "model.layers.19.self_attn.q_proj.col_vals.shape": 284178,
637
+ "model.layers.19.self_attn.q_proj.in_perm.shape": 2048,
638
+ "model.layers.19.self_attn.k_proj.dense_weights.shape": 1048576,
639
+ "model.layers.19.self_attn.k_proj.row_offsets.shape": 4097,
640
+ "model.layers.19.self_attn.k_proj.col_vals.shape": 269926,
641
+ "model.layers.19.self_attn.k_proj.in_perm.shape": 2048,
642
+ "model.layers.19.self_attn.v_proj.dense_weights.shape": 1048576,
643
+ "model.layers.19.self_attn.v_proj.row_offsets.shape": 4097,
644
+ "model.layers.19.self_attn.v_proj.col_vals.shape": 303253,
645
+ "model.layers.19.self_attn.v_proj.in_perm.shape": 2048,
646
+ "model.layers.19.self_attn.o_proj.dense_weights.shape": 1048576,
647
+ "model.layers.19.self_attn.o_proj.row_offsets.shape": 4097,
648
+ "model.layers.19.self_attn.o_proj.col_vals.shape": 281276,
649
+ "model.layers.19.self_attn.o_proj.in_perm.shape": 2048,
650
+ "model.layers.19.mlp.gate_proj.dense_weights.shape": 2818048,
651
+ "model.layers.19.mlp.gate_proj.row_offsets.shape": 11009,
652
+ "model.layers.19.mlp.gate_proj.col_vals.shape": 751227,
653
+ "model.layers.19.mlp.gate_proj.in_perm.shape": 2048,
654
+ "model.layers.19.mlp.up_proj.dense_weights.shape": 2818048,
655
+ "model.layers.19.mlp.up_proj.row_offsets.shape": 11009,
656
+ "model.layers.19.mlp.up_proj.col_vals.shape": 759368,
657
+ "model.layers.19.mlp.up_proj.in_perm.shape": 2048,
658
+ "model.layers.19.mlp.down_proj.dense_weights.shape": 2818048,
659
+ "model.layers.19.mlp.down_proj.row_offsets.shape": 4097,
660
+ "model.layers.19.mlp.down_proj.col_vals.shape": 785349,
661
+ "model.layers.19.mlp.down_proj.in_perm.shape": 5504,
662
+ "model.layers.20.self_attn.q_proj.dense_weights.shape": 1048576,
663
+ "model.layers.20.self_attn.q_proj.row_offsets.shape": 4097,
664
+ "model.layers.20.self_attn.q_proj.col_vals.shape": 283706,
665
+ "model.layers.20.self_attn.q_proj.in_perm.shape": 2048,
666
+ "model.layers.20.self_attn.k_proj.dense_weights.shape": 1048576,
667
+ "model.layers.20.self_attn.k_proj.row_offsets.shape": 4097,
668
+ "model.layers.20.self_attn.k_proj.col_vals.shape": 272655,
669
+ "model.layers.20.self_attn.k_proj.in_perm.shape": 2048,
670
+ "model.layers.20.self_attn.v_proj.dense_weights.shape": 1048576,
671
+ "model.layers.20.self_attn.v_proj.row_offsets.shape": 4097,
672
+ "model.layers.20.self_attn.v_proj.col_vals.shape": 303626,
673
+ "model.layers.20.self_attn.v_proj.in_perm.shape": 2048,
674
+ "model.layers.20.self_attn.o_proj.dense_weights.shape": 1048576,
675
+ "model.layers.20.self_attn.o_proj.row_offsets.shape": 4097,
676
+ "model.layers.20.self_attn.o_proj.col_vals.shape": 278935,
677
+ "model.layers.20.self_attn.o_proj.in_perm.shape": 2048,
678
+ "model.layers.20.mlp.gate_proj.dense_weights.shape": 2818048,
679
+ "model.layers.20.mlp.gate_proj.row_offsets.shape": 11009,
680
+ "model.layers.20.mlp.gate_proj.col_vals.shape": 751411,
681
+ "model.layers.20.mlp.gate_proj.in_perm.shape": 2048,
682
+ "model.layers.20.mlp.up_proj.dense_weights.shape": 2818048,
683
+ "model.layers.20.mlp.up_proj.row_offsets.shape": 11009,
684
+ "model.layers.20.mlp.up_proj.col_vals.shape": 758252,
685
+ "model.layers.20.mlp.up_proj.in_perm.shape": 2048,
686
+ "model.layers.20.mlp.down_proj.dense_weights.shape": 2818048,
687
+ "model.layers.20.mlp.down_proj.row_offsets.shape": 4097,
688
+ "model.layers.20.mlp.down_proj.col_vals.shape": 784248,
689
+ "model.layers.20.mlp.down_proj.in_perm.shape": 5504,
690
+ "model.layers.21.self_attn.q_proj.dense_weights.shape": 1048576,
691
+ "model.layers.21.self_attn.q_proj.row_offsets.shape": 4097,
692
+ "model.layers.21.self_attn.q_proj.col_vals.shape": 284547,
693
+ "model.layers.21.self_attn.q_proj.in_perm.shape": 2048,
694
+ "model.layers.21.self_attn.k_proj.dense_weights.shape": 1048576,
695
+ "model.layers.21.self_attn.k_proj.row_offsets.shape": 4097,
696
+ "model.layers.21.self_attn.k_proj.col_vals.shape": 276845,
697
+ "model.layers.21.self_attn.k_proj.in_perm.shape": 2048,
698
+ "model.layers.21.self_attn.v_proj.dense_weights.shape": 1048576,
699
+ "model.layers.21.self_attn.v_proj.row_offsets.shape": 4097,
700
+ "model.layers.21.self_attn.v_proj.col_vals.shape": 300429,
701
+ "model.layers.21.self_attn.v_proj.in_perm.shape": 2048,
702
+ "model.layers.21.self_attn.o_proj.dense_weights.shape": 1048576,
703
+ "model.layers.21.self_attn.o_proj.row_offsets.shape": 4097,
704
+ "model.layers.21.self_attn.o_proj.col_vals.shape": 278938,
705
+ "model.layers.21.self_attn.o_proj.in_perm.shape": 2048,
706
+ "model.layers.21.mlp.gate_proj.dense_weights.shape": 2818048,
707
+ "model.layers.21.mlp.gate_proj.row_offsets.shape": 11009,
708
+ "model.layers.21.mlp.gate_proj.col_vals.shape": 753249,
709
+ "model.layers.21.mlp.gate_proj.in_perm.shape": 2048,
710
+ "model.layers.21.mlp.up_proj.dense_weights.shape": 2818048,
711
+ "model.layers.21.mlp.up_proj.row_offsets.shape": 11009,
712
+ "model.layers.21.mlp.up_proj.col_vals.shape": 760378,
713
+ "model.layers.21.mlp.up_proj.in_perm.shape": 2048,
714
+ "model.layers.21.mlp.down_proj.dense_weights.shape": 2818048,
715
+ "model.layers.21.mlp.down_proj.row_offsets.shape": 4097,
716
+ "model.layers.21.mlp.down_proj.col_vals.shape": 778977,
717
+ "model.layers.21.mlp.down_proj.in_perm.shape": 5504,
718
+ "model.layers.22.self_attn.q_proj.dense_weights.shape": 1048576,
719
+ "model.layers.22.self_attn.q_proj.row_offsets.shape": 4097,
720
+ "model.layers.22.self_attn.q_proj.col_vals.shape": 287172,
721
+ "model.layers.22.self_attn.q_proj.in_perm.shape": 2048,
722
+ "model.layers.22.self_attn.k_proj.dense_weights.shape": 1048576,
723
+ "model.layers.22.self_attn.k_proj.row_offsets.shape": 4097,
724
+ "model.layers.22.self_attn.k_proj.col_vals.shape": 279951,
725
+ "model.layers.22.self_attn.k_proj.in_perm.shape": 2048,
726
+ "model.layers.22.self_attn.v_proj.dense_weights.shape": 1048576,
727
+ "model.layers.22.self_attn.v_proj.row_offsets.shape": 4097,
728
+ "model.layers.22.self_attn.v_proj.col_vals.shape": 300804,
729
+ "model.layers.22.self_attn.v_proj.in_perm.shape": 2048,
730
+ "model.layers.22.self_attn.o_proj.dense_weights.shape": 1048576,
731
+ "model.layers.22.self_attn.o_proj.row_offsets.shape": 4097,
732
+ "model.layers.22.self_attn.o_proj.col_vals.shape": 277006,
733
+ "model.layers.22.self_attn.o_proj.in_perm.shape": 2048,
734
+ "model.layers.22.mlp.gate_proj.dense_weights.shape": 2818048,
735
+ "model.layers.22.mlp.gate_proj.row_offsets.shape": 11009,
736
+ "model.layers.22.mlp.gate_proj.col_vals.shape": 755441,
737
+ "model.layers.22.mlp.gate_proj.in_perm.shape": 2048,
738
+ "model.layers.22.mlp.up_proj.dense_weights.shape": 2818048,
739
+ "model.layers.22.mlp.up_proj.row_offsets.shape": 11009,
740
+ "model.layers.22.mlp.up_proj.col_vals.shape": 760582,
741
+ "model.layers.22.mlp.up_proj.in_perm.shape": 2048,
742
+ "model.layers.22.mlp.down_proj.dense_weights.shape": 2818048,
743
+ "model.layers.22.mlp.down_proj.row_offsets.shape": 4097,
744
+ "model.layers.22.mlp.down_proj.col_vals.shape": 777317,
745
+ "model.layers.22.mlp.down_proj.in_perm.shape": 5504,
746
+ "model.layers.23.self_attn.q_proj.dense_weights.shape": 1048576,
747
+ "model.layers.23.self_attn.q_proj.row_offsets.shape": 4097,
748
+ "model.layers.23.self_attn.q_proj.col_vals.shape": 286813,
749
+ "model.layers.23.self_attn.q_proj.in_perm.shape": 2048,
750
+ "model.layers.23.self_attn.k_proj.dense_weights.shape": 1048576,
751
+ "model.layers.23.self_attn.k_proj.row_offsets.shape": 4097,
752
+ "model.layers.23.self_attn.k_proj.col_vals.shape": 281775,
753
+ "model.layers.23.self_attn.k_proj.in_perm.shape": 2048,
754
+ "model.layers.23.self_attn.v_proj.dense_weights.shape": 1048576,
755
+ "model.layers.23.self_attn.v_proj.row_offsets.shape": 4097,
756
+ "model.layers.23.self_attn.v_proj.col_vals.shape": 295725,
757
+ "model.layers.23.self_attn.v_proj.in_perm.shape": 2048,
758
+ "model.layers.23.self_attn.o_proj.dense_weights.shape": 1048576,
759
+ "model.layers.23.self_attn.o_proj.row_offsets.shape": 4097,
760
+ "model.layers.23.self_attn.o_proj.col_vals.shape": 279797,
761
+ "model.layers.23.self_attn.o_proj.in_perm.shape": 2048,
762
+ "model.layers.23.mlp.gate_proj.dense_weights.shape": 2818048,
763
+ "model.layers.23.mlp.gate_proj.row_offsets.shape": 11009,
764
+ "model.layers.23.mlp.gate_proj.col_vals.shape": 753193,
765
+ "model.layers.23.mlp.gate_proj.in_perm.shape": 2048,
766
+ "model.layers.23.mlp.up_proj.dense_weights.shape": 2818048,
767
+ "model.layers.23.mlp.up_proj.row_offsets.shape": 11009,
768
+ "model.layers.23.mlp.up_proj.col_vals.shape": 758030,
769
+ "model.layers.23.mlp.up_proj.in_perm.shape": 2048,
770
+ "model.layers.23.mlp.down_proj.dense_weights.shape": 2818048,
771
+ "model.layers.23.mlp.down_proj.row_offsets.shape": 4097,
772
+ "model.layers.23.mlp.down_proj.col_vals.shape": 779986,
773
+ "model.layers.23.mlp.down_proj.in_perm.shape": 5504,
774
+ "model.layers.24.self_attn.q_proj.dense_weights.shape": 1048576,
775
+ "model.layers.24.self_attn.q_proj.row_offsets.shape": 4097,
776
+ "model.layers.24.self_attn.q_proj.col_vals.shape": 285039,
777
+ "model.layers.24.self_attn.q_proj.in_perm.shape": 2048,
778
+ "model.layers.24.self_attn.k_proj.dense_weights.shape": 1048576,
779
+ "model.layers.24.self_attn.k_proj.row_offsets.shape": 4097,
780
+ "model.layers.24.self_attn.k_proj.col_vals.shape": 277202,
781
+ "model.layers.24.self_attn.k_proj.in_perm.shape": 2048,
782
+ "model.layers.24.self_attn.v_proj.dense_weights.shape": 1048576,
783
+ "model.layers.24.self_attn.v_proj.row_offsets.shape": 4097,
784
+ "model.layers.24.self_attn.v_proj.col_vals.shape": 298680,
785
+ "model.layers.24.self_attn.v_proj.in_perm.shape": 2048,
786
+ "model.layers.24.self_attn.o_proj.dense_weights.shape": 1048576,
787
+ "model.layers.24.self_attn.o_proj.row_offsets.shape": 4097,
788
+ "model.layers.24.self_attn.o_proj.col_vals.shape": 279938,
789
+ "model.layers.24.self_attn.o_proj.in_perm.shape": 2048,
790
+ "model.layers.24.mlp.gate_proj.dense_weights.shape": 2818048,
791
+ "model.layers.24.mlp.gate_proj.row_offsets.shape": 11009,
792
+ "model.layers.24.mlp.gate_proj.col_vals.shape": 752983,
793
+ "model.layers.24.mlp.gate_proj.in_perm.shape": 2048,
794
+ "model.layers.24.mlp.up_proj.dense_weights.shape": 2818048,
795
+ "model.layers.24.mlp.up_proj.row_offsets.shape": 11009,
796
+ "model.layers.24.mlp.up_proj.col_vals.shape": 758555,
797
+ "model.layers.24.mlp.up_proj.in_perm.shape": 2048,
798
+ "model.layers.24.mlp.down_proj.dense_weights.shape": 2818048,
799
+ "model.layers.24.mlp.down_proj.row_offsets.shape": 4097,
800
+ "model.layers.24.mlp.down_proj.col_vals.shape": 779069,
801
+ "model.layers.24.mlp.down_proj.in_perm.shape": 5504,
802
+ "model.layers.25.self_attn.q_proj.dense_weights.shape": 1048576,
803
+ "model.layers.25.self_attn.q_proj.row_offsets.shape": 4097,
804
+ "model.layers.25.self_attn.q_proj.col_vals.shape": 284365,
805
+ "model.layers.25.self_attn.q_proj.in_perm.shape": 2048,
806
+ "model.layers.25.self_attn.k_proj.dense_weights.shape": 1048576,
807
+ "model.layers.25.self_attn.k_proj.row_offsets.shape": 4097,
808
+ "model.layers.25.self_attn.k_proj.col_vals.shape": 280541,
809
+ "model.layers.25.self_attn.k_proj.in_perm.shape": 2048,
810
+ "model.layers.25.self_attn.v_proj.dense_weights.shape": 1048576,
811
+ "model.layers.25.self_attn.v_proj.row_offsets.shape": 4097,
812
+ "model.layers.25.self_attn.v_proj.col_vals.shape": 293143,
813
+ "model.layers.25.self_attn.v_proj.in_perm.shape": 2048,
814
+ "model.layers.25.self_attn.o_proj.dense_weights.shape": 1048576,
815
+ "model.layers.25.self_attn.o_proj.row_offsets.shape": 4097,
816
+ "model.layers.25.self_attn.o_proj.col_vals.shape": 278790,
817
+ "model.layers.25.self_attn.o_proj.in_perm.shape": 2048,
818
+ "model.layers.25.mlp.gate_proj.dense_weights.shape": 2818048,
819
+ "model.layers.25.mlp.gate_proj.row_offsets.shape": 11009,
820
+ "model.layers.25.mlp.gate_proj.col_vals.shape": 751453,
821
+ "model.layers.25.mlp.gate_proj.in_perm.shape": 2048,
822
+ "model.layers.25.mlp.up_proj.dense_weights.shape": 2818048,
823
+ "model.layers.25.mlp.up_proj.row_offsets.shape": 11009,
824
+ "model.layers.25.mlp.up_proj.col_vals.shape": 757286,
825
+ "model.layers.25.mlp.up_proj.in_perm.shape": 2048,
826
+ "model.layers.25.mlp.down_proj.dense_weights.shape": 2818048,
827
+ "model.layers.25.mlp.down_proj.row_offsets.shape": 4097,
828
+ "model.layers.25.mlp.down_proj.col_vals.shape": 782955,
829
+ "model.layers.25.mlp.down_proj.in_perm.shape": 5504,
830
+ "model.layers.26.self_attn.q_proj.dense_weights.shape": 1048576,
831
+ "model.layers.26.self_attn.q_proj.row_offsets.shape": 4097,
832
+ "model.layers.26.self_attn.q_proj.col_vals.shape": 284330,
833
+ "model.layers.26.self_attn.q_proj.in_perm.shape": 2048,
834
+ "model.layers.26.self_attn.k_proj.dense_weights.shape": 1048576,
835
+ "model.layers.26.self_attn.k_proj.row_offsets.shape": 4097,
836
+ "model.layers.26.self_attn.k_proj.col_vals.shape": 277278,
837
+ "model.layers.26.self_attn.k_proj.in_perm.shape": 2048,
838
+ "model.layers.26.self_attn.v_proj.dense_weights.shape": 1048576,
839
+ "model.layers.26.self_attn.v_proj.row_offsets.shape": 4097,
840
+ "model.layers.26.self_attn.v_proj.col_vals.shape": 294635,
841
+ "model.layers.26.self_attn.v_proj.in_perm.shape": 2048,
842
+ "model.layers.26.self_attn.o_proj.dense_weights.shape": 1048576,
843
+ "model.layers.26.self_attn.o_proj.row_offsets.shape": 4097,
844
+ "model.layers.26.self_attn.o_proj.col_vals.shape": 275895,
845
+ "model.layers.26.self_attn.o_proj.in_perm.shape": 2048,
846
+ "model.layers.26.mlp.gate_proj.dense_weights.shape": 2818048,
847
+ "model.layers.26.mlp.gate_proj.row_offsets.shape": 11009,
848
+ "model.layers.26.mlp.gate_proj.col_vals.shape": 747723,
849
+ "model.layers.26.mlp.gate_proj.in_perm.shape": 2048,
850
+ "model.layers.26.mlp.up_proj.dense_weights.shape": 2818048,
851
+ "model.layers.26.mlp.up_proj.row_offsets.shape": 11009,
852
+ "model.layers.26.mlp.up_proj.col_vals.shape": 759279,
853
+ "model.layers.26.mlp.up_proj.in_perm.shape": 2048,
854
+ "model.layers.26.mlp.down_proj.dense_weights.shape": 2818048,
855
+ "model.layers.26.mlp.down_proj.row_offsets.shape": 4097,
856
+ "model.layers.26.mlp.down_proj.col_vals.shape": 796751,
857
+ "model.layers.26.mlp.down_proj.in_perm.shape": 5504,
858
+ "model.layers.27.self_attn.q_proj.dense_weights.shape": 1048576,
859
+ "model.layers.27.self_attn.q_proj.row_offsets.shape": 4097,
860
+ "model.layers.27.self_attn.q_proj.col_vals.shape": 282594,
861
+ "model.layers.27.self_attn.q_proj.in_perm.shape": 2048,
862
+ "model.layers.27.self_attn.k_proj.dense_weights.shape": 1048576,
863
+ "model.layers.27.self_attn.k_proj.row_offsets.shape": 4097,
864
+ "model.layers.27.self_attn.k_proj.col_vals.shape": 279388,
865
+ "model.layers.27.self_attn.k_proj.in_perm.shape": 2048,
866
+ "model.layers.27.self_attn.v_proj.dense_weights.shape": 1048576,
867
+ "model.layers.27.self_attn.v_proj.row_offsets.shape": 4097,
868
+ "model.layers.27.self_attn.v_proj.col_vals.shape": 289702,
869
+ "model.layers.27.self_attn.v_proj.in_perm.shape": 2048,
870
+ "model.layers.27.self_attn.o_proj.dense_weights.shape": 1048576,
871
+ "model.layers.27.self_attn.o_proj.row_offsets.shape": 4097,
872
+ "model.layers.27.self_attn.o_proj.col_vals.shape": 279438,
873
+ "model.layers.27.self_attn.o_proj.in_perm.shape": 2048,
874
+ "model.layers.27.mlp.gate_proj.dense_weights.shape": 2818048,
875
+ "model.layers.27.mlp.gate_proj.row_offsets.shape": 11009,
876
+ "model.layers.27.mlp.gate_proj.col_vals.shape": 744601,
877
+ "model.layers.27.mlp.gate_proj.in_perm.shape": 2048,
878
+ "model.layers.27.mlp.up_proj.dense_weights.shape": 2818048,
879
+ "model.layers.27.mlp.up_proj.row_offsets.shape": 11009,
880
+ "model.layers.27.mlp.up_proj.col_vals.shape": 760423,
881
+ "model.layers.27.mlp.up_proj.in_perm.shape": 2048,
882
+ "model.layers.27.mlp.down_proj.dense_weights.shape": 2818048,
883
+ "model.layers.27.mlp.down_proj.row_offsets.shape": 4097,
884
+ "model.layers.27.mlp.down_proj.col_vals.shape": 809936,
885
+ "model.layers.27.mlp.down_proj.in_perm.shape": 5504,
886
+ "model.layers.28.self_attn.q_proj.dense_weights.shape": 1048576,
887
+ "model.layers.28.self_attn.q_proj.row_offsets.shape": 4097,
888
+ "model.layers.28.self_attn.q_proj.col_vals.shape": 283449,
889
+ "model.layers.28.self_attn.q_proj.in_perm.shape": 2048,
890
+ "model.layers.28.self_attn.k_proj.dense_weights.shape": 1048576,
891
+ "model.layers.28.self_attn.k_proj.row_offsets.shape": 4097,
892
+ "model.layers.28.self_attn.k_proj.col_vals.shape": 280044,
893
+ "model.layers.28.self_attn.k_proj.in_perm.shape": 2048,
894
+ "model.layers.28.self_attn.v_proj.dense_weights.shape": 1048576,
895
+ "model.layers.28.self_attn.v_proj.row_offsets.shape": 4097,
896
+ "model.layers.28.self_attn.v_proj.col_vals.shape": 289314,
897
+ "model.layers.28.self_attn.v_proj.in_perm.shape": 2048,
898
+ "model.layers.28.self_attn.o_proj.dense_weights.shape": 1048576,
899
+ "model.layers.28.self_attn.o_proj.row_offsets.shape": 4097,
900
+ "model.layers.28.self_attn.o_proj.col_vals.shape": 272741,
901
+ "model.layers.28.self_attn.o_proj.in_perm.shape": 2048,
902
+ "model.layers.28.mlp.gate_proj.dense_weights.shape": 2818048,
903
+ "model.layers.28.mlp.gate_proj.row_offsets.shape": 11009,
904
+ "model.layers.28.mlp.gate_proj.col_vals.shape": 741289,
905
+ "model.layers.28.mlp.gate_proj.in_perm.shape": 2048,
906
+ "model.layers.28.mlp.up_proj.dense_weights.shape": 2818048,
907
+ "model.layers.28.mlp.up_proj.row_offsets.shape": 11009,
908
+ "model.layers.28.mlp.up_proj.col_vals.shape": 762307,
909
+ "model.layers.28.mlp.up_proj.in_perm.shape": 2048,
910
+ "model.layers.28.mlp.down_proj.dense_weights.shape": 2818048,
911
+ "model.layers.28.mlp.down_proj.row_offsets.shape": 4097,
912
+ "model.layers.28.mlp.down_proj.col_vals.shape": 825477,
913
+ "model.layers.28.mlp.down_proj.in_perm.shape": 5504,
914
+ "model.layers.29.self_attn.q_proj.dense_weights.shape": 1048576,
915
+ "model.layers.29.self_attn.q_proj.row_offsets.shape": 4097,
916
+ "model.layers.29.self_attn.q_proj.col_vals.shape": 281395,
917
+ "model.layers.29.self_attn.q_proj.in_perm.shape": 2048,
918
+ "model.layers.29.self_attn.k_proj.dense_weights.shape": 1048576,
919
+ "model.layers.29.self_attn.k_proj.row_offsets.shape": 4097,
920
+ "model.layers.29.self_attn.k_proj.col_vals.shape": 276980,
921
+ "model.layers.29.self_attn.k_proj.in_perm.shape": 2048,
922
+ "model.layers.29.self_attn.v_proj.dense_weights.shape": 1048576,
923
+ "model.layers.29.self_attn.v_proj.row_offsets.shape": 4097,
924
+ "model.layers.29.self_attn.v_proj.col_vals.shape": 292345,
925
+ "model.layers.29.self_attn.v_proj.in_perm.shape": 2048,
926
+ "model.layers.29.self_attn.o_proj.dense_weights.shape": 1048576,
927
+ "model.layers.29.self_attn.o_proj.row_offsets.shape": 4097,
928
+ "model.layers.29.self_attn.o_proj.col_vals.shape": 276014,
929
+ "model.layers.29.self_attn.o_proj.in_perm.shape": 2048,
930
+ "model.layers.29.mlp.gate_proj.dense_weights.shape": 2818048,
931
+ "model.layers.29.mlp.gate_proj.row_offsets.shape": 11009,
932
+ "model.layers.29.mlp.gate_proj.col_vals.shape": 737393,
933
+ "model.layers.29.mlp.gate_proj.in_perm.shape": 2048,
934
+ "model.layers.29.mlp.up_proj.dense_weights.shape": 2818048,
935
+ "model.layers.29.mlp.up_proj.row_offsets.shape": 11009,
936
+ "model.layers.29.mlp.up_proj.col_vals.shape": 758464,
937
+ "model.layers.29.mlp.up_proj.in_perm.shape": 2048,
938
+ "model.layers.29.mlp.down_proj.dense_weights.shape": 2818048,
939
+ "model.layers.29.mlp.down_proj.row_offsets.shape": 4097,
940
+ "model.layers.29.mlp.down_proj.col_vals.shape": 850037,
941
+ "model.layers.29.mlp.down_proj.in_perm.shape": 5504,
942
+ "model.layers.30.self_attn.q_proj.dense_weights.shape": 1048576,
943
+ "model.layers.30.self_attn.q_proj.row_offsets.shape": 4097,
944
+ "model.layers.30.self_attn.q_proj.col_vals.shape": 281230,
945
+ "model.layers.30.self_attn.q_proj.in_perm.shape": 2048,
946
+ "model.layers.30.self_attn.k_proj.dense_weights.shape": 1048576,
947
+ "model.layers.30.self_attn.k_proj.row_offsets.shape": 4097,
948
+ "model.layers.30.self_attn.k_proj.col_vals.shape": 277682,
949
+ "model.layers.30.self_attn.k_proj.in_perm.shape": 2048,
950
+ "model.layers.30.self_attn.v_proj.dense_weights.shape": 1048576,
951
+ "model.layers.30.self_attn.v_proj.row_offsets.shape": 4097,
952
+ "model.layers.30.self_attn.v_proj.col_vals.shape": 287809,
953
+ "model.layers.30.self_attn.v_proj.in_perm.shape": 2048,
954
+ "model.layers.30.self_attn.o_proj.dense_weights.shape": 1048576,
955
+ "model.layers.30.self_attn.o_proj.row_offsets.shape": 4097,
956
+ "model.layers.30.self_attn.o_proj.col_vals.shape": 277445,
957
+ "model.layers.30.self_attn.o_proj.in_perm.shape": 2048,
958
+ "model.layers.30.mlp.gate_proj.dense_weights.shape": 2818048,
959
+ "model.layers.30.mlp.gate_proj.row_offsets.shape": 11009,
960
+ "model.layers.30.mlp.gate_proj.col_vals.shape": 721613,
961
+ "model.layers.30.mlp.gate_proj.in_perm.shape": 2048,
962
+ "model.layers.30.mlp.up_proj.dense_weights.shape": 2818048,
963
+ "model.layers.30.mlp.up_proj.row_offsets.shape": 11009,
964
+ "model.layers.30.mlp.up_proj.col_vals.shape": 746115,
965
+ "model.layers.30.mlp.up_proj.in_perm.shape": 2048,
966
+ "model.layers.30.mlp.down_proj.dense_weights.shape": 2818048,
967
+ "model.layers.30.mlp.down_proj.row_offsets.shape": 4097,
968
+ "model.layers.30.mlp.down_proj.col_vals.shape": 829475,
969
+ "model.layers.30.mlp.down_proj.in_perm.shape": 5504,
970
+ "model.layers.31.self_attn.q_proj.dense_weights.shape": 1048576,
971
+ "model.layers.31.self_attn.q_proj.row_offsets.shape": 4097,
972
+ "model.layers.31.self_attn.q_proj.col_vals.shape": 277274,
973
+ "model.layers.31.self_attn.q_proj.in_perm.shape": 2048,
974
+ "model.layers.31.self_attn.k_proj.dense_weights.shape": 1048576,
975
+ "model.layers.31.self_attn.k_proj.row_offsets.shape": 4097,
976
+ "model.layers.31.self_attn.k_proj.col_vals.shape": 274320,
977
+ "model.layers.31.self_attn.k_proj.in_perm.shape": 2048,
978
+ "model.layers.31.self_attn.v_proj.dense_weights.shape": 1048576,
979
+ "model.layers.31.self_attn.v_proj.row_offsets.shape": 4097,
980
+ "model.layers.31.self_attn.v_proj.col_vals.shape": 290918,
981
+ "model.layers.31.self_attn.v_proj.in_perm.shape": 2048,
982
+ "model.layers.31.self_attn.o_proj.dense_weights.shape": 1048576,
983
+ "model.layers.31.self_attn.o_proj.row_offsets.shape": 4097,
984
+ "model.layers.31.self_attn.o_proj.col_vals.shape": 266999,
985
+ "model.layers.31.self_attn.o_proj.in_perm.shape": 2048,
986
+ "model.layers.31.mlp.gate_proj.dense_weights.shape": 2818048,
987
+ "model.layers.31.mlp.gate_proj.row_offsets.shape": 11009,
988
+ "model.layers.31.mlp.gate_proj.col_vals.shape": 741720,
989
+ "model.layers.31.mlp.gate_proj.in_perm.shape": 2048,
990
+ "model.layers.31.mlp.up_proj.dense_weights.shape": 2818048,
991
+ "model.layers.31.mlp.up_proj.row_offsets.shape": 11009,
992
+ "model.layers.31.mlp.up_proj.col_vals.shape": 761614,
993
+ "model.layers.31.mlp.up_proj.in_perm.shape": 2048,
994
+ "model.layers.31.mlp.down_proj.dense_weights.shape": 2818048,
995
+ "model.layers.31.mlp.down_proj.row_offsets.shape": 4097,
996
+ "model.layers.31.mlp.down_proj.col_vals.shape": 997160,
997
+ "model.layers.31.mlp.down_proj.in_perm.shape": 5504
998
+ }
999
+ },
1000
+ "_attn_implementation_autoset": false
1001
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b3e5960eeb8939e99c68c9b69ad6b1f2eb4b0892fb1b7c4135502ee9d11680f
3
+ size 4209228916
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "legacy": false,
35
+ "model_max_length": 1000000000000000019884624838656,
36
+ "pad_token": null,
37
+ "padding_side": "right",
38
+ "sp_model_kwargs": {},
39
+ "tokenizer_class": "LlamaTokenizer",
40
+ "unk_token": "<unk>",
41
+ "use_default_system_prompt": false
42
+ }