Kearm committed on
Commit
5bd14ea
1 Parent(s): 0e611ba

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See raw diff for the full set.
Files changed (50)
  1. .gitattributes +2 -0
  2. README.md +487 -0
  3. added_tokens.json +24 -0
  4. checkpoint-210/added_tokens.json +24 -0
  5. checkpoint-210/config.json +28 -0
  6. checkpoint-210/generation_config.json +7 -0
  7. checkpoint-210/global_step210/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  8. checkpoint-210/global_step210/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  9. checkpoint-210/global_step210/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  10. checkpoint-210/global_step210/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  11. checkpoint-210/global_step210/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  12. checkpoint-210/global_step210/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  13. checkpoint-210/global_step210/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  14. checkpoint-210/global_step210/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  15. checkpoint-210/global_step210/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  16. checkpoint-210/global_step210/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  17. checkpoint-210/global_step210/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  18. checkpoint-210/global_step210/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  19. checkpoint-210/global_step210/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  20. checkpoint-210/global_step210/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  21. checkpoint-210/global_step210/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  22. checkpoint-210/global_step210/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  23. checkpoint-210/latest +1 -0
  24. checkpoint-210/merges.txt +0 -0
  25. checkpoint-210/model-00001-of-00031.safetensors +3 -0
  26. checkpoint-210/model-00002-of-00031.safetensors +3 -0
  27. checkpoint-210/model-00003-of-00031.safetensors +3 -0
  28. checkpoint-210/model-00004-of-00031.safetensors +3 -0
  29. checkpoint-210/model-00005-of-00031.safetensors +3 -0
  30. checkpoint-210/model-00006-of-00031.safetensors +3 -0
  31. checkpoint-210/model-00007-of-00031.safetensors +3 -0
  32. checkpoint-210/model-00008-of-00031.safetensors +3 -0
  33. checkpoint-210/model-00009-of-00031.safetensors +3 -0
  34. checkpoint-210/model-00010-of-00031.safetensors +3 -0
  35. checkpoint-210/model-00011-of-00031.safetensors +3 -0
  36. checkpoint-210/model-00012-of-00031.safetensors +3 -0
  37. checkpoint-210/model-00013-of-00031.safetensors +3 -0
  38. checkpoint-210/model-00014-of-00031.safetensors +3 -0
  39. checkpoint-210/model-00015-of-00031.safetensors +3 -0
  40. checkpoint-210/model-00016-of-00031.safetensors +3 -0
  41. checkpoint-210/model-00017-of-00031.safetensors +3 -0
  42. checkpoint-210/model-00018-of-00031.safetensors +3 -0
  43. checkpoint-210/model-00019-of-00031.safetensors +3 -0
  44. checkpoint-210/model-00020-of-00031.safetensors +3 -0
  45. checkpoint-210/model-00021-of-00031.safetensors +3 -0
  46. checkpoint-210/model-00022-of-00031.safetensors +3 -0
  47. checkpoint-210/model-00023-of-00031.safetensors +3 -0
  48. checkpoint-210/model-00024-of-00031.safetensors +3 -0
  49. checkpoint-210/model-00025-of-00031.safetensors +3 -0
  50. checkpoint-210/model-00026-of-00031.safetensors +3 -0
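The commit message ("Upload folder using huggingface_hub") indicates these files were pushed with the `huggingface_hub` client. A minimal sketch of such an upload; the repo id and login handling are illustrative assumptions, not taken from the commit:

```python
# Sketch: push a local axolotl output directory to the Hub.
# Assumes huggingface_hub is installed and you are authenticated
# (e.g. via `huggingface-cli login`). The repo_id is a placeholder.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="./EVA-Qwen2.5-72B-SFFT-v0.0",   # output_dir from the config below
    repo_id="<user>/EVA-Qwen2.5-72B-SFFT-v0.0",  # hypothetical repo id
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```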
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ checkpoint-210/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,487 @@
+ ---
+ library_name: transformers
+ license: other
+ base_model: Qwen/Qwen2.5-72B
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: EVA-Qwen2.5-72B-SFFT-v0.0
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+ <details><summary>See axolotl config</summary>
+
+ axolotl version: `0.4.1`
+ ```yaml
+ base_model: Qwen/Qwen2.5-72B
+
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: false
+
+ plugins:
+ - axolotl.integrations.liger.LigerPlugin
+ liger_rope: true
+ liger_rms_norm: true
+ liger_swiglu: false
+ liger_fused_linear_cross_entropy: false
+
+ # plugins:
+ # - axolotl.integrations.spectrum.SpectrumPlugin
+
+ # spectrum_top_fraction: 0.5
+ # # Optional if using a pre-scanned model as your base_model. Useful if using a model mirror
+ # spectrum_model_name: Qwen/Qwen2.5-32B
+
+ datasets:
+ - path: datasets/deduped_Synthstruct-Gens_processed_sharegpt_converted_cleaned.jsonl
+   type: sharegpt
+ - path: datasets/opus-instruct-22k-no_refusals-filtered.jsonl
+   type: sharegpt
+ - path: datasets/Celeste_Filtered.jsonl
+   type: sharegpt
+ - path: datasets/Gryphe-S3-5-Charcards-names-2k.jsonl
+   type: sharegpt
+ - path: datasets/deduped_SynthRP-Gens_processed_09-25-2024-ShareGPT_converted_cleaned.jsonl
+   type: sharegpt
+ - path: datasets/deduped_Gryphe-4o-WP-1k.jsonl
+   type: sharegpt
+ - path: datasets/deduped_not_samantha_norefusals.jsonl
+   type: sharegpt
+
+ chat_template: chatml
+ shuffle_merged_datasets: true
+ val_set_size: 0.001
+ output_dir: ./EVA-Qwen2.5-72B-SFFT-v0.0
+
+ sequence_len: 8192
+ sample_packing: true
+ eval_sample_packing: false
+ pad_to_sequence_len: true
+
+ # adapter: qlora
+ # lora_model_dir:
+ # lora_r: 64
+ # lora_alpha: 128
+ # lora_dropout: 0.05
+ # lora_target_linear: true
+ # peft_use_dora: true
+
+ unfrozen_parameters:
+ - ^lm_head.weight$
+ - ^model.embed_tokens.weight$
+ # mlp.down_proj layers
+ - model.layers.62.mlp.down_proj
+ - model.layers.64.mlp.down_proj
+ - model.layers.63.mlp.down_proj
+ - model.layers.66.mlp.down_proj
+ - model.layers.65.mlp.down_proj
+ - model.layers.67.mlp.down_proj
+ - model.layers.68.mlp.down_proj
+ - model.layers.31.mlp.down_proj
+ - model.layers.60.mlp.down_proj
+ - model.layers.69.mlp.down_proj
+ - model.layers.61.mlp.down_proj
+ - model.layers.59.mlp.down_proj
+ - model.layers.30.mlp.down_proj
+ - model.layers.70.mlp.down_proj
+ - model.layers.32.mlp.down_proj
+ - model.layers.34.mlp.down_proj
+ - model.layers.33.mlp.down_proj
+ - model.layers.76.mlp.down_proj
+ - model.layers.72.mlp.down_proj
+ - model.layers.71.mlp.down_proj
+ - model.layers.58.mlp.down_proj
+ - model.layers.75.mlp.down_proj
+ - model.layers.29.mlp.down_proj
+ - model.layers.56.mlp.down_proj
+ - model.layers.26.mlp.down_proj
+ - model.layers.35.mlp.down_proj
+ - model.layers.28.mlp.down_proj
+ - model.layers.57.mlp.down_proj
+ - model.layers.77.mlp.down_proj
+ - model.layers.36.mlp.down_proj
+ - model.layers.27.mlp.down_proj
+ - model.layers.25.mlp.down_proj
+ - model.layers.78.mlp.down_proj
+ - model.layers.37.mlp.down_proj
+ - model.layers.73.mlp.down_proj
+ - model.layers.55.mlp.down_proj
+ - model.layers.54.mlp.down_proj
+ - model.layers.74.mlp.down_proj
+ - model.layers.24.mlp.down_proj
+ - model.layers.53.mlp.down_proj
+ # mlp.gate_proj layers
+ - model.layers.78.mlp.gate_proj
+ - model.layers.77.mlp.gate_proj
+ - model.layers.76.mlp.gate_proj
+ - model.layers.79.mlp.gate_proj
+ - model.layers.75.mlp.gate_proj
+ - model.layers.74.mlp.gate_proj
+ - model.layers.73.mlp.gate_proj
+ - model.layers.72.mlp.gate_proj
+ - model.layers.71.mlp.gate_proj
+ - model.layers.70.mlp.gate_proj
+ - model.layers.69.mlp.gate_proj
+ - model.layers.57.mlp.gate_proj
+ - model.layers.54.mlp.gate_proj
+ - model.layers.55.mlp.gate_proj
+ - model.layers.68.mlp.gate_proj
+ - model.layers.63.mlp.gate_proj
+ - model.layers.53.mlp.gate_proj
+ - model.layers.44.mlp.gate_proj
+ - model.layers.45.mlp.gate_proj
+ - model.layers.49.mlp.gate_proj
+ - model.layers.58.mlp.gate_proj
+ - model.layers.46.mlp.gate_proj
+ - model.layers.56.mlp.gate_proj
+ - model.layers.67.mlp.gate_proj
+ - model.layers.62.mlp.gate_proj
+ - model.layers.50.mlp.gate_proj
+ - model.layers.64.mlp.gate_proj
+ - model.layers.52.mlp.gate_proj
+ - model.layers.40.mlp.gate_proj
+ - model.layers.43.mlp.gate_proj
+ - model.layers.48.mlp.gate_proj
+ - model.layers.66.mlp.gate_proj
+ - model.layers.47.mlp.gate_proj
+ - model.layers.59.mlp.gate_proj
+ - model.layers.65.mlp.gate_proj
+ - model.layers.61.mlp.gate_proj
+ - model.layers.60.mlp.gate_proj
+ - model.layers.42.mlp.gate_proj
+ - model.layers.51.mlp.gate_proj
+ - model.layers.41.mlp.gate_proj
+ # mlp.up_proj layers
+ - model.layers.70.mlp.up_proj
+ - model.layers.69.mlp.up_proj
+ - model.layers.71.mlp.up_proj
+ - model.layers.68.mlp.up_proj
+ - model.layers.72.mlp.up_proj
+ - model.layers.67.mlp.up_proj
+ - model.layers.66.mlp.up_proj
+ - model.layers.73.mlp.up_proj
+ - model.layers.46.mlp.up_proj
+ - model.layers.63.mlp.up_proj
+ - model.layers.75.mlp.up_proj
+ - model.layers.76.mlp.up_proj
+ - model.layers.74.mlp.up_proj
+ - model.layers.45.mlp.up_proj
+ - model.layers.62.mlp.up_proj
+ - model.layers.64.mlp.up_proj
+ - model.layers.65.mlp.up_proj
+ - model.layers.44.mlp.up_proj
+ - model.layers.53.mlp.up_proj
+ - model.layers.47.mlp.up_proj
+ - model.layers.49.mlp.up_proj
+ - model.layers.48.mlp.up_proj
+ - model.layers.57.mlp.up_proj
+ - model.layers.43.mlp.up_proj
+ - model.layers.42.mlp.up_proj
+ - model.layers.56.mlp.up_proj
+ - model.layers.61.mlp.up_proj
+ - model.layers.54.mlp.up_proj
+ - model.layers.40.mlp.up_proj
+ - model.layers.55.mlp.up_proj
+ - model.layers.77.mlp.up_proj
+ - model.layers.60.mlp.up_proj
+ - model.layers.41.mlp.up_proj
+ - model.layers.35.mlp.up_proj
+ - model.layers.37.mlp.up_proj
+ - model.layers.58.mlp.up_proj
+ - model.layers.34.mlp.up_proj
+ - model.layers.38.mlp.up_proj
+ - model.layers.33.mlp.up_proj
+ - model.layers.39.mlp.up_proj
+ # self_attn.k_proj layers
+ - model.layers.36.self_attn.k_proj
+ - model.layers.79.self_attn.k_proj
+ - model.layers.35.self_attn.k_proj
+ - model.layers.34.self_attn.k_proj
+ - model.layers.37.self_attn.k_proj
+ - model.layers.33.self_attn.k_proj
+ - model.layers.38.self_attn.k_proj
+ - model.layers.39.self_attn.k_proj
+ - model.layers.74.self_attn.k_proj
+ - model.layers.77.self_attn.k_proj
+ - model.layers.41.self_attn.k_proj
+ - model.layers.69.self_attn.k_proj
+ - model.layers.32.self_attn.k_proj
+ - model.layers.78.self_attn.k_proj
+ - model.layers.30.self_attn.k_proj
+ - model.layers.70.self_attn.k_proj
+ - model.layers.25.self_attn.k_proj
+ - model.layers.42.self_attn.k_proj
+ - model.layers.29.self_attn.k_proj
+ - model.layers.31.self_attn.k_proj
+ - model.layers.68.self_attn.k_proj
+ - model.layers.66.self_attn.k_proj
+ - model.layers.22.self_attn.k_proj
+ - model.layers.65.self_attn.k_proj
+ - model.layers.44.self_attn.k_proj
+ - model.layers.40.self_attn.k_proj
+ - model.layers.63.self_attn.k_proj
+ - model.layers.23.self_attn.k_proj
+ - model.layers.28.self_attn.k_proj
+ - model.layers.24.self_attn.k_proj
+ - model.layers.26.self_attn.k_proj
+ - model.layers.67.self_attn.k_proj
+ - model.layers.75.self_attn.k_proj
+ - model.layers.27.self_attn.k_proj
+ - model.layers.57.self_attn.k_proj
+ - model.layers.64.self_attn.k_proj
+ - model.layers.71.self_attn.k_proj
+ - model.layers.61.self_attn.k_proj
+ - model.layers.72.self_attn.k_proj
+ - model.layers.73.self_attn.k_proj
+ # self_attn.o_proj layers
+ - model.layers.69.self_attn.o_proj
+ - model.layers.39.self_attn.o_proj
+ - model.layers.16.self_attn.o_proj
+ - model.layers.14.self_attn.o_proj
+ - model.layers.19.self_attn.o_proj
+ - model.layers.42.self_attn.o_proj
+ - model.layers.12.self_attn.o_proj
+ - model.layers.15.self_attn.o_proj
+ - model.layers.17.self_attn.o_proj
+ - model.layers.38.self_attn.o_proj
+ - model.layers.23.self_attn.o_proj
+ - model.layers.22.self_attn.o_proj
+ - model.layers.13.self_attn.o_proj
+ - model.layers.29.self_attn.o_proj
+ - model.layers.41.self_attn.o_proj
+ - model.layers.44.self_attn.o_proj
+ - model.layers.46.self_attn.o_proj
+ - model.layers.45.self_attn.o_proj
+ - model.layers.43.self_attn.o_proj
+ - model.layers.49.self_attn.o_proj
+ - model.layers.30.self_attn.o_proj
+ - model.layers.26.self_attn.o_proj
+ - model.layers.25.self_attn.o_proj
+ - model.layers.37.self_attn.o_proj
+ - model.layers.47.self_attn.o_proj
+ - model.layers.11.self_attn.o_proj
+ - model.layers.18.self_attn.o_proj
+ - model.layers.28.self_attn.o_proj
+ - model.layers.20.self_attn.o_proj
+ - model.layers.27.self_attn.o_proj
+ - model.layers.53.self_attn.o_proj
+ - model.layers.52.self_attn.o_proj
+ - model.layers.35.self_attn.o_proj
+ - model.layers.71.self_attn.o_proj
+ - model.layers.10.self_attn.o_proj
+ - model.layers.3.self_attn.o_proj
+ - model.layers.21.self_attn.o_proj
+ - model.layers.24.self_attn.o_proj
+ - model.layers.68.self_attn.o_proj
+ - model.layers.48.self_attn.o_proj
+ # self_attn.q_proj layers
+ - model.layers.1.self_attn.q_proj
+ - model.layers.2.self_attn.q_proj
+ - model.layers.3.self_attn.q_proj
+ - model.layers.0.self_attn.q_proj
+ - model.layers.5.self_attn.q_proj
+ - model.layers.4.self_attn.q_proj
+ - model.layers.6.self_attn.q_proj
+ - model.layers.8.self_attn.q_proj
+ - model.layers.7.self_attn.q_proj
+ - model.layers.9.self_attn.q_proj
+ - model.layers.10.self_attn.q_proj
+ - model.layers.68.self_attn.q_proj
+ - model.layers.25.self_attn.q_proj
+ - model.layers.12.self_attn.q_proj
+ - model.layers.54.self_attn.q_proj
+ - model.layers.55.self_attn.q_proj
+ - model.layers.61.self_attn.q_proj
+ - model.layers.18.self_attn.q_proj
+ - model.layers.49.self_attn.q_proj
+ - model.layers.66.self_attn.q_proj
+ - model.layers.72.self_attn.q_proj
+ - model.layers.11.self_attn.q_proj
+ - model.layers.52.self_attn.q_proj
+ - model.layers.64.self_attn.q_proj
+ - model.layers.15.self_attn.q_proj
+ - model.layers.60.self_attn.q_proj
+ - model.layers.50.self_attn.q_proj
+ - model.layers.59.self_attn.q_proj
+ - model.layers.53.self_attn.q_proj
+ - model.layers.48.self_attn.q_proj
+ - model.layers.57.self_attn.q_proj
+ - model.layers.70.self_attn.q_proj
+ - model.layers.17.self_attn.q_proj
+ - model.layers.67.self_attn.q_proj
+ - model.layers.71.self_attn.q_proj
+ - model.layers.62.self_attn.q_proj
+ - model.layers.51.self_attn.q_proj
+ - model.layers.19.self_attn.q_proj
+ - model.layers.58.self_attn.q_proj
+ - model.layers.13.self_attn.q_proj
+ # self_attn.v_proj layers
+ - model.layers.23.self_attn.v_proj
+ - model.layers.25.self_attn.v_proj
+ - model.layers.26.self_attn.v_proj
+ - model.layers.27.self_attn.v_proj
+ - model.layers.28.self_attn.v_proj
+ - model.layers.29.self_attn.v_proj
+ - model.layers.30.self_attn.v_proj
+ - model.layers.31.self_attn.v_proj
+ - model.layers.34.self_attn.v_proj
+ - model.layers.35.self_attn.v_proj
+ - model.layers.36.self_attn.v_proj
+ - model.layers.37.self_attn.v_proj
+ - model.layers.38.self_attn.v_proj
+ - model.layers.42.self_attn.v_proj
+ - model.layers.48.self_attn.v_proj
+ - model.layers.57.self_attn.v_proj
+ - model.layers.58.self_attn.v_proj
+ - model.layers.61.self_attn.v_proj
+ - model.layers.63.self_attn.v_proj
+ - model.layers.64.self_attn.v_proj
+ - model.layers.65.self_attn.v_proj
+ - model.layers.66.self_attn.v_proj
+ - model.layers.69.self_attn.v_proj
+ - model.layers.70.self_attn.v_proj
+ - model.layers.74.self_attn.v_proj
+ - model.layers.75.self_attn.v_proj
+ - model.layers.72.self_attn.v_proj
+ - model.layers.39.self_attn.v_proj
+ - model.layers.41.self_attn.v_proj
+ - model.layers.40.self_attn.v_proj
+ - model.layers.33.self_attn.v_proj
+ - model.layers.59.self_attn.v_proj
+ - model.layers.16.self_attn.v_proj
+ - model.layers.15.self_attn.v_proj
+ - model.layers.76.self_attn.v_proj
+ - model.layers.24.self_attn.v_proj
+ - model.layers.68.self_attn.v_proj
+ - model.layers.67.self_attn.v_proj
+ - model.layers.55.self_attn.v_proj
+ - model.layers.44.self_attn.v_proj
+
+
+ wandb_project: EVA-Qwen2.5-72B-SFFT-v0.0
+ wandb_entity:
+ wandb_watch:
+ wandb_name: Unit-00
+ wandb_log_model:
+
+ gradient_accumulation_steps: 4
+ micro_batch_size: 4
+ num_epochs: 3
+ optimizer: paged_adamw_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.00005
+ max_grad_norm: 3
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: auto
+ fp16:
+ tf32: false
+
+ gradient_checkpointing: "unsloth"
+ # gradient_checkpointing_kwargs:
+ # use_reentrant: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ warmup_steps: 20
+ evals_per_epoch: 4
+ saves_per_epoch: 2
+ save_total_limit: 1
+ save_safetensors: true
+ hub_model_id:
+ hub_strategy:
+ debug:
+ deepspeed: deepspeed_configs/zero3_bf16.json
+ weight_decay: 0.1
+ # fsdp:
+ # - full_shard
+ # - auto_wrap
+ # fsdp_config:
+ # fsdp_limit_all_gathers: true
+ # fsdp_sync_module_states: false
+ # fsdp_offload_params: true
+ # fsdp_cpu_ram_efficient_loading: true
+ # fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
+ # fsdp_transformer_layer_cls_to_wrap: Qwen2DecoderLayer
+ # fsdp_activation_checkpointing: true
+ # fsdp_state_dict_type: SHARDED_STATE_DICT # Changed from FULL_STATE_DICT
+ # fsdp_sharding_strategy: FULL_SHARD
+ # fsdp_forward_prefetch: false # Added
+ # fsdp_backward_prefetch: "BACKWARD_PRE" # Added
+ # fsdp_backward_prefetch_limit: 1 # Added
+ # fsdp_mixed_precision: BF16 # Added
+ ```
+
+ </details><br>
+
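The `unfrozen_parameters` list in the config above drives a Spectrum-style partial fine-tune (the "SFFT" in the model name): only the named tensors receive gradients while the rest of the model stays frozen. A minimal sketch of how such a name list can be applied with plain PyTorch; this illustrates the idea under stated assumptions and is not axolotl's internal implementation, and loading the full 72B model requires substantial memory:

```python
import re
import torch
from transformers import AutoModelForCausalLM

# Anchored regexes for head/embeddings, plain substrings for layer modules,
# exactly as they appear in the config (list truncated here for brevity).
unfrozen = [
    r"^lm_head.weight$",
    r"^model.embed_tokens.weight$",
    "model.layers.62.mlp.down_proj",
    # ... remaining entries from the config ...
]

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-72B", torch_dtype=torch.bfloat16
)

for name, param in model.named_parameters():
    # A parameter is trainable only if it matches at least one pattern.
    param.requires_grad = any(re.search(p, name) for p in unfrozen)

trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"trainable parameters: {trainable:,}")
```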
+ # EVA-Qwen2.5-72B-SFFT-v0.0
+
+ This model is a fine-tuned version of [Qwen/Qwen2.5-72B](https://huggingface.co/Qwen/Qwen2.5-72B) on the datasets listed in the axolotl configuration above.
+ It achieves the following results on the evaluation set:
+ - Loss: 3.2818
+
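Since the config trains with `chat_template: chatml` and the tokenizer ships the `<|im_start|>`/`<|im_end|>` markers (see `added_tokens.json` below), inference should use the ChatML format. A minimal generation sketch, assuming the repo files are available under a hypothetical id or local path:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "EVA-Qwen2.5-72B-SFFT-v0.0"  # hypothetical Hub id or local path
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(
    repo, torch_dtype=torch.bfloat16, device_map="auto"
)

messages = [{"role": "user", "content": "Write a short scene set on a night train."}]
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# Stop on the ChatML turn delimiter rather than the base <|endoftext|>.
output = model.generate(
    inputs,
    max_new_tokens=512,
    eos_token_id=tokenizer.convert_tokens_to_ids("<|im_end|>"),
)
print(tokenizer.decode(output[0][inputs.shape[-1]:], skip_special_tokens=True))
```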
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-05
+ - train_batch_size: 4
+ - eval_batch_size: 4
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 8
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 128
+ - total_eval_batch_size: 32
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_steps: 20
+ - num_epochs: 3
+
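The two totals in this list follow directly from the per-device settings; a quick sanity check:

```python
micro_batch_size = 4   # per-device train (and eval) batch size
grad_accum_steps = 4
num_devices = 8

total_train_batch_size = micro_batch_size * grad_accum_steps * num_devices
total_eval_batch_size = micro_batch_size * num_devices  # no accumulation at eval

assert total_train_batch_size == 128
assert total_eval_batch_size == 32
```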
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:------:|:----:|:---------------:|
+ | 1.3286 | 0.0142 | 1 | 2.9734 |
+ | 1.0713 | 0.2562 | 18 | 3.7951 |
+ | 0.9051 | 0.5125 | 36 | 3.3342 |
+ | 0.8746 | 0.7687 | 54 | 3.2625 |
+ | 0.6216 | 1.0214 | 72 | 3.2244 |
+ | 0.6158 | 1.2786 | 90 | 3.2810 |
+ | 0.57 | 1.5357 | 108 | 3.2375 |
+ | 0.5213 | 1.7929 | 126 | 3.1606 |
+ | 0.3178 | 2.0427 | 144 | 3.2384 |
+ | 0.2809 | 2.2989 | 162 | 3.2971 |
+ | 0.3067 | 2.5552 | 180 | 3.2886 |
+ | 0.3005 | 2.8114 | 198 | 3.2818 |
+
+
+ ### Framework versions
+
+ - Transformers 4.45.2
+ - Pytorch 2.5.0+rocm6.1
+ - Datasets 3.0.1
+ - Tokenizers 0.20.1
added_tokens.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "</tool_call>": 151658,
+   "<tool_call>": 151657,
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|file_sep|>": 151664,
+   "<|fim_middle|>": 151660,
+   "<|fim_pad|>": 151662,
+   "<|fim_prefix|>": 151659,
+   "<|fim_suffix|>": 151661,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|repo_name|>": 151663,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
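These entries extend the Qwen vocabulary with ChatML, tool-call, FIM, and vision markers. A quick check that a loaded tokenizer resolves them to the ids above (the repo id is a hypothetical placeholder):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("EVA-Qwen2.5-72B-SFFT-v0.0")  # hypothetical id

assert tokenizer.convert_tokens_to_ids("<|endoftext|>") == 151643
assert tokenizer.convert_tokens_to_ids("<|im_start|>") == 151644
assert tokenizer.convert_tokens_to_ids("<|im_end|>") == 151645
```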
checkpoint-210/added_tokens.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "</tool_call>": 151658,
+   "<tool_call>": 151657,
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|file_sep|>": 151664,
+   "<|fim_middle|>": 151660,
+   "<|fim_pad|>": 151662,
+   "<|fim_prefix|>": 151659,
+   "<|fim_suffix|>": 151661,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|repo_name|>": 151663,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
checkpoint-210/config.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "_name_or_path": "Qwen/Qwen2.5-72B",
+   "architectures": [
+     "Qwen2ForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "eos_token_id": 151643,
+   "hidden_act": "silu",
+   "hidden_size": 8192,
+   "initializer_range": 0.02,
+   "intermediate_size": 29568,
+   "max_position_embeddings": 131072,
+   "max_window_layers": 80,
+   "model_type": "qwen2",
+   "num_attention_heads": 64,
+   "num_hidden_layers": 80,
+   "num_key_value_heads": 8,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 1000000.0,
+   "sliding_window": null,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.45.2",
+   "use_cache": false,
+   "use_sliding_window": false,
+   "vocab_size": 152064
+ }
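The attention geometry implied by this config is grouped-query attention: 64 query heads share 8 key/value heads, with a head dimension of 8192 / 64 = 128. A small sanity check reading the fields back (the local path is illustrative):

```python
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("checkpoint-210")  # directory containing this config.json

head_dim = cfg.hidden_size // cfg.num_attention_heads                # 8192 // 64 = 128
queries_per_kv = cfg.num_attention_heads // cfg.num_key_value_heads  # 64 // 8 = 8
assert (head_dim, queries_per_kv) == (128, 8)
```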
checkpoint-210/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "bos_token_id": 151643,
+   "do_sample": true,
+   "eos_token_id": 151643,
+   "max_new_tokens": 2048,
+   "transformers_version": "4.45.2"
+ }
checkpoint-210/global_step210/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:996996bf77b008c1cf816967f546ae4d58a161f0574c4b831730fd6bdabb3f2b
+ size 28217893566
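Each checkpoint blob in this commit is stored as a Git LFS pointer like the one above: a three-line text stub recording the SHA-256 of the real object and its size in bytes. A minimal parser for that format, per the git-lfs pointer spec (the path is illustrative):

```python
def parse_lfs_pointer(path: str) -> dict:
    """Read a Git LFS pointer file into a dict of its key/value fields."""
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

ptr = parse_lfs_pointer(
    "checkpoint-210/global_step210/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt"
)
print(ptr["oid"], int(ptr["size"]) / 1e9, "GB")  # sha256:9969..., ~28.2 GB
```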
checkpoint-210/global_step210/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:07b7720fd62ea3d9fd38ba3347f230ad70a37f7f1448124d5d9418b6b5a5b81e
+ size 28217893566
checkpoint-210/global_step210/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8b7bb50d7da191b5ede17fbbf0ba5b68bc10003bbda9d2d99e5d1235cd81ebf1
+ size 28217893566
checkpoint-210/global_step210/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:85e3c195778543bb9427ebb28233e48bdb8d86adff030caf0ab62a39bf6e9946
+ size 28217893566
checkpoint-210/global_step210/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9c931625fe6f16dd569c3737c714b5c839616aac1767ca9a19eea00c85194c11
+ size 28217893566
checkpoint-210/global_step210/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2297ae10f0f7eeea0dcd86088ec0cda46a44b0125e57e88c1c316f93fdde309c
+ size 28217893566
checkpoint-210/global_step210/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a51a7080bb23e55d1f2589df5d362cd907f51dc7abd039932a1991488c48cc73
+ size 28217893566
checkpoint-210/global_step210/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:88259fcb8a114dedca334faa14808af0f4560830142b7cfec822dc746f44a7c1
+ size 28217893566
checkpoint-210/global_step210/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7b162e206f7251f827b67cb173fb501024809de3497fa05d53f8f615b5a82d42
+ size 8777680678
checkpoint-210/global_step210/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:657ad4d79f24ad9e170bc91e8843c98d231edf48fdf4abfe3d63df949fed5d8c
+ size 8777680678
checkpoint-210/global_step210/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f86a083cce3726ada9e1e5fdf2edaab0e002a59ec50e8653c44f32457410106
+ size 8777680678
checkpoint-210/global_step210/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3fd8fe0a823ac717272ead9aa953d36f3fde56804ce5fed77d0012f37c7d4111
+ size 8777680678
checkpoint-210/global_step210/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:79ae1b3c71aa81e4f2650aec3530817e3a8dda15a48e4012e10fc7dba9ea297a
+ size 8777680678
checkpoint-210/global_step210/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e95ad3ab64eb05cbb0bd66e4e9d34bdbf8bfbe5140691cc5f9f8419a0879fa31
+ size 8777680678
checkpoint-210/global_step210/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6771a9ead7f8f61ce5cfec50d42eec77685c6170d65f407872012d3abd5011c2
+ size 8777680678
checkpoint-210/global_step210/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b9bf20d39559d14cff2514f7767a178eebdcf6c9b1ca0692b073ff01edf3f106
+ size 8777680678
checkpoint-210/latest ADDED
@@ -0,0 +1 @@
+ global_step210
checkpoint-210/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-210/model-00001-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6283b95883174a0010d6b08b70fb07cca5424410f975b0c449abacbefea69486
+ size 4548798728
checkpoint-210/model-00002-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:354caf94a38818ab94f16778248a18b6b49954dd72df15ac69efe24fd3872db2
+ size 4964101384
checkpoint-210/model-00003-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:90447f72054bfcaef625a197ed1f21da1fe5ffc2f92bd0b51098bce3b1e420b2
+ size 4781637328
checkpoint-210/model-00004-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:89991e9eee27a74f4ad99a94c91b20ac247905e7a0e76cdd8bdfd01f146f820f
+ size 4781670320
checkpoint-210/model-00005-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4d2ffd282d745b824836867702f1a7ada2feef1fe12f5772144af55e3411844e
+ size 4781670360
checkpoint-210/model-00006-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0e6019601b99d56674855e304b7214a7e6a69f1167af401ba80d20627e0fbfef
+ size 4964101416
checkpoint-210/model-00007-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8684a1ea1f4ebd624e6f25f5b44ad73b23699a441d59180eddccaa0716ab5ec8
+ size 4781637360
checkpoint-210/model-00008-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e7b77a90c689a4cf985f7e99a6e3a8fc3f9eb6a02839170da767693e3bb20053
+ size 4781670360
checkpoint-210/model-00009-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6301a54cc2e7b9219e03029cda29df8201c7ee0475a78ad5ccfeacd048d4d9ec
+ size 4781670360
checkpoint-210/model-00010-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:451e6d5280429ec0dbf21235c37aaa5fb8cfe7d368af6c6641432491d0edc6ad
+ size 4964101416
checkpoint-210/model-00011-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3d9c2032ec85f3d67a2fbd2b33f14442dea92af54e7e8e3bf5f46cb6755290b1
+ size 4781637360
checkpoint-210/model-00012-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b7aafa00eee2c10ddd1aa52d1aef0d63f99d55297238938060baf0e7fe7d82bb
+ size 4781670360
checkpoint-210/model-00013-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:adb3278eb2f0a9b369b622fd9dec997bb765da3039bbbe72a819703a721b6a81
+ size 4781670360
checkpoint-210/model-00014-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:733511c0c942894a39ab560b432b3016fea7e80a32fdbec4438f1163656001e5
+ size 4964101416
checkpoint-210/model-00015-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:111fcfbf7cd24bf0fe9c0f74ad73283b8c047043024360ae27db12483e755935
+ size 4781637360
checkpoint-210/model-00016-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b89de2ba64909565266586d8af5c0551401073dbc6af8a223746b25e3af2d418
+ size 4781670360
checkpoint-210/model-00017-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1de3aea5b0576779fd947ffdd706f1a7e1de9e76590121abcd8d5f79e414a5f2
+ size 4781670360
checkpoint-210/model-00018-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c140753d668bb459e937c069df58f14d47a3bf9a619d0a93ccd5df44abdd6e99
+ size 4964101416
checkpoint-210/model-00019-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:76140cd75e3a027aa2ecefaf1584ac178c0d51a8b4af920394ea6b30fe280d6c
+ size 4781637360
checkpoint-210/model-00020-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e27c57e22d9f2e5d4b6768e4b68cdb3c956ec8acd408147b4f59641472c92b65
+ size 4781670360
checkpoint-210/model-00021-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a50d899e73850db4244b7350a97f7ab79cfca829ab911d3613a610b0414f3e57
+ size 4781670360
checkpoint-210/model-00022-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:874fada9d4d34248e77d301b593a4f26358417682670839df3630d07984f3c16
+ size 4964101416
checkpoint-210/model-00023-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e262581bfb84ca6009a351a8a8c6ac3c3fa97fa49035f2ac434259891fa38e26
+ size 4781637360
checkpoint-210/model-00024-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2b123a23395ec5cfd2ca1a916bbc16f1e700064ea4ec8fc25f6f7b434449e3ba
+ size 4781670360
checkpoint-210/model-00025-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f2a37ccb426f7904efc4cf0b6ee4c1322672c5485ac1fbfc0df3fca9aec3cde
+ size 4781670360
checkpoint-210/model-00026-of-00031.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:48424fb8b57eb87235a584d0cb572a9f79144786cb8f2c98a5ee701a2e44cb6a
+ size 4964101416
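Each bf16 shard above is roughly 4.8–5.0 GB; across all 31 shards that is about 145 GB, consistent with a ~72.7B-parameter model at 2 bytes per weight. A back-of-the-envelope estimate from the config.json fields (biases and norm weights add a small amount on top):

```python
# Values taken from checkpoint-210/config.json above.
hidden, inter, layers, vocab = 8192, 29568, 80, 152064
heads, kv_heads = 64, 8
head_dim = hidden // heads  # 128

attn = (hidden * heads * head_dim            # q_proj
        + 2 * hidden * kv_heads * head_dim   # k_proj + v_proj
        + heads * head_dim * hidden)         # o_proj
mlp = 3 * hidden * inter                     # gate, up, down projections
embeddings = 2 * vocab * hidden              # embed_tokens + untied lm_head

params = layers * (attn + mlp) + embeddings
print(f"~{params / 1e9:.1f}B parameters, ~{params * 2 / 1e9:.0f} GB in bf16")
```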