Training in progress, step 666, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100198584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cc5125f76d6dbaaf8f53b7058f9db944f682d3b54268cdcab102643bbb5c715
|
3 |
size 100198584
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50675604
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2353d647b00d14aca06f778f9d43a65a8201b5792af6fb89150d357af16ee31c
|
3 |
size 50675604
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4dfc30b5618d8f9126fd758d49456abcb3bac7a76ca1747eea78894ae958013
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4151,6 +4151,524 @@
|
|
4151 |
"learning_rate": 7.866831072749692e-06,
|
4152 |
"loss": 1.0413,
|
4153 |
"step": 592
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4154 |
}
|
4155 |
],
|
4156 |
"logging_steps": 1,
|
@@ -4170,7 +4688,7 @@
|
|
4170 |
"attributes": {}
|
4171 |
}
|
4172 |
},
|
4173 |
-
"total_flos": 6.
|
4174 |
"train_batch_size": 4,
|
4175 |
"trial_name": null,
|
4176 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7308641975308642,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 666,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4151 |
"learning_rate": 7.866831072749692e-06,
|
4152 |
"loss": 1.0413,
|
4153 |
"step": 592
|
4154 |
+
},
|
4155 |
+
{
|
4156 |
+
"epoch": 0.6507544581618656,
|
4157 |
+
"grad_norm": 0.13166101276874542,
|
4158 |
+
"learning_rate": 7.842170160295932e-06,
|
4159 |
+
"loss": 1.0842,
|
4160 |
+
"step": 593
|
4161 |
+
},
|
4162 |
+
{
|
4163 |
+
"epoch": 0.6518518518518519,
|
4164 |
+
"grad_norm": 0.13434740900993347,
|
4165 |
+
"learning_rate": 7.817509247842171e-06,
|
4166 |
+
"loss": 1.1033,
|
4167 |
+
"step": 594
|
4168 |
+
},
|
4169 |
+
{
|
4170 |
+
"epoch": 0.6529492455418381,
|
4171 |
+
"grad_norm": 0.1200101375579834,
|
4172 |
+
"learning_rate": 7.79284833538841e-06,
|
4173 |
+
"loss": 1.08,
|
4174 |
+
"step": 595
|
4175 |
+
},
|
4176 |
+
{
|
4177 |
+
"epoch": 0.6540466392318244,
|
4178 |
+
"grad_norm": 0.13683106005191803,
|
4179 |
+
"learning_rate": 7.768187422934649e-06,
|
4180 |
+
"loss": 1.128,
|
4181 |
+
"step": 596
|
4182 |
+
},
|
4183 |
+
{
|
4184 |
+
"epoch": 0.6551440329218107,
|
4185 |
+
"grad_norm": 0.13702082633972168,
|
4186 |
+
"learning_rate": 7.743526510480888e-06,
|
4187 |
+
"loss": 1.1325,
|
4188 |
+
"step": 597
|
4189 |
+
},
|
4190 |
+
{
|
4191 |
+
"epoch": 0.656241426611797,
|
4192 |
+
"grad_norm": 0.13655568659305573,
|
4193 |
+
"learning_rate": 7.718865598027127e-06,
|
4194 |
+
"loss": 1.0282,
|
4195 |
+
"step": 598
|
4196 |
+
},
|
4197 |
+
{
|
4198 |
+
"epoch": 0.6573388203017833,
|
4199 |
+
"grad_norm": 0.11861226707696915,
|
4200 |
+
"learning_rate": 7.694204685573366e-06,
|
4201 |
+
"loss": 1.1373,
|
4202 |
+
"step": 599
|
4203 |
+
},
|
4204 |
+
{
|
4205 |
+
"epoch": 0.6584362139917695,
|
4206 |
+
"grad_norm": 0.13324734568595886,
|
4207 |
+
"learning_rate": 7.669543773119606e-06,
|
4208 |
+
"loss": 1.1823,
|
4209 |
+
"step": 600
|
4210 |
+
},
|
4211 |
+
{
|
4212 |
+
"epoch": 0.6595336076817558,
|
4213 |
+
"grad_norm": 0.13969723880290985,
|
4214 |
+
"learning_rate": 7.644882860665845e-06,
|
4215 |
+
"loss": 1.1193,
|
4216 |
+
"step": 601
|
4217 |
+
},
|
4218 |
+
{
|
4219 |
+
"epoch": 0.6606310013717421,
|
4220 |
+
"grad_norm": 0.138763889670372,
|
4221 |
+
"learning_rate": 7.620221948212084e-06,
|
4222 |
+
"loss": 1.1123,
|
4223 |
+
"step": 602
|
4224 |
+
},
|
4225 |
+
{
|
4226 |
+
"epoch": 0.6617283950617284,
|
4227 |
+
"grad_norm": 0.13153599202632904,
|
4228 |
+
"learning_rate": 7.595561035758323e-06,
|
4229 |
+
"loss": 1.1058,
|
4230 |
+
"step": 603
|
4231 |
+
},
|
4232 |
+
{
|
4233 |
+
"epoch": 0.6628257887517147,
|
4234 |
+
"grad_norm": 0.1288379430770874,
|
4235 |
+
"learning_rate": 7.570900123304563e-06,
|
4236 |
+
"loss": 1.0909,
|
4237 |
+
"step": 604
|
4238 |
+
},
|
4239 |
+
{
|
4240 |
+
"epoch": 0.663923182441701,
|
4241 |
+
"grad_norm": 0.1367582231760025,
|
4242 |
+
"learning_rate": 7.546239210850802e-06,
|
4243 |
+
"loss": 1.0334,
|
4244 |
+
"step": 605
|
4245 |
+
},
|
4246 |
+
{
|
4247 |
+
"epoch": 0.6650205761316872,
|
4248 |
+
"grad_norm": 0.12939676642417908,
|
4249 |
+
"learning_rate": 7.521578298397041e-06,
|
4250 |
+
"loss": 1.0775,
|
4251 |
+
"step": 606
|
4252 |
+
},
|
4253 |
+
{
|
4254 |
+
"epoch": 0.6661179698216735,
|
4255 |
+
"grad_norm": 0.13814114034175873,
|
4256 |
+
"learning_rate": 7.4969173859432805e-06,
|
4257 |
+
"loss": 1.1047,
|
4258 |
+
"step": 607
|
4259 |
+
},
|
4260 |
+
{
|
4261 |
+
"epoch": 0.6672153635116598,
|
4262 |
+
"grad_norm": 0.1455143690109253,
|
4263 |
+
"learning_rate": 7.472256473489519e-06,
|
4264 |
+
"loss": 1.163,
|
4265 |
+
"step": 608
|
4266 |
+
},
|
4267 |
+
{
|
4268 |
+
"epoch": 0.6683127572016461,
|
4269 |
+
"grad_norm": 0.15828116238117218,
|
4270 |
+
"learning_rate": 7.447595561035758e-06,
|
4271 |
+
"loss": 1.0329,
|
4272 |
+
"step": 609
|
4273 |
+
},
|
4274 |
+
{
|
4275 |
+
"epoch": 0.6694101508916324,
|
4276 |
+
"grad_norm": 0.14122609794139862,
|
4277 |
+
"learning_rate": 7.422934648581999e-06,
|
4278 |
+
"loss": 1.1115,
|
4279 |
+
"step": 610
|
4280 |
+
},
|
4281 |
+
{
|
4282 |
+
"epoch": 0.6705075445816187,
|
4283 |
+
"grad_norm": 0.12997443974018097,
|
4284 |
+
"learning_rate": 7.398273736128238e-06,
|
4285 |
+
"loss": 1.1093,
|
4286 |
+
"step": 611
|
4287 |
+
},
|
4288 |
+
{
|
4289 |
+
"epoch": 0.671604938271605,
|
4290 |
+
"grad_norm": 0.13643480837345123,
|
4291 |
+
"learning_rate": 7.373612823674477e-06,
|
4292 |
+
"loss": 1.13,
|
4293 |
+
"step": 612
|
4294 |
+
},
|
4295 |
+
{
|
4296 |
+
"epoch": 0.6727023319615912,
|
4297 |
+
"grad_norm": 0.12696883082389832,
|
4298 |
+
"learning_rate": 7.348951911220716e-06,
|
4299 |
+
"loss": 1.1004,
|
4300 |
+
"step": 613
|
4301 |
+
},
|
4302 |
+
{
|
4303 |
+
"epoch": 0.6737997256515775,
|
4304 |
+
"grad_norm": 0.14005936682224274,
|
4305 |
+
"learning_rate": 7.324290998766955e-06,
|
4306 |
+
"loss": 1.0574,
|
4307 |
+
"step": 614
|
4308 |
+
},
|
4309 |
+
{
|
4310 |
+
"epoch": 0.6748971193415638,
|
4311 |
+
"grad_norm": 0.14040903747081757,
|
4312 |
+
"learning_rate": 7.299630086313194e-06,
|
4313 |
+
"loss": 1.1028,
|
4314 |
+
"step": 615
|
4315 |
+
},
|
4316 |
+
{
|
4317 |
+
"epoch": 0.6759945130315501,
|
4318 |
+
"grad_norm": 0.12855766713619232,
|
4319 |
+
"learning_rate": 7.274969173859433e-06,
|
4320 |
+
"loss": 1.1411,
|
4321 |
+
"step": 616
|
4322 |
+
},
|
4323 |
+
{
|
4324 |
+
"epoch": 0.6770919067215363,
|
4325 |
+
"grad_norm": 0.14175771176815033,
|
4326 |
+
"learning_rate": 7.250308261405673e-06,
|
4327 |
+
"loss": 1.077,
|
4328 |
+
"step": 617
|
4329 |
+
},
|
4330 |
+
{
|
4331 |
+
"epoch": 0.6781893004115226,
|
4332 |
+
"grad_norm": 0.12823879718780518,
|
4333 |
+
"learning_rate": 7.225647348951912e-06,
|
4334 |
+
"loss": 1.0089,
|
4335 |
+
"step": 618
|
4336 |
+
},
|
4337 |
+
{
|
4338 |
+
"epoch": 0.6792866941015089,
|
4339 |
+
"grad_norm": 0.13076744973659515,
|
4340 |
+
"learning_rate": 7.200986436498151e-06,
|
4341 |
+
"loss": 1.1641,
|
4342 |
+
"step": 619
|
4343 |
+
},
|
4344 |
+
{
|
4345 |
+
"epoch": 0.6803840877914952,
|
4346 |
+
"grad_norm": 0.1256016492843628,
|
4347 |
+
"learning_rate": 7.1763255240443905e-06,
|
4348 |
+
"loss": 1.1092,
|
4349 |
+
"step": 620
|
4350 |
+
},
|
4351 |
+
{
|
4352 |
+
"epoch": 0.6814814814814815,
|
4353 |
+
"grad_norm": 0.14268584549427032,
|
4354 |
+
"learning_rate": 7.1516646115906294e-06,
|
4355 |
+
"loss": 1.0106,
|
4356 |
+
"step": 621
|
4357 |
+
},
|
4358 |
+
{
|
4359 |
+
"epoch": 0.6825788751714678,
|
4360 |
+
"grad_norm": 0.13120578229427338,
|
4361 |
+
"learning_rate": 7.127003699136868e-06,
|
4362 |
+
"loss": 1.0598,
|
4363 |
+
"step": 622
|
4364 |
+
},
|
4365 |
+
{
|
4366 |
+
"epoch": 0.683676268861454,
|
4367 |
+
"grad_norm": 0.13504907488822937,
|
4368 |
+
"learning_rate": 7.102342786683107e-06,
|
4369 |
+
"loss": 1.0966,
|
4370 |
+
"step": 623
|
4371 |
+
},
|
4372 |
+
{
|
4373 |
+
"epoch": 0.6847736625514403,
|
4374 |
+
"grad_norm": 0.12563414871692657,
|
4375 |
+
"learning_rate": 7.077681874229347e-06,
|
4376 |
+
"loss": 1.0724,
|
4377 |
+
"step": 624
|
4378 |
+
},
|
4379 |
+
{
|
4380 |
+
"epoch": 0.6858710562414266,
|
4381 |
+
"grad_norm": 0.13522499799728394,
|
4382 |
+
"learning_rate": 7.053020961775586e-06,
|
4383 |
+
"loss": 1.1182,
|
4384 |
+
"step": 625
|
4385 |
+
},
|
4386 |
+
{
|
4387 |
+
"epoch": 0.6869684499314129,
|
4388 |
+
"grad_norm": 0.12960287928581238,
|
4389 |
+
"learning_rate": 7.028360049321825e-06,
|
4390 |
+
"loss": 1.1252,
|
4391 |
+
"step": 626
|
4392 |
+
},
|
4393 |
+
{
|
4394 |
+
"epoch": 0.6880658436213992,
|
4395 |
+
"grad_norm": 0.13346299529075623,
|
4396 |
+
"learning_rate": 7.003699136868065e-06,
|
4397 |
+
"loss": 1.1317,
|
4398 |
+
"step": 627
|
4399 |
+
},
|
4400 |
+
{
|
4401 |
+
"epoch": 0.6891632373113855,
|
4402 |
+
"grad_norm": 0.1333625316619873,
|
4403 |
+
"learning_rate": 6.979038224414304e-06,
|
4404 |
+
"loss": 1.1505,
|
4405 |
+
"step": 628
|
4406 |
+
},
|
4407 |
+
{
|
4408 |
+
"epoch": 0.6902606310013717,
|
4409 |
+
"grad_norm": 0.1392945796251297,
|
4410 |
+
"learning_rate": 6.954377311960543e-06,
|
4411 |
+
"loss": 1.1966,
|
4412 |
+
"step": 629
|
4413 |
+
},
|
4414 |
+
{
|
4415 |
+
"epoch": 0.691358024691358,
|
4416 |
+
"grad_norm": 0.12204419821500778,
|
4417 |
+
"learning_rate": 6.929716399506782e-06,
|
4418 |
+
"loss": 1.1243,
|
4419 |
+
"step": 630
|
4420 |
+
},
|
4421 |
+
{
|
4422 |
+
"epoch": 0.6924554183813443,
|
4423 |
+
"grad_norm": 0.1395426094532013,
|
4424 |
+
"learning_rate": 6.905055487053022e-06,
|
4425 |
+
"loss": 1.0475,
|
4426 |
+
"step": 631
|
4427 |
+
},
|
4428 |
+
{
|
4429 |
+
"epoch": 0.6935528120713306,
|
4430 |
+
"grad_norm": 0.13325053453445435,
|
4431 |
+
"learning_rate": 6.880394574599261e-06,
|
4432 |
+
"loss": 1.0344,
|
4433 |
+
"step": 632
|
4434 |
+
},
|
4435 |
+
{
|
4436 |
+
"epoch": 0.6946502057613169,
|
4437 |
+
"grad_norm": 0.14765462279319763,
|
4438 |
+
"learning_rate": 6.8557336621455e-06,
|
4439 |
+
"loss": 1.0965,
|
4440 |
+
"step": 633
|
4441 |
+
},
|
4442 |
+
{
|
4443 |
+
"epoch": 0.6957475994513032,
|
4444 |
+
"grad_norm": 0.12556719779968262,
|
4445 |
+
"learning_rate": 6.8310727496917395e-06,
|
4446 |
+
"loss": 1.199,
|
4447 |
+
"step": 634
|
4448 |
+
},
|
4449 |
+
{
|
4450 |
+
"epoch": 0.6968449931412894,
|
4451 |
+
"grad_norm": 0.12908804416656494,
|
4452 |
+
"learning_rate": 6.806411837237978e-06,
|
4453 |
+
"loss": 1.1594,
|
4454 |
+
"step": 635
|
4455 |
+
},
|
4456 |
+
{
|
4457 |
+
"epoch": 0.6979423868312757,
|
4458 |
+
"grad_norm": 0.1703738272190094,
|
4459 |
+
"learning_rate": 6.781750924784217e-06,
|
4460 |
+
"loss": 1.0171,
|
4461 |
+
"step": 636
|
4462 |
+
},
|
4463 |
+
{
|
4464 |
+
"epoch": 0.699039780521262,
|
4465 |
+
"grad_norm": 0.12791863083839417,
|
4466 |
+
"learning_rate": 6.757090012330457e-06,
|
4467 |
+
"loss": 1.2105,
|
4468 |
+
"step": 637
|
4469 |
+
},
|
4470 |
+
{
|
4471 |
+
"epoch": 0.7001371742112483,
|
4472 |
+
"grad_norm": 0.17011161148548126,
|
4473 |
+
"learning_rate": 6.732429099876696e-06,
|
4474 |
+
"loss": 1.0192,
|
4475 |
+
"step": 638
|
4476 |
+
},
|
4477 |
+
{
|
4478 |
+
"epoch": 0.7012345679012346,
|
4479 |
+
"grad_norm": 0.14074620604515076,
|
4480 |
+
"learning_rate": 6.707768187422935e-06,
|
4481 |
+
"loss": 1.1763,
|
4482 |
+
"step": 639
|
4483 |
+
},
|
4484 |
+
{
|
4485 |
+
"epoch": 0.7023319615912208,
|
4486 |
+
"grad_norm": 0.13788381218910217,
|
4487 |
+
"learning_rate": 6.683107274969174e-06,
|
4488 |
+
"loss": 1.0638,
|
4489 |
+
"step": 640
|
4490 |
+
},
|
4491 |
+
{
|
4492 |
+
"epoch": 0.7034293552812071,
|
4493 |
+
"grad_norm": 0.13305304944515228,
|
4494 |
+
"learning_rate": 6.6584463625154135e-06,
|
4495 |
+
"loss": 1.1449,
|
4496 |
+
"step": 641
|
4497 |
+
},
|
4498 |
+
{
|
4499 |
+
"epoch": 0.7045267489711934,
|
4500 |
+
"grad_norm": 0.1297188103199005,
|
4501 |
+
"learning_rate": 6.633785450061652e-06,
|
4502 |
+
"loss": 1.1244,
|
4503 |
+
"step": 642
|
4504 |
+
},
|
4505 |
+
{
|
4506 |
+
"epoch": 0.7056241426611797,
|
4507 |
+
"grad_norm": 0.12216539680957794,
|
4508 |
+
"learning_rate": 6.609124537607891e-06,
|
4509 |
+
"loss": 1.099,
|
4510 |
+
"step": 643
|
4511 |
+
},
|
4512 |
+
{
|
4513 |
+
"epoch": 0.706721536351166,
|
4514 |
+
"grad_norm": 0.12714643776416779,
|
4515 |
+
"learning_rate": 6.584463625154132e-06,
|
4516 |
+
"loss": 1.1373,
|
4517 |
+
"step": 644
|
4518 |
+
},
|
4519 |
+
{
|
4520 |
+
"epoch": 0.7078189300411523,
|
4521 |
+
"grad_norm": 0.12196072936058044,
|
4522 |
+
"learning_rate": 6.559802712700371e-06,
|
4523 |
+
"loss": 1.1225,
|
4524 |
+
"step": 645
|
4525 |
+
},
|
4526 |
+
{
|
4527 |
+
"epoch": 0.7089163237311386,
|
4528 |
+
"grad_norm": 0.1701362133026123,
|
4529 |
+
"learning_rate": 6.53514180024661e-06,
|
4530 |
+
"loss": 0.991,
|
4531 |
+
"step": 646
|
4532 |
+
},
|
4533 |
+
{
|
4534 |
+
"epoch": 0.7100137174211248,
|
4535 |
+
"grad_norm": 0.1309044361114502,
|
4536 |
+
"learning_rate": 6.5104808877928495e-06,
|
4537 |
+
"loss": 1.1614,
|
4538 |
+
"step": 647
|
4539 |
+
},
|
4540 |
+
{
|
4541 |
+
"epoch": 0.7111111111111111,
|
4542 |
+
"grad_norm": 0.1310199499130249,
|
4543 |
+
"learning_rate": 6.485819975339088e-06,
|
4544 |
+
"loss": 1.1724,
|
4545 |
+
"step": 648
|
4546 |
+
},
|
4547 |
+
{
|
4548 |
+
"epoch": 0.7122085048010974,
|
4549 |
+
"grad_norm": 0.15935364365577698,
|
4550 |
+
"learning_rate": 6.461159062885327e-06,
|
4551 |
+
"loss": 1.0417,
|
4552 |
+
"step": 649
|
4553 |
+
},
|
4554 |
+
{
|
4555 |
+
"epoch": 0.7133058984910837,
|
4556 |
+
"grad_norm": 0.13248024880886078,
|
4557 |
+
"learning_rate": 6.436498150431566e-06,
|
4558 |
+
"loss": 1.2158,
|
4559 |
+
"step": 650
|
4560 |
+
},
|
4561 |
+
{
|
4562 |
+
"epoch": 0.7144032921810699,
|
4563 |
+
"grad_norm": 0.14017465710639954,
|
4564 |
+
"learning_rate": 6.411837237977806e-06,
|
4565 |
+
"loss": 1.1212,
|
4566 |
+
"step": 651
|
4567 |
+
},
|
4568 |
+
{
|
4569 |
+
"epoch": 0.7155006858710562,
|
4570 |
+
"grad_norm": 0.13974924385547638,
|
4571 |
+
"learning_rate": 6.387176325524045e-06,
|
4572 |
+
"loss": 1.0866,
|
4573 |
+
"step": 652
|
4574 |
+
},
|
4575 |
+
{
|
4576 |
+
"epoch": 0.7165980795610425,
|
4577 |
+
"grad_norm": 0.13914860785007477,
|
4578 |
+
"learning_rate": 6.362515413070284e-06,
|
4579 |
+
"loss": 1.046,
|
4580 |
+
"step": 653
|
4581 |
+
},
|
4582 |
+
{
|
4583 |
+
"epoch": 0.7176954732510288,
|
4584 |
+
"grad_norm": 0.1510930210351944,
|
4585 |
+
"learning_rate": 6.3378545006165236e-06,
|
4586 |
+
"loss": 0.9835,
|
4587 |
+
"step": 654
|
4588 |
+
},
|
4589 |
+
{
|
4590 |
+
"epoch": 0.7187928669410151,
|
4591 |
+
"grad_norm": 0.13082289695739746,
|
4592 |
+
"learning_rate": 6.3131935881627625e-06,
|
4593 |
+
"loss": 1.1769,
|
4594 |
+
"step": 655
|
4595 |
+
},
|
4596 |
+
{
|
4597 |
+
"epoch": 0.7198902606310014,
|
4598 |
+
"grad_norm": 0.14069297909736633,
|
4599 |
+
"learning_rate": 6.288532675709001e-06,
|
4600 |
+
"loss": 1.0869,
|
4601 |
+
"step": 656
|
4602 |
+
},
|
4603 |
+
{
|
4604 |
+
"epoch": 0.7209876543209877,
|
4605 |
+
"grad_norm": 0.1553945541381836,
|
4606 |
+
"learning_rate": 6.263871763255241e-06,
|
4607 |
+
"loss": 1.0641,
|
4608 |
+
"step": 657
|
4609 |
+
},
|
4610 |
+
{
|
4611 |
+
"epoch": 0.722085048010974,
|
4612 |
+
"grad_norm": 0.14064814150333405,
|
4613 |
+
"learning_rate": 6.23921085080148e-06,
|
4614 |
+
"loss": 1.1924,
|
4615 |
+
"step": 658
|
4616 |
+
},
|
4617 |
+
{
|
4618 |
+
"epoch": 0.7231824417009602,
|
4619 |
+
"grad_norm": 0.1389569491147995,
|
4620 |
+
"learning_rate": 6.214549938347719e-06,
|
4621 |
+
"loss": 1.0729,
|
4622 |
+
"step": 659
|
4623 |
+
},
|
4624 |
+
{
|
4625 |
+
"epoch": 0.7242798353909465,
|
4626 |
+
"grad_norm": 0.14110144972801208,
|
4627 |
+
"learning_rate": 6.189889025893958e-06,
|
4628 |
+
"loss": 1.1349,
|
4629 |
+
"step": 660
|
4630 |
+
},
|
4631 |
+
{
|
4632 |
+
"epoch": 0.7253772290809328,
|
4633 |
+
"grad_norm": 0.13982906937599182,
|
4634 |
+
"learning_rate": 6.1652281134401985e-06,
|
4635 |
+
"loss": 1.0304,
|
4636 |
+
"step": 661
|
4637 |
+
},
|
4638 |
+
{
|
4639 |
+
"epoch": 0.7264746227709191,
|
4640 |
+
"grad_norm": 0.12203299254179001,
|
4641 |
+
"learning_rate": 6.140567200986437e-06,
|
4642 |
+
"loss": 1.2023,
|
4643 |
+
"step": 662
|
4644 |
+
},
|
4645 |
+
{
|
4646 |
+
"epoch": 0.7275720164609053,
|
4647 |
+
"grad_norm": 0.1401350200176239,
|
4648 |
+
"learning_rate": 6.115906288532676e-06,
|
4649 |
+
"loss": 1.0947,
|
4650 |
+
"step": 663
|
4651 |
+
},
|
4652 |
+
{
|
4653 |
+
"epoch": 0.7286694101508916,
|
4654 |
+
"grad_norm": 0.14056162536144257,
|
4655 |
+
"learning_rate": 6.091245376078916e-06,
|
4656 |
+
"loss": 1.073,
|
4657 |
+
"step": 664
|
4658 |
+
},
|
4659 |
+
{
|
4660 |
+
"epoch": 0.7297668038408779,
|
4661 |
+
"grad_norm": 0.13901904225349426,
|
4662 |
+
"learning_rate": 6.066584463625155e-06,
|
4663 |
+
"loss": 1.134,
|
4664 |
+
"step": 665
|
4665 |
+
},
|
4666 |
+
{
|
4667 |
+
"epoch": 0.7308641975308642,
|
4668 |
+
"grad_norm": 0.1339583396911621,
|
4669 |
+
"learning_rate": 6.041923551171394e-06,
|
4670 |
+
"loss": 1.2012,
|
4671 |
+
"step": 666
|
4672 |
}
|
4673 |
],
|
4674 |
"logging_steps": 1,
|
|
|
4688 |
"attributes": {}
|
4689 |
}
|
4690 |
},
|
4691 |
+
"total_flos": 6.904814083900785e+17,
|
4692 |
"train_batch_size": 4,
|
4693 |
"trial_name": null,
|
4694 |
"trial_params": null
|