DuongTrongChi committed on
Commit 58ae0bb · verified · 1 Parent(s): 3fbd685

Training in progress, step 666, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bc1c655855b293e8483dd4fb04c85c74b9d78570e6d69ec02a9a00041397ab4c
+ oid sha256:0cc5125f76d6dbaaf8f53b7058f9db944f682d3b54268cdcab102643bbb5c715
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7f6c1de617623b9f3b9b3b92049c151ab4dd84b2058cf77fac1fd8af1afe7765
+ oid sha256:2353d647b00d14aca06f778f9d43a65a8201b5792af6fb89150d357af16ee31c
  size 50675604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:514895963f5ba684bb297b72010037f5ca38e8ef847d63ce0102594f529ff421
+ oid sha256:e4dfc30b5618d8f9126fd758d49456abcb3bac7a76ca1747eea78894ae958013
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.6496570644718793,
+ "epoch": 0.7308641975308642,
  "eval_steps": 500,
- "global_step": 592,
+ "global_step": 666,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -4151,6 +4151,524 @@
  "learning_rate": 7.866831072749692e-06,
  "loss": 1.0413,
  "step": 592
4154
+ },
4155
+ {
4156
+ "epoch": 0.6507544581618656,
4157
+ "grad_norm": 0.13166101276874542,
4158
+ "learning_rate": 7.842170160295932e-06,
4159
+ "loss": 1.0842,
4160
+ "step": 593
4161
+ },
4162
+ {
4163
+ "epoch": 0.6518518518518519,
4164
+ "grad_norm": 0.13434740900993347,
4165
+ "learning_rate": 7.817509247842171e-06,
4166
+ "loss": 1.1033,
4167
+ "step": 594
4168
+ },
4169
+ {
4170
+ "epoch": 0.6529492455418381,
4171
+ "grad_norm": 0.1200101375579834,
4172
+ "learning_rate": 7.79284833538841e-06,
4173
+ "loss": 1.08,
4174
+ "step": 595
4175
+ },
4176
+ {
4177
+ "epoch": 0.6540466392318244,
4178
+ "grad_norm": 0.13683106005191803,
4179
+ "learning_rate": 7.768187422934649e-06,
4180
+ "loss": 1.128,
4181
+ "step": 596
4182
+ },
4183
+ {
4184
+ "epoch": 0.6551440329218107,
4185
+ "grad_norm": 0.13702082633972168,
4186
+ "learning_rate": 7.743526510480888e-06,
4187
+ "loss": 1.1325,
4188
+ "step": 597
4189
+ },
4190
+ {
4191
+ "epoch": 0.656241426611797,
4192
+ "grad_norm": 0.13655568659305573,
4193
+ "learning_rate": 7.718865598027127e-06,
4194
+ "loss": 1.0282,
4195
+ "step": 598
4196
+ },
4197
+ {
4198
+ "epoch": 0.6573388203017833,
4199
+ "grad_norm": 0.11861226707696915,
4200
+ "learning_rate": 7.694204685573366e-06,
4201
+ "loss": 1.1373,
4202
+ "step": 599
4203
+ },
4204
+ {
4205
+ "epoch": 0.6584362139917695,
4206
+ "grad_norm": 0.13324734568595886,
4207
+ "learning_rate": 7.669543773119606e-06,
4208
+ "loss": 1.1823,
4209
+ "step": 600
4210
+ },
4211
+ {
4212
+ "epoch": 0.6595336076817558,
4213
+ "grad_norm": 0.13969723880290985,
4214
+ "learning_rate": 7.644882860665845e-06,
4215
+ "loss": 1.1193,
4216
+ "step": 601
4217
+ },
4218
+ {
4219
+ "epoch": 0.6606310013717421,
4220
+ "grad_norm": 0.138763889670372,
4221
+ "learning_rate": 7.620221948212084e-06,
4222
+ "loss": 1.1123,
4223
+ "step": 602
4224
+ },
4225
+ {
4226
+ "epoch": 0.6617283950617284,
4227
+ "grad_norm": 0.13153599202632904,
4228
+ "learning_rate": 7.595561035758323e-06,
4229
+ "loss": 1.1058,
4230
+ "step": 603
4231
+ },
4232
+ {
4233
+ "epoch": 0.6628257887517147,
4234
+ "grad_norm": 0.1288379430770874,
4235
+ "learning_rate": 7.570900123304563e-06,
4236
+ "loss": 1.0909,
4237
+ "step": 604
4238
+ },
4239
+ {
4240
+ "epoch": 0.663923182441701,
4241
+ "grad_norm": 0.1367582231760025,
4242
+ "learning_rate": 7.546239210850802e-06,
4243
+ "loss": 1.0334,
4244
+ "step": 605
4245
+ },
4246
+ {
4247
+ "epoch": 0.6650205761316872,
4248
+ "grad_norm": 0.12939676642417908,
4249
+ "learning_rate": 7.521578298397041e-06,
4250
+ "loss": 1.0775,
4251
+ "step": 606
4252
+ },
4253
+ {
4254
+ "epoch": 0.6661179698216735,
4255
+ "grad_norm": 0.13814114034175873,
4256
+ "learning_rate": 7.4969173859432805e-06,
4257
+ "loss": 1.1047,
4258
+ "step": 607
4259
+ },
4260
+ {
4261
+ "epoch": 0.6672153635116598,
4262
+ "grad_norm": 0.1455143690109253,
4263
+ "learning_rate": 7.472256473489519e-06,
4264
+ "loss": 1.163,
4265
+ "step": 608
4266
+ },
4267
+ {
4268
+ "epoch": 0.6683127572016461,
4269
+ "grad_norm": 0.15828116238117218,
4270
+ "learning_rate": 7.447595561035758e-06,
4271
+ "loss": 1.0329,
4272
+ "step": 609
4273
+ },
4274
+ {
4275
+ "epoch": 0.6694101508916324,
4276
+ "grad_norm": 0.14122609794139862,
4277
+ "learning_rate": 7.422934648581999e-06,
4278
+ "loss": 1.1115,
4279
+ "step": 610
4280
+ },
4281
+ {
4282
+ "epoch": 0.6705075445816187,
4283
+ "grad_norm": 0.12997443974018097,
4284
+ "learning_rate": 7.398273736128238e-06,
4285
+ "loss": 1.1093,
4286
+ "step": 611
4287
+ },
4288
+ {
4289
+ "epoch": 0.671604938271605,
4290
+ "grad_norm": 0.13643480837345123,
4291
+ "learning_rate": 7.373612823674477e-06,
4292
+ "loss": 1.13,
4293
+ "step": 612
4294
+ },
4295
+ {
4296
+ "epoch": 0.6727023319615912,
4297
+ "grad_norm": 0.12696883082389832,
4298
+ "learning_rate": 7.348951911220716e-06,
4299
+ "loss": 1.1004,
4300
+ "step": 613
4301
+ },
4302
+ {
4303
+ "epoch": 0.6737997256515775,
4304
+ "grad_norm": 0.14005936682224274,
4305
+ "learning_rate": 7.324290998766955e-06,
4306
+ "loss": 1.0574,
4307
+ "step": 614
4308
+ },
4309
+ {
4310
+ "epoch": 0.6748971193415638,
4311
+ "grad_norm": 0.14040903747081757,
4312
+ "learning_rate": 7.299630086313194e-06,
4313
+ "loss": 1.1028,
4314
+ "step": 615
4315
+ },
4316
+ {
4317
+ "epoch": 0.6759945130315501,
4318
+ "grad_norm": 0.12855766713619232,
4319
+ "learning_rate": 7.274969173859433e-06,
4320
+ "loss": 1.1411,
4321
+ "step": 616
4322
+ },
4323
+ {
4324
+ "epoch": 0.6770919067215363,
4325
+ "grad_norm": 0.14175771176815033,
4326
+ "learning_rate": 7.250308261405673e-06,
4327
+ "loss": 1.077,
4328
+ "step": 617
4329
+ },
4330
+ {
4331
+ "epoch": 0.6781893004115226,
4332
+ "grad_norm": 0.12823879718780518,
4333
+ "learning_rate": 7.225647348951912e-06,
4334
+ "loss": 1.0089,
4335
+ "step": 618
4336
+ },
4337
+ {
4338
+ "epoch": 0.6792866941015089,
4339
+ "grad_norm": 0.13076744973659515,
4340
+ "learning_rate": 7.200986436498151e-06,
4341
+ "loss": 1.1641,
4342
+ "step": 619
4343
+ },
4344
+ {
4345
+ "epoch": 0.6803840877914952,
4346
+ "grad_norm": 0.1256016492843628,
4347
+ "learning_rate": 7.1763255240443905e-06,
4348
+ "loss": 1.1092,
4349
+ "step": 620
4350
+ },
4351
+ {
4352
+ "epoch": 0.6814814814814815,
4353
+ "grad_norm": 0.14268584549427032,
4354
+ "learning_rate": 7.1516646115906294e-06,
4355
+ "loss": 1.0106,
4356
+ "step": 621
4357
+ },
4358
+ {
4359
+ "epoch": 0.6825788751714678,
4360
+ "grad_norm": 0.13120578229427338,
4361
+ "learning_rate": 7.127003699136868e-06,
4362
+ "loss": 1.0598,
4363
+ "step": 622
4364
+ },
4365
+ {
4366
+ "epoch": 0.683676268861454,
4367
+ "grad_norm": 0.13504907488822937,
4368
+ "learning_rate": 7.102342786683107e-06,
4369
+ "loss": 1.0966,
4370
+ "step": 623
4371
+ },
4372
+ {
4373
+ "epoch": 0.6847736625514403,
4374
+ "grad_norm": 0.12563414871692657,
4375
+ "learning_rate": 7.077681874229347e-06,
4376
+ "loss": 1.0724,
4377
+ "step": 624
4378
+ },
4379
+ {
4380
+ "epoch": 0.6858710562414266,
4381
+ "grad_norm": 0.13522499799728394,
4382
+ "learning_rate": 7.053020961775586e-06,
4383
+ "loss": 1.1182,
4384
+ "step": 625
4385
+ },
4386
+ {
4387
+ "epoch": 0.6869684499314129,
4388
+ "grad_norm": 0.12960287928581238,
4389
+ "learning_rate": 7.028360049321825e-06,
4390
+ "loss": 1.1252,
4391
+ "step": 626
4392
+ },
4393
+ {
4394
+ "epoch": 0.6880658436213992,
4395
+ "grad_norm": 0.13346299529075623,
4396
+ "learning_rate": 7.003699136868065e-06,
4397
+ "loss": 1.1317,
4398
+ "step": 627
4399
+ },
4400
+ {
4401
+ "epoch": 0.6891632373113855,
4402
+ "grad_norm": 0.1333625316619873,
4403
+ "learning_rate": 6.979038224414304e-06,
4404
+ "loss": 1.1505,
4405
+ "step": 628
4406
+ },
4407
+ {
4408
+ "epoch": 0.6902606310013717,
4409
+ "grad_norm": 0.1392945796251297,
4410
+ "learning_rate": 6.954377311960543e-06,
4411
+ "loss": 1.1966,
4412
+ "step": 629
4413
+ },
4414
+ {
4415
+ "epoch": 0.691358024691358,
4416
+ "grad_norm": 0.12204419821500778,
4417
+ "learning_rate": 6.929716399506782e-06,
4418
+ "loss": 1.1243,
4419
+ "step": 630
4420
+ },
4421
+ {
4422
+ "epoch": 0.6924554183813443,
4423
+ "grad_norm": 0.1395426094532013,
4424
+ "learning_rate": 6.905055487053022e-06,
4425
+ "loss": 1.0475,
4426
+ "step": 631
4427
+ },
4428
+ {
4429
+ "epoch": 0.6935528120713306,
4430
+ "grad_norm": 0.13325053453445435,
4431
+ "learning_rate": 6.880394574599261e-06,
4432
+ "loss": 1.0344,
4433
+ "step": 632
4434
+ },
4435
+ {
4436
+ "epoch": 0.6946502057613169,
4437
+ "grad_norm": 0.14765462279319763,
4438
+ "learning_rate": 6.8557336621455e-06,
4439
+ "loss": 1.0965,
4440
+ "step": 633
4441
+ },
4442
+ {
4443
+ "epoch": 0.6957475994513032,
4444
+ "grad_norm": 0.12556719779968262,
4445
+ "learning_rate": 6.8310727496917395e-06,
4446
+ "loss": 1.199,
4447
+ "step": 634
4448
+ },
4449
+ {
4450
+ "epoch": 0.6968449931412894,
4451
+ "grad_norm": 0.12908804416656494,
4452
+ "learning_rate": 6.806411837237978e-06,
4453
+ "loss": 1.1594,
4454
+ "step": 635
4455
+ },
4456
+ {
4457
+ "epoch": 0.6979423868312757,
4458
+ "grad_norm": 0.1703738272190094,
4459
+ "learning_rate": 6.781750924784217e-06,
4460
+ "loss": 1.0171,
4461
+ "step": 636
4462
+ },
4463
+ {
4464
+ "epoch": 0.699039780521262,
4465
+ "grad_norm": 0.12791863083839417,
4466
+ "learning_rate": 6.757090012330457e-06,
4467
+ "loss": 1.2105,
4468
+ "step": 637
4469
+ },
4470
+ {
4471
+ "epoch": 0.7001371742112483,
4472
+ "grad_norm": 0.17011161148548126,
4473
+ "learning_rate": 6.732429099876696e-06,
4474
+ "loss": 1.0192,
4475
+ "step": 638
4476
+ },
4477
+ {
4478
+ "epoch": 0.7012345679012346,
4479
+ "grad_norm": 0.14074620604515076,
4480
+ "learning_rate": 6.707768187422935e-06,
4481
+ "loss": 1.1763,
4482
+ "step": 639
4483
+ },
4484
+ {
4485
+ "epoch": 0.7023319615912208,
4486
+ "grad_norm": 0.13788381218910217,
4487
+ "learning_rate": 6.683107274969174e-06,
4488
+ "loss": 1.0638,
4489
+ "step": 640
4490
+ },
4491
+ {
4492
+ "epoch": 0.7034293552812071,
4493
+ "grad_norm": 0.13305304944515228,
4494
+ "learning_rate": 6.6584463625154135e-06,
4495
+ "loss": 1.1449,
4496
+ "step": 641
4497
+ },
4498
+ {
4499
+ "epoch": 0.7045267489711934,
4500
+ "grad_norm": 0.1297188103199005,
4501
+ "learning_rate": 6.633785450061652e-06,
4502
+ "loss": 1.1244,
4503
+ "step": 642
4504
+ },
4505
+ {
4506
+ "epoch": 0.7056241426611797,
4507
+ "grad_norm": 0.12216539680957794,
4508
+ "learning_rate": 6.609124537607891e-06,
4509
+ "loss": 1.099,
4510
+ "step": 643
4511
+ },
4512
+ {
4513
+ "epoch": 0.706721536351166,
4514
+ "grad_norm": 0.12714643776416779,
4515
+ "learning_rate": 6.584463625154132e-06,
4516
+ "loss": 1.1373,
4517
+ "step": 644
4518
+ },
4519
+ {
4520
+ "epoch": 0.7078189300411523,
4521
+ "grad_norm": 0.12196072936058044,
4522
+ "learning_rate": 6.559802712700371e-06,
4523
+ "loss": 1.1225,
4524
+ "step": 645
4525
+ },
4526
+ {
4527
+ "epoch": 0.7089163237311386,
4528
+ "grad_norm": 0.1701362133026123,
4529
+ "learning_rate": 6.53514180024661e-06,
4530
+ "loss": 0.991,
4531
+ "step": 646
4532
+ },
4533
+ {
4534
+ "epoch": 0.7100137174211248,
4535
+ "grad_norm": 0.1309044361114502,
4536
+ "learning_rate": 6.5104808877928495e-06,
4537
+ "loss": 1.1614,
4538
+ "step": 647
4539
+ },
4540
+ {
4541
+ "epoch": 0.7111111111111111,
4542
+ "grad_norm": 0.1310199499130249,
4543
+ "learning_rate": 6.485819975339088e-06,
4544
+ "loss": 1.1724,
4545
+ "step": 648
4546
+ },
4547
+ {
4548
+ "epoch": 0.7122085048010974,
4549
+ "grad_norm": 0.15935364365577698,
4550
+ "learning_rate": 6.461159062885327e-06,
4551
+ "loss": 1.0417,
4552
+ "step": 649
4553
+ },
4554
+ {
4555
+ "epoch": 0.7133058984910837,
4556
+ "grad_norm": 0.13248024880886078,
4557
+ "learning_rate": 6.436498150431566e-06,
4558
+ "loss": 1.2158,
4559
+ "step": 650
4560
+ },
4561
+ {
4562
+ "epoch": 0.7144032921810699,
4563
+ "grad_norm": 0.14017465710639954,
4564
+ "learning_rate": 6.411837237977806e-06,
4565
+ "loss": 1.1212,
4566
+ "step": 651
4567
+ },
4568
+ {
4569
+ "epoch": 0.7155006858710562,
4570
+ "grad_norm": 0.13974924385547638,
4571
+ "learning_rate": 6.387176325524045e-06,
4572
+ "loss": 1.0866,
4573
+ "step": 652
4574
+ },
4575
+ {
4576
+ "epoch": 0.7165980795610425,
4577
+ "grad_norm": 0.13914860785007477,
4578
+ "learning_rate": 6.362515413070284e-06,
4579
+ "loss": 1.046,
4580
+ "step": 653
4581
+ },
4582
+ {
4583
+ "epoch": 0.7176954732510288,
4584
+ "grad_norm": 0.1510930210351944,
4585
+ "learning_rate": 6.3378545006165236e-06,
4586
+ "loss": 0.9835,
4587
+ "step": 654
4588
+ },
4589
+ {
4590
+ "epoch": 0.7187928669410151,
4591
+ "grad_norm": 0.13082289695739746,
4592
+ "learning_rate": 6.3131935881627625e-06,
4593
+ "loss": 1.1769,
4594
+ "step": 655
4595
+ },
4596
+ {
4597
+ "epoch": 0.7198902606310014,
4598
+ "grad_norm": 0.14069297909736633,
4599
+ "learning_rate": 6.288532675709001e-06,
4600
+ "loss": 1.0869,
4601
+ "step": 656
4602
+ },
4603
+ {
4604
+ "epoch": 0.7209876543209877,
4605
+ "grad_norm": 0.1553945541381836,
4606
+ "learning_rate": 6.263871763255241e-06,
4607
+ "loss": 1.0641,
4608
+ "step": 657
4609
+ },
4610
+ {
4611
+ "epoch": 0.722085048010974,
4612
+ "grad_norm": 0.14064814150333405,
4613
+ "learning_rate": 6.23921085080148e-06,
4614
+ "loss": 1.1924,
4615
+ "step": 658
4616
+ },
4617
+ {
4618
+ "epoch": 0.7231824417009602,
4619
+ "grad_norm": 0.1389569491147995,
4620
+ "learning_rate": 6.214549938347719e-06,
4621
+ "loss": 1.0729,
4622
+ "step": 659
4623
+ },
4624
+ {
4625
+ "epoch": 0.7242798353909465,
4626
+ "grad_norm": 0.14110144972801208,
4627
+ "learning_rate": 6.189889025893958e-06,
4628
+ "loss": 1.1349,
4629
+ "step": 660
4630
+ },
4631
+ {
4632
+ "epoch": 0.7253772290809328,
4633
+ "grad_norm": 0.13982906937599182,
4634
+ "learning_rate": 6.1652281134401985e-06,
4635
+ "loss": 1.0304,
4636
+ "step": 661
4637
+ },
4638
+ {
4639
+ "epoch": 0.7264746227709191,
4640
+ "grad_norm": 0.12203299254179001,
4641
+ "learning_rate": 6.140567200986437e-06,
4642
+ "loss": 1.2023,
4643
+ "step": 662
4644
+ },
4645
+ {
4646
+ "epoch": 0.7275720164609053,
4647
+ "grad_norm": 0.1401350200176239,
4648
+ "learning_rate": 6.115906288532676e-06,
4649
+ "loss": 1.0947,
4650
+ "step": 663
4651
+ },
4652
+ {
4653
+ "epoch": 0.7286694101508916,
4654
+ "grad_norm": 0.14056162536144257,
4655
+ "learning_rate": 6.091245376078916e-06,
4656
+ "loss": 1.073,
4657
+ "step": 664
4658
+ },
4659
+ {
4660
+ "epoch": 0.7297668038408779,
4661
+ "grad_norm": 0.13901904225349426,
4662
+ "learning_rate": 6.066584463625155e-06,
4663
+ "loss": 1.134,
4664
+ "step": 665
4665
+ },
4666
+ {
4667
+ "epoch": 0.7308641975308642,
4668
+ "grad_norm": 0.1339583396911621,
4669
+ "learning_rate": 6.041923551171394e-06,
4670
+ "loss": 1.2012,
4671
+ "step": 666
4672
  }
4673
  ],
4674
  "logging_steps": 1,
 
@@ -4170,7 +4688,7 @@
  "attributes": {}
  }
  },
- "total_flos": 6.134242377148416e+17,
+ "total_flos": 6.904814083900785e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null