DuongTrongChi commited on
Commit
7f67ab7
1 Parent(s): e203dcd

Training in progress, step 635, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6589280cb3cff27d0e3f809d9fe18d16f0e9a7c5605ca58835189f572dabff16
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e6916f7e36c566c02cf5b2c732bbba7342ba9b478d907f6a86bacf7cb2c0150
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0682c52911bfd561a6f52732b0960db62f88ed27fe66503c2c995a4eaf4cdaed
3
  size 50675604
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a06e698c180e26ad266a872f2b31ce48f7f30f3e994431201bfae0f415a7c1a3
3
  size 50675604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c64a34b4af24bd84d792037de6a5cdeb1e9758d386ffdf2a30823c12441032d4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:673f846db062d0e3d1543ca2e762819fe3771910bf6b5b7e181954499f0e9914
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8474111953246279,
5
  "eval_steps": 500,
6
- "global_step": 580,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4067,6 +4067,391 @@
4067
  "learning_rate": 3.5616438356164386e-06,
4068
  "loss": 1.1487,
4069
  "step": 580
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4070
  }
4071
  ],
4072
  "logging_steps": 1,
@@ -4086,7 +4471,7 @@
4086
  "attributes": {}
4087
  }
4088
  },
4089
- "total_flos": 6.557703270576169e+17,
4090
  "train_batch_size": 4,
4091
  "trial_name": null,
4092
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9277691535019633,
5
  "eval_steps": 500,
6
+ "global_step": 635,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4067
  "learning_rate": 3.5616438356164386e-06,
4068
  "loss": 1.1487,
4069
  "step": 580
4070
+ },
4071
+ {
4072
+ "epoch": 0.8488722491096703,
4073
+ "grad_norm": 0.12069284915924072,
4074
+ "learning_rate": 3.527397260273973e-06,
4075
+ "loss": 1.1901,
4076
+ "step": 581
4077
+ },
4078
+ {
4079
+ "epoch": 0.8503333028947128,
4080
+ "grad_norm": 0.11841464787721634,
4081
+ "learning_rate": 3.4931506849315072e-06,
4082
+ "loss": 1.1714,
4083
+ "step": 582
4084
+ },
4085
+ {
4086
+ "epoch": 0.8517943566797552,
4087
+ "grad_norm": 0.12204127013683319,
4088
+ "learning_rate": 3.458904109589041e-06,
4089
+ "loss": 1.1501,
4090
+ "step": 583
4091
+ },
4092
+ {
4093
+ "epoch": 0.8532554104647977,
4094
+ "grad_norm": 0.13133108615875244,
4095
+ "learning_rate": 3.4246575342465754e-06,
4096
+ "loss": 1.2586,
4097
+ "step": 584
4098
+ },
4099
+ {
4100
+ "epoch": 0.8547164642498402,
4101
+ "grad_norm": 0.11903316527605057,
4102
+ "learning_rate": 3.39041095890411e-06,
4103
+ "loss": 1.2399,
4104
+ "step": 585
4105
+ },
4106
+ {
4107
+ "epoch": 0.8561775180348826,
4108
+ "grad_norm": 0.1073235422372818,
4109
+ "learning_rate": 3.356164383561644e-06,
4110
+ "loss": 1.2036,
4111
+ "step": 586
4112
+ },
4113
+ {
4114
+ "epoch": 0.8576385718199251,
4115
+ "grad_norm": 0.1356821060180664,
4116
+ "learning_rate": 3.3219178082191784e-06,
4117
+ "loss": 1.1168,
4118
+ "step": 587
4119
+ },
4120
+ {
4121
+ "epoch": 0.8590996256049676,
4122
+ "grad_norm": 0.1314031332731247,
4123
+ "learning_rate": 3.2876712328767123e-06,
4124
+ "loss": 1.1407,
4125
+ "step": 588
4126
+ },
4127
+ {
4128
+ "epoch": 0.86056067939001,
4129
+ "grad_norm": 0.1098315566778183,
4130
+ "learning_rate": 3.253424657534247e-06,
4131
+ "loss": 1.1737,
4132
+ "step": 589
4133
+ },
4134
+ {
4135
+ "epoch": 0.8620217331750525,
4136
+ "grad_norm": 0.12484846264123917,
4137
+ "learning_rate": 3.2191780821917813e-06,
4138
+ "loss": 1.29,
4139
+ "step": 590
4140
+ },
4141
+ {
4142
+ "epoch": 0.863482786960095,
4143
+ "grad_norm": 0.12023176997900009,
4144
+ "learning_rate": 3.184931506849315e-06,
4145
+ "loss": 1.2455,
4146
+ "step": 591
4147
+ },
4148
+ {
4149
+ "epoch": 0.8649438407451374,
4150
+ "grad_norm": 0.1124570220708847,
4151
+ "learning_rate": 3.1506849315068495e-06,
4152
+ "loss": 1.1792,
4153
+ "step": 592
4154
+ },
4155
+ {
4156
+ "epoch": 0.8664048945301799,
4157
+ "grad_norm": 0.11319974809885025,
4158
+ "learning_rate": 3.1164383561643843e-06,
4159
+ "loss": 1.1708,
4160
+ "step": 593
4161
+ },
4162
+ {
4163
+ "epoch": 0.8678659483152223,
4164
+ "grad_norm": 0.11844311654567719,
4165
+ "learning_rate": 3.082191780821918e-06,
4166
+ "loss": 1.1962,
4167
+ "step": 594
4168
+ },
4169
+ {
4170
+ "epoch": 0.8693270021002648,
4171
+ "grad_norm": 0.12392130494117737,
4172
+ "learning_rate": 3.0479452054794525e-06,
4173
+ "loss": 1.1748,
4174
+ "step": 595
4175
+ },
4176
+ {
4177
+ "epoch": 0.8707880558853073,
4178
+ "grad_norm": 0.1305224448442459,
4179
+ "learning_rate": 3.0136986301369864e-06,
4180
+ "loss": 1.2388,
4181
+ "step": 596
4182
+ },
4183
+ {
4184
+ "epoch": 0.8722491096703497,
4185
+ "grad_norm": 0.11098136007785797,
4186
+ "learning_rate": 2.979452054794521e-06,
4187
+ "loss": 1.2109,
4188
+ "step": 597
4189
+ },
4190
+ {
4191
+ "epoch": 0.8737101634553922,
4192
+ "grad_norm": 0.11488241702318192,
4193
+ "learning_rate": 2.945205479452055e-06,
4194
+ "loss": 1.2222,
4195
+ "step": 598
4196
+ },
4197
+ {
4198
+ "epoch": 0.8751712172404347,
4199
+ "grad_norm": 0.11296083778142929,
4200
+ "learning_rate": 2.9109589041095893e-06,
4201
+ "loss": 1.2425,
4202
+ "step": 599
4203
+ },
4204
+ {
4205
+ "epoch": 0.8766322710254771,
4206
+ "grad_norm": 0.11494144797325134,
4207
+ "learning_rate": 2.876712328767123e-06,
4208
+ "loss": 1.1589,
4209
+ "step": 600
4210
+ },
4211
+ {
4212
+ "epoch": 0.8780933248105196,
4213
+ "grad_norm": 0.11837083101272583,
4214
+ "learning_rate": 2.842465753424658e-06,
4215
+ "loss": 1.1556,
4216
+ "step": 601
4217
+ },
4218
+ {
4219
+ "epoch": 0.8795543785955621,
4220
+ "grad_norm": 0.12680098414421082,
4221
+ "learning_rate": 2.8082191780821922e-06,
4222
+ "loss": 1.1742,
4223
+ "step": 602
4224
+ },
4225
+ {
4226
+ "epoch": 0.8810154323806045,
4227
+ "grad_norm": 0.12157981842756271,
4228
+ "learning_rate": 2.773972602739726e-06,
4229
+ "loss": 1.1715,
4230
+ "step": 603
4231
+ },
4232
+ {
4233
+ "epoch": 0.882476486165647,
4234
+ "grad_norm": 0.11859599500894547,
4235
+ "learning_rate": 2.7397260273972604e-06,
4236
+ "loss": 1.1988,
4237
+ "step": 604
4238
+ },
4239
+ {
4240
+ "epoch": 0.8839375399506895,
4241
+ "grad_norm": 0.11733684688806534,
4242
+ "learning_rate": 2.705479452054795e-06,
4243
+ "loss": 1.2507,
4244
+ "step": 605
4245
+ },
4246
+ {
4247
+ "epoch": 0.8853985937357319,
4248
+ "grad_norm": 0.11254255473613739,
4249
+ "learning_rate": 2.671232876712329e-06,
4250
+ "loss": 1.1843,
4251
+ "step": 606
4252
+ },
4253
+ {
4254
+ "epoch": 0.8868596475207744,
4255
+ "grad_norm": 0.11433933675289154,
4256
+ "learning_rate": 2.6369863013698634e-06,
4257
+ "loss": 1.1129,
4258
+ "step": 607
4259
+ },
4260
+ {
4261
+ "epoch": 0.8883207013058169,
4262
+ "grad_norm": 0.11361224204301834,
4263
+ "learning_rate": 2.6027397260273973e-06,
4264
+ "loss": 1.1883,
4265
+ "step": 608
4266
+ },
4267
+ {
4268
+ "epoch": 0.8897817550908593,
4269
+ "grad_norm": 0.11042333394289017,
4270
+ "learning_rate": 2.568493150684932e-06,
4271
+ "loss": 1.2322,
4272
+ "step": 609
4273
+ },
4274
+ {
4275
+ "epoch": 0.8912428088759018,
4276
+ "grad_norm": 0.1262194812297821,
4277
+ "learning_rate": 2.534246575342466e-06,
4278
+ "loss": 1.1767,
4279
+ "step": 610
4280
+ },
4281
+ {
4282
+ "epoch": 0.8927038626609443,
4283
+ "grad_norm": 0.10622208565473557,
4284
+ "learning_rate": 2.5e-06,
4285
+ "loss": 1.2355,
4286
+ "step": 611
4287
+ },
4288
+ {
4289
+ "epoch": 0.8941649164459866,
4290
+ "grad_norm": 0.11434955894947052,
4291
+ "learning_rate": 2.4657534246575345e-06,
4292
+ "loss": 1.1571,
4293
+ "step": 612
4294
+ },
4295
+ {
4296
+ "epoch": 0.8956259702310291,
4297
+ "grad_norm": 0.11948630958795547,
4298
+ "learning_rate": 2.431506849315069e-06,
4299
+ "loss": 1.1345,
4300
+ "step": 613
4301
+ },
4302
+ {
4303
+ "epoch": 0.8970870240160715,
4304
+ "grad_norm": 0.119502492249012,
4305
+ "learning_rate": 2.3972602739726027e-06,
4306
+ "loss": 1.1296,
4307
+ "step": 614
4308
+ },
4309
+ {
4310
+ "epoch": 0.898548077801114,
4311
+ "grad_norm": 0.1145474910736084,
4312
+ "learning_rate": 2.363013698630137e-06,
4313
+ "loss": 1.2316,
4314
+ "step": 615
4315
+ },
4316
+ {
4317
+ "epoch": 0.9000091315861565,
4318
+ "grad_norm": 0.11403004080057144,
4319
+ "learning_rate": 2.3287671232876713e-06,
4320
+ "loss": 1.1536,
4321
+ "step": 616
4322
+ },
4323
+ {
4324
+ "epoch": 0.9014701853711989,
4325
+ "grad_norm": 0.11280905455350876,
4326
+ "learning_rate": 2.2945205479452057e-06,
4327
+ "loss": 1.1922,
4328
+ "step": 617
4329
+ },
4330
+ {
4331
+ "epoch": 0.9029312391562414,
4332
+ "grad_norm": 0.10832927376031876,
4333
+ "learning_rate": 2.26027397260274e-06,
4334
+ "loss": 1.2386,
4335
+ "step": 618
4336
+ },
4337
+ {
4338
+ "epoch": 0.9043922929412839,
4339
+ "grad_norm": 0.11291555315256119,
4340
+ "learning_rate": 2.2260273972602743e-06,
4341
+ "loss": 1.1788,
4342
+ "step": 619
4343
+ },
4344
+ {
4345
+ "epoch": 0.9058533467263263,
4346
+ "grad_norm": 0.1250094771385193,
4347
+ "learning_rate": 2.191780821917808e-06,
4348
+ "loss": 1.2142,
4349
+ "step": 620
4350
+ },
4351
+ {
4352
+ "epoch": 0.9073144005113688,
4353
+ "grad_norm": 0.11631559580564499,
4354
+ "learning_rate": 2.1575342465753425e-06,
4355
+ "loss": 1.2121,
4356
+ "step": 621
4357
+ },
4358
+ {
4359
+ "epoch": 0.9087754542964113,
4360
+ "grad_norm": 0.11511734873056412,
4361
+ "learning_rate": 2.123287671232877e-06,
4362
+ "loss": 1.1137,
4363
+ "step": 622
4364
+ },
4365
+ {
4366
+ "epoch": 0.9102365080814537,
4367
+ "grad_norm": 0.12305217236280441,
4368
+ "learning_rate": 2.089041095890411e-06,
4369
+ "loss": 1.1745,
4370
+ "step": 623
4371
+ },
4372
+ {
4373
+ "epoch": 0.9116975618664962,
4374
+ "grad_norm": 0.14875584840774536,
4375
+ "learning_rate": 2.0547945205479454e-06,
4376
+ "loss": 1.1986,
4377
+ "step": 624
4378
+ },
4379
+ {
4380
+ "epoch": 0.9131586156515387,
4381
+ "grad_norm": 0.13255374133586884,
4382
+ "learning_rate": 2.0205479452054797e-06,
4383
+ "loss": 1.2716,
4384
+ "step": 625
4385
+ },
4386
+ {
4387
+ "epoch": 0.9146196694365811,
4388
+ "grad_norm": 0.13747917115688324,
4389
+ "learning_rate": 1.9863013698630136e-06,
4390
+ "loss": 1.1092,
4391
+ "step": 626
4392
+ },
4393
+ {
4394
+ "epoch": 0.9160807232216236,
4395
+ "grad_norm": 0.11340590566396713,
4396
+ "learning_rate": 1.952054794520548e-06,
4397
+ "loss": 1.2085,
4398
+ "step": 627
4399
+ },
4400
+ {
4401
+ "epoch": 0.9175417770066661,
4402
+ "grad_norm": 0.11387283354997635,
4403
+ "learning_rate": 1.9178082191780823e-06,
4404
+ "loss": 1.2549,
4405
+ "step": 628
4406
+ },
4407
+ {
4408
+ "epoch": 0.9190028307917085,
4409
+ "grad_norm": 0.11490115523338318,
4410
+ "learning_rate": 1.8835616438356166e-06,
4411
+ "loss": 1.1344,
4412
+ "step": 629
4413
+ },
4414
+ {
4415
+ "epoch": 0.920463884576751,
4416
+ "grad_norm": 0.10832976549863815,
4417
+ "learning_rate": 1.8493150684931507e-06,
4418
+ "loss": 1.2395,
4419
+ "step": 630
4420
+ },
4421
+ {
4422
+ "epoch": 0.9219249383617935,
4423
+ "grad_norm": 0.12825772166252136,
4424
+ "learning_rate": 1.8150684931506852e-06,
4425
+ "loss": 1.2296,
4426
+ "step": 631
4427
+ },
4428
+ {
4429
+ "epoch": 0.9233859921468359,
4430
+ "grad_norm": 0.1021864116191864,
4431
+ "learning_rate": 1.7808219178082193e-06,
4432
+ "loss": 1.2589,
4433
+ "step": 632
4434
+ },
4435
+ {
4436
+ "epoch": 0.9248470459318784,
4437
+ "grad_norm": 0.12429718673229218,
4438
+ "learning_rate": 1.7465753424657536e-06,
4439
+ "loss": 1.1167,
4440
+ "step": 633
4441
+ },
4442
+ {
4443
+ "epoch": 0.9263080997169209,
4444
+ "grad_norm": 0.10847421735525131,
4445
+ "learning_rate": 1.7123287671232877e-06,
4446
+ "loss": 1.1975,
4447
+ "step": 634
4448
+ },
4449
+ {
4450
+ "epoch": 0.9277691535019633,
4451
+ "grad_norm": 0.11923690140247345,
4452
+ "learning_rate": 1.678082191780822e-06,
4453
+ "loss": 1.1595,
4454
+ "step": 635
4455
  }
4456
  ],
4457
  "logging_steps": 1,
 
4471
  "attributes": {}
4472
  }
4473
  },
4474
+ "total_flos": 7.18235035100799e+17,
4475
  "train_batch_size": 4,
4476
  "trial_name": null,
4477
  "trial_params": null