DuongTrongChi committed
Commit 2fd6798 · verified · 1 Parent(s): 8383832

Training in progress, step 911, checkpoint

Browse files
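The three checkpoint binaries below are stored via Git LFS, so the diff only shows their pointer files (sha256 oid plus size). As a minimal sketch, not part of this commit, one could verify a locally downloaded file against the oid recorded in its pointer, e.g. the new adapter_model.safetensors oid shown below:

```python
# Hypothetical verification helper (not part of this commit): compare a local
# file's sha256 digest against the oid recorded in its Git LFS pointer.
import hashlib

EXPECTED_OID = "77c90016a866284a2715dd96997296bc544f7f98889e7c3a425100f87afad0cf"  # new adapter_model.safetensors oid

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file in chunks so large checkpoints need not fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

print(sha256_of("last-checkpoint/adapter_model.safetensors") == EXPECTED_OID)
```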
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82d8bacab76dbcf5eb4f92f5f82c3e20484571fea802ab9933f74589a3eabfb9
+oid sha256:77c90016a866284a2715dd96997296bc544f7f98889e7c3a425100f87afad0cf
 size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:494168482585a7bfcbfbcc705c8748da82c9e8a80556079a12a6d8916e7a8125
+oid sha256:b4a7bafef733afd7ab58bb79bbd7b589528548bcb6cb645b64505a525cc544ab
 size 50675604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b85a44ab20a866b548a1f13ff0bee3fdd4760db12bf889cf65646cf2995e841c
+oid sha256:04e2e9566872b589840ca5036c48f962e578ce55c36abdd70f84617113393bbb
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9733882030178327,
+  "epoch": 0.9997256515775035,
   "eval_steps": 500,
-  "global_step": 887,
+  "global_step": 911,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6216,6 +6216,174 @@
       "learning_rate": 5.91861898890259e-07,
       "loss": 1.1123,
       "step": 887
+    },
+    {
+      "epoch": 0.974485596707819,
+      "grad_norm": 0.13672898709774017,
+      "learning_rate": 5.672009864364982e-07,
+      "loss": 1.1418,
+      "step": 888
+    },
+    {
+      "epoch": 0.9755829903978052,
+      "grad_norm": 0.14941120147705078,
+      "learning_rate": 5.425400739827374e-07,
+      "loss": 1.1893,
+      "step": 889
+    },
+    {
+      "epoch": 0.9766803840877915,
+      "grad_norm": 0.13195830583572388,
+      "learning_rate": 5.178791615289766e-07,
+      "loss": 1.1235,
+      "step": 890
+    },
+    {
+      "epoch": 0.9777777777777777,
+      "grad_norm": 0.13043729960918427,
+      "learning_rate": 4.932182490752158e-07,
+      "loss": 1.1237,
+      "step": 891
+    },
+    {
+      "epoch": 0.978875171467764,
+      "grad_norm": 0.1450997292995453,
+      "learning_rate": 4.6855733662145503e-07,
+      "loss": 1.1434,
+      "step": 892
+    },
+    {
+      "epoch": 0.9799725651577503,
+      "grad_norm": 0.12888318300247192,
+      "learning_rate": 4.438964241676942e-07,
+      "loss": 1.1535,
+      "step": 893
+    },
+    {
+      "epoch": 0.9810699588477366,
+      "grad_norm": 0.1358352154493332,
+      "learning_rate": 4.1923551171393343e-07,
+      "loss": 1.0979,
+      "step": 894
+    },
+    {
+      "epoch": 0.9821673525377229,
+      "grad_norm": 0.1400756686925888,
+      "learning_rate": 3.9457459926017265e-07,
+      "loss": 1.0588,
+      "step": 895
+    },
+    {
+      "epoch": 0.9832647462277092,
+      "grad_norm": 0.1559644639492035,
+      "learning_rate": 3.699136868064119e-07,
+      "loss": 1.137,
+      "step": 896
+    },
+    {
+      "epoch": 0.9843621399176955,
+      "grad_norm": 0.13535606861114502,
+      "learning_rate": 3.4525277435265105e-07,
+      "loss": 1.1953,
+      "step": 897
+    },
+    {
+      "epoch": 0.9854595336076818,
+      "grad_norm": 0.1321636438369751,
+      "learning_rate": 3.205918618988903e-07,
+      "loss": 1.0905,
+      "step": 898
+    },
+    {
+      "epoch": 0.9865569272976681,
+      "grad_norm": 0.14353665709495544,
+      "learning_rate": 2.959309494451295e-07,
+      "loss": 1.126,
+      "step": 899
+    },
+    {
+      "epoch": 0.9876543209876543,
+      "grad_norm": 0.1350976675748825,
+      "learning_rate": 2.712700369913687e-07,
+      "loss": 1.1387,
+      "step": 900
+    },
+    {
+      "epoch": 0.9887517146776406,
+      "grad_norm": 0.13206009566783905,
+      "learning_rate": 2.466091245376079e-07,
+      "loss": 1.0879,
+      "step": 901
+    },
+    {
+      "epoch": 0.9898491083676269,
+      "grad_norm": 0.13680791854858398,
+      "learning_rate": 2.219482120838471e-07,
+      "loss": 1.1285,
+      "step": 902
+    },
+    {
+      "epoch": 0.9909465020576131,
+      "grad_norm": 0.17720668017864227,
+      "learning_rate": 1.9728729963008633e-07,
+      "loss": 1.0461,
+      "step": 903
+    },
+    {
+      "epoch": 0.9920438957475994,
+      "grad_norm": 0.14731575548648834,
+      "learning_rate": 1.7262638717632553e-07,
+      "loss": 1.0347,
+      "step": 904
+    },
+    {
+      "epoch": 0.9931412894375857,
+      "grad_norm": 0.14285138249397278,
+      "learning_rate": 1.4796547472256475e-07,
+      "loss": 1.0603,
+      "step": 905
+    },
+    {
+      "epoch": 0.994238683127572,
+      "grad_norm": 0.1308155655860901,
+      "learning_rate": 1.2330456226880395e-07,
+      "loss": 1.101,
+      "step": 906
+    },
+    {
+      "epoch": 0.9953360768175583,
+      "grad_norm": 0.1355922371149063,
+      "learning_rate": 9.864364981504316e-08,
+      "loss": 1.2046,
+      "step": 907
+    },
+    {
+      "epoch": 0.9964334705075446,
+      "grad_norm": 0.13389693200588226,
+      "learning_rate": 7.398273736128238e-08,
+      "loss": 1.1953,
+      "step": 908
+    },
+    {
+      "epoch": 0.9975308641975309,
+      "grad_norm": 0.1277182251214981,
+      "learning_rate": 4.932182490752158e-08,
+      "loss": 1.1153,
+      "step": 909
+    },
+    {
+      "epoch": 0.9986282578875172,
+      "grad_norm": 0.1549104005098343,
+      "learning_rate": 2.466091245376079e-08,
+      "loss": 1.0726,
+      "step": 910
+    },
+    {
+      "epoch": 0.9997256515775035,
+      "grad_norm": 0.1443055421113968,
+      "learning_rate": 0.0,
+      "loss": 1.1051,
+      "step": 911
     }
   ],
   "logging_steps": 1,
@@ -6230,12 +6398,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.202552775203185e+17,
+  "total_flos": 9.455125939404595e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null