DuongTrongChi commited on
Commit
c4433b6
·
verified ·
1 Parent(s): a5861d0

Training in progress, step 468, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:061c854124c7de3bd91c0cfc837955b45adbf61e56d4945a11afee8302238f82
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d3b69465f580520acc9f202ffa4379391ff40a467272aa1bb1f587492ab0ce6
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5f296aecef4fc444fa010d18c111b83dd4188df7ebba16d0170d94f0cace3fa
3
  size 50675604
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47a355e0aa5b1f268bc54e7ab331360ae4e94422b16cd818e29aa21183cfe7a9
3
  size 50675604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bb4c66b605a6bb121ef17d8f0d98eeb35f0d2d7ca95ea55a77f54d5a44ec986
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f65e9bb5e56bd8df1e486444b8ee6dae7ecf218b93c8b810d2422fba26163752
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.4927297668038409,
5
  "eval_steps": 500,
6
- "global_step": 449,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3150,6 +3150,139 @@
3150
  "learning_rate": 1.1393341553637486e-05,
3151
  "loss": 1.1277,
3152
  "step": 449
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3153
  }
3154
  ],
3155
  "logging_steps": 1,
@@ -3169,7 +3302,7 @@
3169
  "attributes": {}
3170
  }
3171
  },
3172
- "total_flos": 4.664204424013824e+17,
3173
  "train_batch_size": 4,
3174
  "trial_name": null,
3175
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5135802469135803,
5
  "eval_steps": 500,
6
+ "global_step": 468,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3150
  "learning_rate": 1.1393341553637486e-05,
3151
  "loss": 1.1277,
3152
  "step": 449
3153
+ },
3154
+ {
3155
+ "epoch": 0.49382716049382713,
3156
+ "grad_norm": 0.13308392465114594,
3157
+ "learning_rate": 1.1368680641183724e-05,
3158
+ "loss": 1.0997,
3159
+ "step": 450
3160
+ },
3161
+ {
3162
+ "epoch": 0.4949245541838134,
3163
+ "grad_norm": 0.11568623036146164,
3164
+ "learning_rate": 1.1344019728729965e-05,
3165
+ "loss": 1.2082,
3166
+ "step": 451
3167
+ },
3168
+ {
3169
+ "epoch": 0.4960219478737997,
3170
+ "grad_norm": 0.12799036502838135,
3171
+ "learning_rate": 1.1319358816276202e-05,
3172
+ "loss": 1.1844,
3173
+ "step": 452
3174
+ },
3175
+ {
3176
+ "epoch": 0.497119341563786,
3177
+ "grad_norm": 0.12493016570806503,
3178
+ "learning_rate": 1.1294697903822443e-05,
3179
+ "loss": 1.2041,
3180
+ "step": 453
3181
+ },
3182
+ {
3183
+ "epoch": 0.4982167352537723,
3184
+ "grad_norm": 0.12631264328956604,
3185
+ "learning_rate": 1.1270036991368682e-05,
3186
+ "loss": 1.0397,
3187
+ "step": 454
3188
+ },
3189
+ {
3190
+ "epoch": 0.4993141289437586,
3191
+ "grad_norm": 0.12955130636692047,
3192
+ "learning_rate": 1.124537607891492e-05,
3193
+ "loss": 1.0504,
3194
+ "step": 455
3195
+ },
3196
+ {
3197
+ "epoch": 0.5004115226337449,
3198
+ "grad_norm": 0.12372354418039322,
3199
+ "learning_rate": 1.122071516646116e-05,
3200
+ "loss": 1.1982,
3201
+ "step": 456
3202
+ },
3203
+ {
3204
+ "epoch": 0.5015089163237312,
3205
+ "grad_norm": 0.13814988732337952,
3206
+ "learning_rate": 1.11960542540074e-05,
3207
+ "loss": 1.201,
3208
+ "step": 457
3209
+ },
3210
+ {
3211
+ "epoch": 0.5026063100137175,
3212
+ "grad_norm": 0.11566805094480515,
3213
+ "learning_rate": 1.1171393341553637e-05,
3214
+ "loss": 1.1334,
3215
+ "step": 458
3216
+ },
3217
+ {
3218
+ "epoch": 0.5037037037037037,
3219
+ "grad_norm": 0.11871378123760223,
3220
+ "learning_rate": 1.1146732429099878e-05,
3221
+ "loss": 1.1315,
3222
+ "step": 459
3223
+ },
3224
+ {
3225
+ "epoch": 0.50480109739369,
3226
+ "grad_norm": 0.12469706684350967,
3227
+ "learning_rate": 1.1122071516646115e-05,
3228
+ "loss": 1.1309,
3229
+ "step": 460
3230
+ },
3231
+ {
3232
+ "epoch": 0.5058984910836762,
3233
+ "grad_norm": 0.12486052513122559,
3234
+ "learning_rate": 1.1097410604192356e-05,
3235
+ "loss": 1.0966,
3236
+ "step": 461
3237
+ },
3238
+ {
3239
+ "epoch": 0.5069958847736625,
3240
+ "grad_norm": 0.12366752326488495,
3241
+ "learning_rate": 1.1072749691738596e-05,
3242
+ "loss": 1.1861,
3243
+ "step": 462
3244
+ },
3245
+ {
3246
+ "epoch": 0.5080932784636488,
3247
+ "grad_norm": 0.1204606145620346,
3248
+ "learning_rate": 1.1048088779284834e-05,
3249
+ "loss": 1.1,
3250
+ "step": 463
3251
+ },
3252
+ {
3253
+ "epoch": 0.5091906721536351,
3254
+ "grad_norm": 0.15034319460391998,
3255
+ "learning_rate": 1.1023427866831074e-05,
3256
+ "loss": 1.1683,
3257
+ "step": 464
3258
+ },
3259
+ {
3260
+ "epoch": 0.5102880658436214,
3261
+ "grad_norm": 0.1372024267911911,
3262
+ "learning_rate": 1.0998766954377313e-05,
3263
+ "loss": 1.1134,
3264
+ "step": 465
3265
+ },
3266
+ {
3267
+ "epoch": 0.5113854595336077,
3268
+ "grad_norm": 0.13857926428318024,
3269
+ "learning_rate": 1.097410604192355e-05,
3270
+ "loss": 1.1922,
3271
+ "step": 466
3272
+ },
3273
+ {
3274
+ "epoch": 0.512482853223594,
3275
+ "grad_norm": 0.1584538072347641,
3276
+ "learning_rate": 1.0949445129469791e-05,
3277
+ "loss": 1.139,
3278
+ "step": 467
3279
+ },
3280
+ {
3281
+ "epoch": 0.5135802469135803,
3282
+ "grad_norm": 0.14659465849399567,
3283
+ "learning_rate": 1.0924784217016032e-05,
3284
+ "loss": 1.1054,
3285
+ "step": 468
3286
  }
3287
  ],
3288
  "logging_steps": 1,
 
3302
  "attributes": {}
3303
  }
3304
  },
3305
+ "total_flos": 4.861679479484129e+17,
3306
  "train_batch_size": 4,
3307
  "trial_name": null,
3308
  "trial_params": null