DuongTrongChi committed (verified)
Commit 1fd37d8 · 1 parent: d08dbf0

Training in progress, step 475, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:6f29bece7f5d3a6d7983929b36fdd27d718d95657f793d9d5e6f3cfa88297f85
+ oid sha256:a76ebf58b46d7d773c0e60f3c3b3202c39175dc82e54c189f3267d0947c4a8ff
 size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:afc3e3ee70cf903c1796f57d4e5235a441242375ef1f9a858eae3e8d05f28343
+ oid sha256:62b4cdbdbd72fca14cbd95d40faf9abb8a8ecb3ba4993c400be6e7b437b4b824
 size 50675604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:4088348bc24d9e4ac19382802a5af616a59820aa6b3e226460dd20d33eb18c94
+ oid sha256:b050456dfa3d81625079076f253a2e5a55a9198ab0c9ed74cbb8cd2fe6a1e442
 size 1064
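The three binary files above are stored through Git LFS, so the diff only touches their pointer files: the version line is unchanged, while oid sha256 and size identify the new object (the sizes stay the same, which is expected when the same set of tensors is re-saved with updated values). As a rough illustration only, not part of this commit and with hypothetical local paths, a downloaded object can be checked against its pointer like this:

import hashlib
from pathlib import Path

def verify_lfs_object(pointer_path: str, object_path: str) -> bool:
    # Parse the "key value" lines of an LFS pointer (version / oid / size).
    fields = dict(
        line.split(" ", 1)
        for line in Path(pointer_path).read_text().splitlines()
        if " " in line
    )
    expected_oid = fields["oid"].removeprefix("sha256:").strip()
    expected_size = int(fields["size"])

    data = Path(object_path).read_bytes()
    # The pointer's oid is the SHA-256 of the object's contents.
    return hashlib.sha256(data).hexdigest() == expected_oid and len(data) == expected_size

# Hypothetical paths; the pointer contents are exactly what the hunks above show.
print(verify_lfs_object("adapter_model.pointer", "last-checkpoint/adapter_model.safetensors"))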
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.6340973427084284,
+ "epoch": 0.6940005478951694,
  "eval_steps": 500,
- "global_step": 434,
+ "global_step": 475,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -3045,6 +3045,293 @@
  "learning_rate": 8.56164383561644e-06,
  "loss": 1.1743,
  "step": 434
+ },
+ {
+ "epoch": 0.6355583964934709,
+ "grad_norm": 0.1085817888379097,
+ "learning_rate": 8.527397260273972e-06,
+ "loss": 1.2021,
+ "step": 435
+ },
+ {
+ "epoch": 0.6370194502785134,
+ "grad_norm": 0.10217051953077316,
+ "learning_rate": 8.493150684931507e-06,
+ "loss": 1.1815,
+ "step": 436
+ },
+ {
+ "epoch": 0.6384805040635558,
+ "grad_norm": 0.11223044246435165,
+ "learning_rate": 8.458904109589042e-06,
+ "loss": 1.1238,
+ "step": 437
+ },
+ {
+ "epoch": 0.6399415578485983,
+ "grad_norm": 0.10959354788064957,
+ "learning_rate": 8.424657534246577e-06,
+ "loss": 1.116,
+ "step": 438
+ },
+ {
+ "epoch": 0.6414026116336408,
+ "grad_norm": 0.12990103662014008,
+ "learning_rate": 8.39041095890411e-06,
+ "loss": 1.1134,
+ "step": 439
+ },
+ {
+ "epoch": 0.6428636654186832,
+ "grad_norm": 0.11417476832866669,
+ "learning_rate": 8.356164383561644e-06,
+ "loss": 1.2019,
+ "step": 440
+ },
+ {
+ "epoch": 0.6443247192037257,
+ "grad_norm": 0.10849736630916595,
+ "learning_rate": 8.32191780821918e-06,
+ "loss": 1.1575,
+ "step": 441
+ },
+ {
+ "epoch": 0.6457857729887682,
+ "grad_norm": 0.12259836494922638,
+ "learning_rate": 8.287671232876712e-06,
+ "loss": 1.1847,
+ "step": 442
+ },
+ {
+ "epoch": 0.6472468267738106,
+ "grad_norm": 0.11938966810703278,
+ "learning_rate": 8.253424657534247e-06,
+ "loss": 1.2109,
+ "step": 443
+ },
+ {
+ "epoch": 0.6487078805588531,
+ "grad_norm": 0.11072079837322235,
+ "learning_rate": 8.219178082191782e-06,
+ "loss": 1.1742,
+ "step": 444
+ },
+ {
+ "epoch": 0.6501689343438956,
+ "grad_norm": 0.10626699030399323,
+ "learning_rate": 8.184931506849316e-06,
+ "loss": 1.1866,
+ "step": 445
+ },
+ {
+ "epoch": 0.651629988128938,
+ "grad_norm": 0.109890878200531,
+ "learning_rate": 8.150684931506851e-06,
+ "loss": 1.1477,
+ "step": 446
+ },
+ {
+ "epoch": 0.6530910419139805,
+ "grad_norm": 0.11042490601539612,
+ "learning_rate": 8.116438356164384e-06,
+ "loss": 1.2544,
+ "step": 447
+ },
+ {
+ "epoch": 0.654552095699023,
+ "grad_norm": 0.11169801652431488,
+ "learning_rate": 8.082191780821919e-06,
+ "loss": 1.1274,
+ "step": 448
+ },
+ {
+ "epoch": 0.6560131494840654,
+ "grad_norm": 0.10873094201087952,
+ "learning_rate": 8.047945205479452e-06,
+ "loss": 1.1965,
+ "step": 449
+ },
+ {
+ "epoch": 0.6574742032691079,
+ "grad_norm": 0.11143123358488083,
+ "learning_rate": 8.013698630136987e-06,
+ "loss": 1.1708,
+ "step": 450
+ },
+ {
+ "epoch": 0.6589352570541503,
+ "grad_norm": 0.12092313915491104,
+ "learning_rate": 7.979452054794521e-06,
+ "loss": 1.2115,
+ "step": 451
+ },
+ {
+ "epoch": 0.6603963108391928,
+ "grad_norm": 0.1247633770108223,
+ "learning_rate": 7.945205479452055e-06,
+ "loss": 1.1683,
+ "step": 452
+ },
+ {
+ "epoch": 0.6618573646242353,
+ "grad_norm": 0.11757193505764008,
+ "learning_rate": 7.910958904109591e-06,
+ "loss": 1.187,
+ "step": 453
+ },
+ {
+ "epoch": 0.6633184184092777,
+ "grad_norm": 0.10670476406812668,
+ "learning_rate": 7.876712328767124e-06,
+ "loss": 1.0998,
+ "step": 454
+ },
+ {
+ "epoch": 0.6647794721943201,
+ "grad_norm": 0.11120694130659103,
+ "learning_rate": 7.842465753424659e-06,
+ "loss": 1.1952,
+ "step": 455
+ },
+ {
+ "epoch": 0.6662405259793626,
+ "grad_norm": 0.10676517337560654,
+ "learning_rate": 7.808219178082192e-06,
+ "loss": 1.1609,
+ "step": 456
+ },
+ {
+ "epoch": 0.667701579764405,
+ "grad_norm": 0.10845296084880829,
+ "learning_rate": 7.773972602739727e-06,
+ "loss": 1.1445,
+ "step": 457
+ },
+ {
+ "epoch": 0.6691626335494475,
+ "grad_norm": 0.1130744218826294,
+ "learning_rate": 7.739726027397261e-06,
+ "loss": 1.2327,
+ "step": 458
+ },
+ {
+ "epoch": 0.67062368733449,
+ "grad_norm": 0.12214113771915436,
+ "learning_rate": 7.705479452054794e-06,
+ "loss": 1.2415,
+ "step": 459
+ },
+ {
+ "epoch": 0.6720847411195324,
+ "grad_norm": 0.10830514878034592,
+ "learning_rate": 7.671232876712329e-06,
+ "loss": 1.2456,
+ "step": 460
+ },
+ {
+ "epoch": 0.6735457949045749,
+ "grad_norm": 0.11725237220525742,
+ "learning_rate": 7.636986301369864e-06,
+ "loss": 1.1838,
+ "step": 461
+ },
+ {
+ "epoch": 0.6750068486896174,
+ "grad_norm": 0.12461910396814346,
+ "learning_rate": 7.6027397260273985e-06,
+ "loss": 1.1989,
+ "step": 462
+ },
+ {
+ "epoch": 0.6764679024746598,
+ "grad_norm": 0.11189593374729156,
+ "learning_rate": 7.568493150684932e-06,
+ "loss": 1.1218,
+ "step": 463
+ },
+ {
+ "epoch": 0.6779289562597023,
+ "grad_norm": 0.1076999306678772,
+ "learning_rate": 7.534246575342466e-06,
+ "loss": 1.125,
+ "step": 464
+ },
+ {
+ "epoch": 0.6793900100447448,
+ "grad_norm": 0.13751359283924103,
+ "learning_rate": 7.500000000000001e-06,
+ "loss": 1.1334,
+ "step": 465
+ },
+ {
+ "epoch": 0.6808510638297872,
+ "grad_norm": 0.11828191578388214,
+ "learning_rate": 7.465753424657535e-06,
+ "loss": 1.2438,
+ "step": 466
+ },
+ {
+ "epoch": 0.6823121176148297,
+ "grad_norm": 0.11072523146867752,
+ "learning_rate": 7.431506849315069e-06,
+ "loss": 1.2299,
+ "step": 467
+ },
+ {
+ "epoch": 0.6837731713998721,
+ "grad_norm": 0.1260717362165451,
+ "learning_rate": 7.397260273972603e-06,
+ "loss": 1.2204,
+ "step": 468
+ },
+ {
+ "epoch": 0.6852342251849146,
+ "grad_norm": 0.11779427528381348,
+ "learning_rate": 7.3630136986301374e-06,
+ "loss": 1.1887,
+ "step": 469
+ },
+ {
+ "epoch": 0.6866952789699571,
+ "grad_norm": 0.11070991307497025,
+ "learning_rate": 7.328767123287672e-06,
+ "loss": 1.2137,
+ "step": 470
+ },
+ {
+ "epoch": 0.6881563327549995,
+ "grad_norm": 0.11925278604030609,
+ "learning_rate": 7.294520547945206e-06,
+ "loss": 1.1415,
+ "step": 471
+ },
+ {
+ "epoch": 0.689617386540042,
+ "grad_norm": 0.11368401348590851,
+ "learning_rate": 7.260273972602741e-06,
+ "loss": 1.2588,
+ "step": 472
+ },
+ {
+ "epoch": 0.6910784403250845,
+ "grad_norm": 0.11111228913068771,
+ "learning_rate": 7.226027397260275e-06,
+ "loss": 1.148,
+ "step": 473
+ },
+ {
+ "epoch": 0.6925394941101269,
+ "grad_norm": 0.12571550905704498,
+ "learning_rate": 7.191780821917809e-06,
+ "loss": 1.167,
+ "step": 474
+ },
+ {
+ "epoch": 0.6940005478951694,
+ "grad_norm": 0.11622565984725952,
+ "learning_rate": 7.1575342465753425e-06,
+ "loss": 1.2028,
+ "step": 475
  }
3336
  ],
3337
  "logging_steps": 1,
 
3351
  "attributes": {}
3352
  }
3353
  },
3354
+ "total_flos": 5.357403510693028e+17,
3355
  "train_batch_size": 4,
3356
  "trial_name": null,
3357
  "trial_params": null