Training in progress, step 475, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100198584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a76ebf58b46d7d773c0e60f3c3b3202c39175dc82e54c189f3267d0947c4a8ff
|
3 |
size 100198584
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50675604
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62b4cdbdbd72fca14cbd95d40faf9abb8a8ecb3ba4993c400be6e7b437b4b824
|
3 |
size 50675604
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b050456dfa3d81625079076f253a2e5a55a9198ab0c9ed74cbb8cd2fe6a1e442
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3045,6 +3045,293 @@
|
|
3045 |
"learning_rate": 8.56164383561644e-06,
|
3046 |
"loss": 1.1743,
|
3047 |
"step": 434
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3048 |
}
|
3049 |
],
|
3050 |
"logging_steps": 1,
|
@@ -3064,7 +3351,7 @@
|
|
3064 |
"attributes": {}
|
3065 |
}
|
3066 |
},
|
3067 |
-
"total_flos":
|
3068 |
"train_batch_size": 4,
|
3069 |
"trial_name": null,
|
3070 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.6940005478951694,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 475,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3045 |
"learning_rate": 8.56164383561644e-06,
|
3046 |
"loss": 1.1743,
|
3047 |
"step": 434
|
3048 |
+
},
|
3049 |
+
{
|
3050 |
+
"epoch": 0.6355583964934709,
|
3051 |
+
"grad_norm": 0.1085817888379097,
|
3052 |
+
"learning_rate": 8.527397260273972e-06,
|
3053 |
+
"loss": 1.2021,
|
3054 |
+
"step": 435
|
3055 |
+
},
|
3056 |
+
{
|
3057 |
+
"epoch": 0.6370194502785134,
|
3058 |
+
"grad_norm": 0.10217051953077316,
|
3059 |
+
"learning_rate": 8.493150684931507e-06,
|
3060 |
+
"loss": 1.1815,
|
3061 |
+
"step": 436
|
3062 |
+
},
|
3063 |
+
{
|
3064 |
+
"epoch": 0.6384805040635558,
|
3065 |
+
"grad_norm": 0.11223044246435165,
|
3066 |
+
"learning_rate": 8.458904109589042e-06,
|
3067 |
+
"loss": 1.1238,
|
3068 |
+
"step": 437
|
3069 |
+
},
|
3070 |
+
{
|
3071 |
+
"epoch": 0.6399415578485983,
|
3072 |
+
"grad_norm": 0.10959354788064957,
|
3073 |
+
"learning_rate": 8.424657534246577e-06,
|
3074 |
+
"loss": 1.116,
|
3075 |
+
"step": 438
|
3076 |
+
},
|
3077 |
+
{
|
3078 |
+
"epoch": 0.6414026116336408,
|
3079 |
+
"grad_norm": 0.12990103662014008,
|
3080 |
+
"learning_rate": 8.39041095890411e-06,
|
3081 |
+
"loss": 1.1134,
|
3082 |
+
"step": 439
|
3083 |
+
},
|
3084 |
+
{
|
3085 |
+
"epoch": 0.6428636654186832,
|
3086 |
+
"grad_norm": 0.11417476832866669,
|
3087 |
+
"learning_rate": 8.356164383561644e-06,
|
3088 |
+
"loss": 1.2019,
|
3089 |
+
"step": 440
|
3090 |
+
},
|
3091 |
+
{
|
3092 |
+
"epoch": 0.6443247192037257,
|
3093 |
+
"grad_norm": 0.10849736630916595,
|
3094 |
+
"learning_rate": 8.32191780821918e-06,
|
3095 |
+
"loss": 1.1575,
|
3096 |
+
"step": 441
|
3097 |
+
},
|
3098 |
+
{
|
3099 |
+
"epoch": 0.6457857729887682,
|
3100 |
+
"grad_norm": 0.12259836494922638,
|
3101 |
+
"learning_rate": 8.287671232876712e-06,
|
3102 |
+
"loss": 1.1847,
|
3103 |
+
"step": 442
|
3104 |
+
},
|
3105 |
+
{
|
3106 |
+
"epoch": 0.6472468267738106,
|
3107 |
+
"grad_norm": 0.11938966810703278,
|
3108 |
+
"learning_rate": 8.253424657534247e-06,
|
3109 |
+
"loss": 1.2109,
|
3110 |
+
"step": 443
|
3111 |
+
},
|
3112 |
+
{
|
3113 |
+
"epoch": 0.6487078805588531,
|
3114 |
+
"grad_norm": 0.11072079837322235,
|
3115 |
+
"learning_rate": 8.219178082191782e-06,
|
3116 |
+
"loss": 1.1742,
|
3117 |
+
"step": 444
|
3118 |
+
},
|
3119 |
+
{
|
3120 |
+
"epoch": 0.6501689343438956,
|
3121 |
+
"grad_norm": 0.10626699030399323,
|
3122 |
+
"learning_rate": 8.184931506849316e-06,
|
3123 |
+
"loss": 1.1866,
|
3124 |
+
"step": 445
|
3125 |
+
},
|
3126 |
+
{
|
3127 |
+
"epoch": 0.651629988128938,
|
3128 |
+
"grad_norm": 0.109890878200531,
|
3129 |
+
"learning_rate": 8.150684931506851e-06,
|
3130 |
+
"loss": 1.1477,
|
3131 |
+
"step": 446
|
3132 |
+
},
|
3133 |
+
{
|
3134 |
+
"epoch": 0.6530910419139805,
|
3135 |
+
"grad_norm": 0.11042490601539612,
|
3136 |
+
"learning_rate": 8.116438356164384e-06,
|
3137 |
+
"loss": 1.2544,
|
3138 |
+
"step": 447
|
3139 |
+
},
|
3140 |
+
{
|
3141 |
+
"epoch": 0.654552095699023,
|
3142 |
+
"grad_norm": 0.11169801652431488,
|
3143 |
+
"learning_rate": 8.082191780821919e-06,
|
3144 |
+
"loss": 1.1274,
|
3145 |
+
"step": 448
|
3146 |
+
},
|
3147 |
+
{
|
3148 |
+
"epoch": 0.6560131494840654,
|
3149 |
+
"grad_norm": 0.10873094201087952,
|
3150 |
+
"learning_rate": 8.047945205479452e-06,
|
3151 |
+
"loss": 1.1965,
|
3152 |
+
"step": 449
|
3153 |
+
},
|
3154 |
+
{
|
3155 |
+
"epoch": 0.6574742032691079,
|
3156 |
+
"grad_norm": 0.11143123358488083,
|
3157 |
+
"learning_rate": 8.013698630136987e-06,
|
3158 |
+
"loss": 1.1708,
|
3159 |
+
"step": 450
|
3160 |
+
},
|
3161 |
+
{
|
3162 |
+
"epoch": 0.6589352570541503,
|
3163 |
+
"grad_norm": 0.12092313915491104,
|
3164 |
+
"learning_rate": 7.979452054794521e-06,
|
3165 |
+
"loss": 1.2115,
|
3166 |
+
"step": 451
|
3167 |
+
},
|
3168 |
+
{
|
3169 |
+
"epoch": 0.6603963108391928,
|
3170 |
+
"grad_norm": 0.1247633770108223,
|
3171 |
+
"learning_rate": 7.945205479452055e-06,
|
3172 |
+
"loss": 1.1683,
|
3173 |
+
"step": 452
|
3174 |
+
},
|
3175 |
+
{
|
3176 |
+
"epoch": 0.6618573646242353,
|
3177 |
+
"grad_norm": 0.11757193505764008,
|
3178 |
+
"learning_rate": 7.910958904109591e-06,
|
3179 |
+
"loss": 1.187,
|
3180 |
+
"step": 453
|
3181 |
+
},
|
3182 |
+
{
|
3183 |
+
"epoch": 0.6633184184092777,
|
3184 |
+
"grad_norm": 0.10670476406812668,
|
3185 |
+
"learning_rate": 7.876712328767124e-06,
|
3186 |
+
"loss": 1.0998,
|
3187 |
+
"step": 454
|
3188 |
+
},
|
3189 |
+
{
|
3190 |
+
"epoch": 0.6647794721943201,
|
3191 |
+
"grad_norm": 0.11120694130659103,
|
3192 |
+
"learning_rate": 7.842465753424659e-06,
|
3193 |
+
"loss": 1.1952,
|
3194 |
+
"step": 455
|
3195 |
+
},
|
3196 |
+
{
|
3197 |
+
"epoch": 0.6662405259793626,
|
3198 |
+
"grad_norm": 0.10676517337560654,
|
3199 |
+
"learning_rate": 7.808219178082192e-06,
|
3200 |
+
"loss": 1.1609,
|
3201 |
+
"step": 456
|
3202 |
+
},
|
3203 |
+
{
|
3204 |
+
"epoch": 0.667701579764405,
|
3205 |
+
"grad_norm": 0.10845296084880829,
|
3206 |
+
"learning_rate": 7.773972602739727e-06,
|
3207 |
+
"loss": 1.1445,
|
3208 |
+
"step": 457
|
3209 |
+
},
|
3210 |
+
{
|
3211 |
+
"epoch": 0.6691626335494475,
|
3212 |
+
"grad_norm": 0.1130744218826294,
|
3213 |
+
"learning_rate": 7.739726027397261e-06,
|
3214 |
+
"loss": 1.2327,
|
3215 |
+
"step": 458
|
3216 |
+
},
|
3217 |
+
{
|
3218 |
+
"epoch": 0.67062368733449,
|
3219 |
+
"grad_norm": 0.12214113771915436,
|
3220 |
+
"learning_rate": 7.705479452054794e-06,
|
3221 |
+
"loss": 1.2415,
|
3222 |
+
"step": 459
|
3223 |
+
},
|
3224 |
+
{
|
3225 |
+
"epoch": 0.6720847411195324,
|
3226 |
+
"grad_norm": 0.10830514878034592,
|
3227 |
+
"learning_rate": 7.671232876712329e-06,
|
3228 |
+
"loss": 1.2456,
|
3229 |
+
"step": 460
|
3230 |
+
},
|
3231 |
+
{
|
3232 |
+
"epoch": 0.6735457949045749,
|
3233 |
+
"grad_norm": 0.11725237220525742,
|
3234 |
+
"learning_rate": 7.636986301369864e-06,
|
3235 |
+
"loss": 1.1838,
|
3236 |
+
"step": 461
|
3237 |
+
},
|
3238 |
+
{
|
3239 |
+
"epoch": 0.6750068486896174,
|
3240 |
+
"grad_norm": 0.12461910396814346,
|
3241 |
+
"learning_rate": 7.6027397260273985e-06,
|
3242 |
+
"loss": 1.1989,
|
3243 |
+
"step": 462
|
3244 |
+
},
|
3245 |
+
{
|
3246 |
+
"epoch": 0.6764679024746598,
|
3247 |
+
"grad_norm": 0.11189593374729156,
|
3248 |
+
"learning_rate": 7.568493150684932e-06,
|
3249 |
+
"loss": 1.1218,
|
3250 |
+
"step": 463
|
3251 |
+
},
|
3252 |
+
{
|
3253 |
+
"epoch": 0.6779289562597023,
|
3254 |
+
"grad_norm": 0.1076999306678772,
|
3255 |
+
"learning_rate": 7.534246575342466e-06,
|
3256 |
+
"loss": 1.125,
|
3257 |
+
"step": 464
|
3258 |
+
},
|
3259 |
+
{
|
3260 |
+
"epoch": 0.6793900100447448,
|
3261 |
+
"grad_norm": 0.13751359283924103,
|
3262 |
+
"learning_rate": 7.500000000000001e-06,
|
3263 |
+
"loss": 1.1334,
|
3264 |
+
"step": 465
|
3265 |
+
},
|
3266 |
+
{
|
3267 |
+
"epoch": 0.6808510638297872,
|
3268 |
+
"grad_norm": 0.11828191578388214,
|
3269 |
+
"learning_rate": 7.465753424657535e-06,
|
3270 |
+
"loss": 1.2438,
|
3271 |
+
"step": 466
|
3272 |
+
},
|
3273 |
+
{
|
3274 |
+
"epoch": 0.6823121176148297,
|
3275 |
+
"grad_norm": 0.11072523146867752,
|
3276 |
+
"learning_rate": 7.431506849315069e-06,
|
3277 |
+
"loss": 1.2299,
|
3278 |
+
"step": 467
|
3279 |
+
},
|
3280 |
+
{
|
3281 |
+
"epoch": 0.6837731713998721,
|
3282 |
+
"grad_norm": 0.1260717362165451,
|
3283 |
+
"learning_rate": 7.397260273972603e-06,
|
3284 |
+
"loss": 1.2204,
|
3285 |
+
"step": 468
|
3286 |
+
},
|
3287 |
+
{
|
3288 |
+
"epoch": 0.6852342251849146,
|
3289 |
+
"grad_norm": 0.11779427528381348,
|
3290 |
+
"learning_rate": 7.3630136986301374e-06,
|
3291 |
+
"loss": 1.1887,
|
3292 |
+
"step": 469
|
3293 |
+
},
|
3294 |
+
{
|
3295 |
+
"epoch": 0.6866952789699571,
|
3296 |
+
"grad_norm": 0.11070991307497025,
|
3297 |
+
"learning_rate": 7.328767123287672e-06,
|
3298 |
+
"loss": 1.2137,
|
3299 |
+
"step": 470
|
3300 |
+
},
|
3301 |
+
{
|
3302 |
+
"epoch": 0.6881563327549995,
|
3303 |
+
"grad_norm": 0.11925278604030609,
|
3304 |
+
"learning_rate": 7.294520547945206e-06,
|
3305 |
+
"loss": 1.1415,
|
3306 |
+
"step": 471
|
3307 |
+
},
|
3308 |
+
{
|
3309 |
+
"epoch": 0.689617386540042,
|
3310 |
+
"grad_norm": 0.11368401348590851,
|
3311 |
+
"learning_rate": 7.260273972602741e-06,
|
3312 |
+
"loss": 1.2588,
|
3313 |
+
"step": 472
|
3314 |
+
},
|
3315 |
+
{
|
3316 |
+
"epoch": 0.6910784403250845,
|
3317 |
+
"grad_norm": 0.11111228913068771,
|
3318 |
+
"learning_rate": 7.226027397260275e-06,
|
3319 |
+
"loss": 1.148,
|
3320 |
+
"step": 473
|
3321 |
+
},
|
3322 |
+
{
|
3323 |
+
"epoch": 0.6925394941101269,
|
3324 |
+
"grad_norm": 0.12571550905704498,
|
3325 |
+
"learning_rate": 7.191780821917809e-06,
|
3326 |
+
"loss": 1.167,
|
3327 |
+
"step": 474
|
3328 |
+
},
|
3329 |
+
{
|
3330 |
+
"epoch": 0.6940005478951694,
|
3331 |
+
"grad_norm": 0.11622565984725952,
|
3332 |
+
"learning_rate": 7.1575342465753425e-06,
|
3333 |
+
"loss": 1.2028,
|
3334 |
+
"step": 475
|
3335 |
}
|
3336 |
],
|
3337 |
"logging_steps": 1,
|
|
|
3351 |
"attributes": {}
|
3352 |
}
|
3353 |
},
|
3354 |
+
"total_flos": 5.357403510693028e+17,
|
3355 |
"train_batch_size": 4,
|
3356 |
"trial_name": null,
|
3357 |
"trial_params": null
|