Training in progress, step 775, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100198584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0943a41a553462c4d372d11f0db7e7ac74b82191589741e8cc65f7c9bf91ac59
|
3 |
size 100198584
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50675604
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee780c3dfac7ae5d183bedef0b48a628ef423302dc70107b1ca832ddb1dcaca5
|
3 |
size 50675604
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7deeaf0e67bca4eb52e599acdab6e6d26eff4b93874f78b39bfb4318b5785931
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -5264,6 +5264,174 @@
|
|
5264 |
"learning_rate": 3.9457459926017264e-06,
|
5265 |
"loss": 1.2137,
|
5266 |
"step": 751
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5267 |
}
|
5268 |
],
|
5269 |
"logging_steps": 1,
|
@@ -5283,7 +5451,7 @@
|
|
5283 |
"attributes": {}
|
5284 |
}
|
5285 |
},
|
5286 |
-
"total_flos":
|
5287 |
"train_batch_size": 4,
|
5288 |
"trial_name": null,
|
5289 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.850480109739369,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 775,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
5264 |
"learning_rate": 3.9457459926017264e-06,
|
5265 |
"loss": 1.2137,
|
5266 |
"step": 751
|
5267 |
+
},
|
5268 |
+
{
|
5269 |
+
"epoch": 0.8252400548696845,
|
5270 |
+
"grad_norm": 0.14159835875034332,
|
5271 |
+
"learning_rate": 3.921085080147966e-06,
|
5272 |
+
"loss": 1.1786,
|
5273 |
+
"step": 752
|
5274 |
+
},
|
5275 |
+
{
|
5276 |
+
"epoch": 0.8263374485596707,
|
5277 |
+
"grad_norm": 0.13204412162303925,
|
5278 |
+
"learning_rate": 3.896424167694205e-06,
|
5279 |
+
"loss": 1.0827,
|
5280 |
+
"step": 753
|
5281 |
+
},
|
5282 |
+
{
|
5283 |
+
"epoch": 0.827434842249657,
|
5284 |
+
"grad_norm": 0.14544348418712616,
|
5285 |
+
"learning_rate": 3.871763255240444e-06,
|
5286 |
+
"loss": 1.2168,
|
5287 |
+
"step": 754
|
5288 |
+
},
|
5289 |
+
{
|
5290 |
+
"epoch": 0.8285322359396433,
|
5291 |
+
"grad_norm": 0.13837821781635284,
|
5292 |
+
"learning_rate": 3.847102342786683e-06,
|
5293 |
+
"loss": 1.1306,
|
5294 |
+
"step": 755
|
5295 |
+
},
|
5296 |
+
{
|
5297 |
+
"epoch": 0.8296296296296296,
|
5298 |
+
"grad_norm": 0.13542281091213226,
|
5299 |
+
"learning_rate": 3.822441430332923e-06,
|
5300 |
+
"loss": 1.065,
|
5301 |
+
"step": 756
|
5302 |
+
},
|
5303 |
+
{
|
5304 |
+
"epoch": 0.8307270233196159,
|
5305 |
+
"grad_norm": 0.1563270390033722,
|
5306 |
+
"learning_rate": 3.7977805178791616e-06,
|
5307 |
+
"loss": 1.0928,
|
5308 |
+
"step": 757
|
5309 |
+
},
|
5310 |
+
{
|
5311 |
+
"epoch": 0.8318244170096022,
|
5312 |
+
"grad_norm": 0.1355254054069519,
|
5313 |
+
"learning_rate": 3.773119605425401e-06,
|
5314 |
+
"loss": 1.1382,
|
5315 |
+
"step": 758
|
5316 |
+
},
|
5317 |
+
{
|
5318 |
+
"epoch": 0.8329218106995885,
|
5319 |
+
"grad_norm": 0.14081105589866638,
|
5320 |
+
"learning_rate": 3.7484586929716402e-06,
|
5321 |
+
"loss": 1.0408,
|
5322 |
+
"step": 759
|
5323 |
+
},
|
5324 |
+
{
|
5325 |
+
"epoch": 0.8340192043895748,
|
5326 |
+
"grad_norm": 0.14367350935935974,
|
5327 |
+
"learning_rate": 3.723797780517879e-06,
|
5328 |
+
"loss": 1.12,
|
5329 |
+
"step": 760
|
5330 |
+
},
|
5331 |
+
{
|
5332 |
+
"epoch": 0.8351165980795611,
|
5333 |
+
"grad_norm": 0.13955897092819214,
|
5334 |
+
"learning_rate": 3.699136868064119e-06,
|
5335 |
+
"loss": 1.1122,
|
5336 |
+
"step": 761
|
5337 |
+
},
|
5338 |
+
{
|
5339 |
+
"epoch": 0.8362139917695474,
|
5340 |
+
"grad_norm": 0.13528084754943848,
|
5341 |
+
"learning_rate": 3.674475955610358e-06,
|
5342 |
+
"loss": 1.1463,
|
5343 |
+
"step": 762
|
5344 |
+
},
|
5345 |
+
{
|
5346 |
+
"epoch": 0.8373113854595337,
|
5347 |
+
"grad_norm": 0.13060660660266876,
|
5348 |
+
"learning_rate": 3.649815043156597e-06,
|
5349 |
+
"loss": 1.088,
|
5350 |
+
"step": 763
|
5351 |
+
},
|
5352 |
+
{
|
5353 |
+
"epoch": 0.83840877914952,
|
5354 |
+
"grad_norm": 0.14304772019386292,
|
5355 |
+
"learning_rate": 3.6251541307028365e-06,
|
5356 |
+
"loss": 1.13,
|
5357 |
+
"step": 764
|
5358 |
+
},
|
5359 |
+
{
|
5360 |
+
"epoch": 0.8395061728395061,
|
5361 |
+
"grad_norm": 0.129106804728508,
|
5362 |
+
"learning_rate": 3.6004932182490754e-06,
|
5363 |
+
"loss": 1.0758,
|
5364 |
+
"step": 765
|
5365 |
+
},
|
5366 |
+
{
|
5367 |
+
"epoch": 0.8406035665294924,
|
5368 |
+
"grad_norm": 0.14966481924057007,
|
5369 |
+
"learning_rate": 3.5758323057953147e-06,
|
5370 |
+
"loss": 1.051,
|
5371 |
+
"step": 766
|
5372 |
+
},
|
5373 |
+
{
|
5374 |
+
"epoch": 0.8417009602194787,
|
5375 |
+
"grad_norm": 0.13731549680233002,
|
5376 |
+
"learning_rate": 3.5511713933415536e-06,
|
5377 |
+
"loss": 1.1467,
|
5378 |
+
"step": 767
|
5379 |
+
},
|
5380 |
+
{
|
5381 |
+
"epoch": 0.842798353909465,
|
5382 |
+
"grad_norm": 0.16249963641166687,
|
5383 |
+
"learning_rate": 3.526510480887793e-06,
|
5384 |
+
"loss": 1.1939,
|
5385 |
+
"step": 768
|
5386 |
+
},
|
5387 |
+
{
|
5388 |
+
"epoch": 0.8438957475994513,
|
5389 |
+
"grad_norm": 0.1546361893415451,
|
5390 |
+
"learning_rate": 3.5018495684340327e-06,
|
5391 |
+
"loss": 1.1762,
|
5392 |
+
"step": 769
|
5393 |
+
},
|
5394 |
+
{
|
5395 |
+
"epoch": 0.8449931412894376,
|
5396 |
+
"grad_norm": 0.1352168768644333,
|
5397 |
+
"learning_rate": 3.4771886559802716e-06,
|
5398 |
+
"loss": 1.1351,
|
5399 |
+
"step": 770
|
5400 |
+
},
|
5401 |
+
{
|
5402 |
+
"epoch": 0.8460905349794239,
|
5403 |
+
"grad_norm": 0.13795001804828644,
|
5404 |
+
"learning_rate": 3.452527743526511e-06,
|
5405 |
+
"loss": 1.0621,
|
5406 |
+
"step": 771
|
5407 |
+
},
|
5408 |
+
{
|
5409 |
+
"epoch": 0.8471879286694102,
|
5410 |
+
"grad_norm": 0.13399291038513184,
|
5411 |
+
"learning_rate": 3.42786683107275e-06,
|
5412 |
+
"loss": 1.1674,
|
5413 |
+
"step": 772
|
5414 |
+
},
|
5415 |
+
{
|
5416 |
+
"epoch": 0.8482853223593965,
|
5417 |
+
"grad_norm": 0.1293582171201706,
|
5418 |
+
"learning_rate": 3.403205918618989e-06,
|
5419 |
+
"loss": 1.1216,
|
5420 |
+
"step": 773
|
5421 |
+
},
|
5422 |
+
{
|
5423 |
+
"epoch": 0.8493827160493828,
|
5424 |
+
"grad_norm": 0.13657528162002563,
|
5425 |
+
"learning_rate": 3.3785450061652285e-06,
|
5426 |
+
"loss": 1.1037,
|
5427 |
+
"step": 774
|
5428 |
+
},
|
5429 |
+
{
|
5430 |
+
"epoch": 0.850480109739369,
|
5431 |
+
"grad_norm": 0.14344428479671478,
|
5432 |
+
"learning_rate": 3.3538840937114674e-06,
|
5433 |
+
"loss": 1.1299,
|
5434 |
+
"step": 775
|
5435 |
}
|
5436 |
],
|
5437 |
"logging_steps": 1,
|
|
|
5451 |
"attributes": {}
|
5452 |
}
|
5453 |
},
|
5454 |
+
"total_flos": 8.037403550740316e+17,
|
5455 |
"train_batch_size": 4,
|
5456 |
"trial_name": null,
|
5457 |
"trial_params": null
|