Upload folder using huggingface_hub
Browse files- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +1402 -2
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1871270621
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49b325f279a618ef1e84ca54855d06e1d7d0d6988cdbc57f7de78bae805f5d56
|
3 |
size 1871270621
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 950390010
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e35810c6378a8ff0a6f90fada4aa175d65b30e620d245d984156f060fe0ef804
|
3 |
size 950390010
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:061c3f32322b4bfb8a669efd69a5c77ef588488103eef1c71d7da816d8cd47b3
|
3 |
size 14244
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f822c379d510590e26ea8c9ab9d921762a1e9d7661efc14719772acc2f21f941
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -22407,6 +22407,1406 @@
|
|
22407 |
"learning_rate": 4.988068499954578e-05,
|
22408 |
"loss": 106.1032,
|
22409 |
"step": 32000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22410 |
}
|
22411 |
],
|
22412 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.13736430224994647,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 34000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
22407 |
"learning_rate": 4.988068499954578e-05,
|
22408 |
"loss": 106.1032,
|
22409 |
"step": 32000
|
22410 |
+
},
|
22411 |
+
{
|
22412 |
+
"epoch": 0.12932445044178784,
|
22413 |
+
"grad_norm": 1423.832763671875,
|
22414 |
+
"learning_rate": 4.9880344128437115e-05,
|
22415 |
+
"loss": 138.5585,
|
22416 |
+
"step": 32010
|
22417 |
+
},
|
22418 |
+
{
|
22419 |
+
"epoch": 0.12936485170715548,
|
22420 |
+
"grad_norm": 2598.307373046875,
|
22421 |
+
"learning_rate": 4.988000277227334e-05,
|
22422 |
+
"loss": 115.3402,
|
22423 |
+
"step": 32020
|
22424 |
+
},
|
22425 |
+
{
|
22426 |
+
"epoch": 0.1294052529725231,
|
22427 |
+
"grad_norm": 1134.399169921875,
|
22428 |
+
"learning_rate": 4.987966093106109e-05,
|
22429 |
+
"loss": 90.1487,
|
22430 |
+
"step": 32030
|
22431 |
+
},
|
22432 |
+
{
|
22433 |
+
"epoch": 0.12944565423789073,
|
22434 |
+
"grad_norm": 765.7633056640625,
|
22435 |
+
"learning_rate": 4.987931860480705e-05,
|
22436 |
+
"loss": 100.8343,
|
22437 |
+
"step": 32040
|
22438 |
+
},
|
22439 |
+
{
|
22440 |
+
"epoch": 0.12948605550325837,
|
22441 |
+
"grad_norm": 1252.3284912109375,
|
22442 |
+
"learning_rate": 4.987897579351788e-05,
|
22443 |
+
"loss": 78.8595,
|
22444 |
+
"step": 32050
|
22445 |
+
},
|
22446 |
+
{
|
22447 |
+
"epoch": 0.12952645676862598,
|
22448 |
+
"grad_norm": 4336.3466796875,
|
22449 |
+
"learning_rate": 4.987863249720027e-05,
|
22450 |
+
"loss": 95.2753,
|
22451 |
+
"step": 32060
|
22452 |
+
},
|
22453 |
+
{
|
22454 |
+
"epoch": 0.12956685803399362,
|
22455 |
+
"grad_norm": 1328.06396484375,
|
22456 |
+
"learning_rate": 4.987828871586091e-05,
|
22457 |
+
"loss": 101.3385,
|
22458 |
+
"step": 32070
|
22459 |
+
},
|
22460 |
+
{
|
22461 |
+
"epoch": 0.12960725929936126,
|
22462 |
+
"grad_norm": 520.3338012695312,
|
22463 |
+
"learning_rate": 4.987794444950651e-05,
|
22464 |
+
"loss": 153.1254,
|
22465 |
+
"step": 32080
|
22466 |
+
},
|
22467 |
+
{
|
22468 |
+
"epoch": 0.1296476605647289,
|
22469 |
+
"grad_norm": 694.16015625,
|
22470 |
+
"learning_rate": 4.987759969814377e-05,
|
22471 |
+
"loss": 93.5572,
|
22472 |
+
"step": 32090
|
22473 |
+
},
|
22474 |
+
{
|
22475 |
+
"epoch": 0.1296880618300965,
|
22476 |
+
"grad_norm": 1401.1295166015625,
|
22477 |
+
"learning_rate": 4.987725446177941e-05,
|
22478 |
+
"loss": 93.8664,
|
22479 |
+
"step": 32100
|
22480 |
+
},
|
22481 |
+
{
|
22482 |
+
"epoch": 0.12972846309546415,
|
22483 |
+
"grad_norm": 928.5399780273438,
|
22484 |
+
"learning_rate": 4.9876908740420175e-05,
|
22485 |
+
"loss": 74.3067,
|
22486 |
+
"step": 32110
|
22487 |
+
},
|
22488 |
+
{
|
22489 |
+
"epoch": 0.1297688643608318,
|
22490 |
+
"grad_norm": 1556.6954345703125,
|
22491 |
+
"learning_rate": 4.98765625340728e-05,
|
22492 |
+
"loss": 85.9339,
|
22493 |
+
"step": 32120
|
22494 |
+
},
|
22495 |
+
{
|
22496 |
+
"epoch": 0.1298092656261994,
|
22497 |
+
"grad_norm": 783.3317260742188,
|
22498 |
+
"learning_rate": 4.987621584274402e-05,
|
22499 |
+
"loss": 85.9349,
|
22500 |
+
"step": 32130
|
22501 |
+
},
|
22502 |
+
{
|
22503 |
+
"epoch": 0.12984966689156704,
|
22504 |
+
"grad_norm": 423.84014892578125,
|
22505 |
+
"learning_rate": 4.9875868666440604e-05,
|
22506 |
+
"loss": 49.8796,
|
22507 |
+
"step": 32140
|
22508 |
+
},
|
22509 |
+
{
|
22510 |
+
"epoch": 0.12989006815693468,
|
22511 |
+
"grad_norm": 806.3920288085938,
|
22512 |
+
"learning_rate": 4.987552100516933e-05,
|
22513 |
+
"loss": 66.9737,
|
22514 |
+
"step": 32150
|
22515 |
+
},
|
22516 |
+
{
|
22517 |
+
"epoch": 0.1299304694223023,
|
22518 |
+
"grad_norm": 974.2014770507812,
|
22519 |
+
"learning_rate": 4.987517285893697e-05,
|
22520 |
+
"loss": 131.7362,
|
22521 |
+
"step": 32160
|
22522 |
+
},
|
22523 |
+
{
|
22524 |
+
"epoch": 0.12997087068766994,
|
22525 |
+
"grad_norm": 3982.545654296875,
|
22526 |
+
"learning_rate": 4.9874824227750305e-05,
|
22527 |
+
"loss": 130.7934,
|
22528 |
+
"step": 32170
|
22529 |
+
},
|
22530 |
+
{
|
22531 |
+
"epoch": 0.13001127195303758,
|
22532 |
+
"grad_norm": 936.3251953125,
|
22533 |
+
"learning_rate": 4.987447511161612e-05,
|
22534 |
+
"loss": 86.1245,
|
22535 |
+
"step": 32180
|
22536 |
+
},
|
22537 |
+
{
|
22538 |
+
"epoch": 0.1300516732184052,
|
22539 |
+
"grad_norm": 1825.6453857421875,
|
22540 |
+
"learning_rate": 4.987412551054126e-05,
|
22541 |
+
"loss": 95.4473,
|
22542 |
+
"step": 32190
|
22543 |
+
},
|
22544 |
+
{
|
22545 |
+
"epoch": 0.13009207448377283,
|
22546 |
+
"grad_norm": 1493.9964599609375,
|
22547 |
+
"learning_rate": 4.987377542453251e-05,
|
22548 |
+
"loss": 78.5434,
|
22549 |
+
"step": 32200
|
22550 |
+
},
|
22551 |
+
{
|
22552 |
+
"epoch": 0.13013247574914047,
|
22553 |
+
"grad_norm": 748.4346923828125,
|
22554 |
+
"learning_rate": 4.9873424853596695e-05,
|
22555 |
+
"loss": 87.2075,
|
22556 |
+
"step": 32210
|
22557 |
+
},
|
22558 |
+
{
|
22559 |
+
"epoch": 0.13017287701450808,
|
22560 |
+
"grad_norm": 1414.2127685546875,
|
22561 |
+
"learning_rate": 4.987307379774066e-05,
|
22562 |
+
"loss": 95.8589,
|
22563 |
+
"step": 32220
|
22564 |
+
},
|
22565 |
+
{
|
22566 |
+
"epoch": 0.13021327827987572,
|
22567 |
+
"grad_norm": 799.97705078125,
|
22568 |
+
"learning_rate": 4.987272225697125e-05,
|
22569 |
+
"loss": 94.7114,
|
22570 |
+
"step": 32230
|
22571 |
+
},
|
22572 |
+
{
|
22573 |
+
"epoch": 0.13025367954524336,
|
22574 |
+
"grad_norm": 2368.305419921875,
|
22575 |
+
"learning_rate": 4.987237023129531e-05,
|
22576 |
+
"loss": 93.2615,
|
22577 |
+
"step": 32240
|
22578 |
+
},
|
22579 |
+
{
|
22580 |
+
"epoch": 0.130294080810611,
|
22581 |
+
"grad_norm": 525.53759765625,
|
22582 |
+
"learning_rate": 4.987201772071971e-05,
|
22583 |
+
"loss": 100.8188,
|
22584 |
+
"step": 32250
|
22585 |
+
},
|
22586 |
+
{
|
22587 |
+
"epoch": 0.1303344820759786,
|
22588 |
+
"grad_norm": 1076.550048828125,
|
22589 |
+
"learning_rate": 4.9871664725251314e-05,
|
22590 |
+
"loss": 59.2903,
|
22591 |
+
"step": 32260
|
22592 |
+
},
|
22593 |
+
{
|
22594 |
+
"epoch": 0.13037488334134625,
|
22595 |
+
"grad_norm": 1305.9718017578125,
|
22596 |
+
"learning_rate": 4.987131124489701e-05,
|
22597 |
+
"loss": 62.7737,
|
22598 |
+
"step": 32270
|
22599 |
+
},
|
22600 |
+
{
|
22601 |
+
"epoch": 0.1304152846067139,
|
22602 |
+
"grad_norm": 1377.45458984375,
|
22603 |
+
"learning_rate": 4.98709572796637e-05,
|
22604 |
+
"loss": 98.2218,
|
22605 |
+
"step": 32280
|
22606 |
+
},
|
22607 |
+
{
|
22608 |
+
"epoch": 0.1304556858720815,
|
22609 |
+
"grad_norm": 1250.3414306640625,
|
22610 |
+
"learning_rate": 4.987060282955826e-05,
|
22611 |
+
"loss": 141.3949,
|
22612 |
+
"step": 32290
|
22613 |
+
},
|
22614 |
+
{
|
22615 |
+
"epoch": 0.13049608713744915,
|
22616 |
+
"grad_norm": 362.1710205078125,
|
22617 |
+
"learning_rate": 4.987024789458762e-05,
|
22618 |
+
"loss": 56.7362,
|
22619 |
+
"step": 32300
|
22620 |
+
},
|
22621 |
+
{
|
22622 |
+
"epoch": 0.13053648840281679,
|
22623 |
+
"grad_norm": 1179.3179931640625,
|
22624 |
+
"learning_rate": 4.9869892474758694e-05,
|
22625 |
+
"loss": 110.4925,
|
22626 |
+
"step": 32310
|
22627 |
+
},
|
22628 |
+
{
|
22629 |
+
"epoch": 0.1305768896681844,
|
22630 |
+
"grad_norm": 885.8596801757812,
|
22631 |
+
"learning_rate": 4.986953657007841e-05,
|
22632 |
+
"loss": 103.9362,
|
22633 |
+
"step": 32320
|
22634 |
+
},
|
22635 |
+
{
|
22636 |
+
"epoch": 0.13061729093355204,
|
22637 |
+
"grad_norm": 2053.674072265625,
|
22638 |
+
"learning_rate": 4.98691801805537e-05,
|
22639 |
+
"loss": 108.4903,
|
22640 |
+
"step": 32330
|
22641 |
+
},
|
22642 |
+
{
|
22643 |
+
"epoch": 0.13065769219891968,
|
22644 |
+
"grad_norm": 628.6939086914062,
|
22645 |
+
"learning_rate": 4.986882330619152e-05,
|
22646 |
+
"loss": 66.5057,
|
22647 |
+
"step": 32340
|
22648 |
+
},
|
22649 |
+
{
|
22650 |
+
"epoch": 0.1306980934642873,
|
22651 |
+
"grad_norm": 9528.09765625,
|
22652 |
+
"learning_rate": 4.986846594699883e-05,
|
22653 |
+
"loss": 103.5715,
|
22654 |
+
"step": 32350
|
22655 |
+
},
|
22656 |
+
{
|
22657 |
+
"epoch": 0.13073849472965493,
|
22658 |
+
"grad_norm": 0.0,
|
22659 |
+
"learning_rate": 4.9868108102982604e-05,
|
22660 |
+
"loss": 121.9924,
|
22661 |
+
"step": 32360
|
22662 |
+
},
|
22663 |
+
{
|
22664 |
+
"epoch": 0.13077889599502257,
|
22665 |
+
"grad_norm": 710.1858520507812,
|
22666 |
+
"learning_rate": 4.986774977414979e-05,
|
22667 |
+
"loss": 65.1207,
|
22668 |
+
"step": 32370
|
22669 |
+
},
|
22670 |
+
{
|
22671 |
+
"epoch": 0.13081929726039018,
|
22672 |
+
"grad_norm": 474.6985778808594,
|
22673 |
+
"learning_rate": 4.98673909605074e-05,
|
22674 |
+
"loss": 102.3303,
|
22675 |
+
"step": 32380
|
22676 |
+
},
|
22677 |
+
{
|
22678 |
+
"epoch": 0.13085969852575782,
|
22679 |
+
"grad_norm": 0.0,
|
22680 |
+
"learning_rate": 4.986703166206242e-05,
|
22681 |
+
"loss": 75.2703,
|
22682 |
+
"step": 32390
|
22683 |
+
},
|
22684 |
+
{
|
22685 |
+
"epoch": 0.13090009979112546,
|
22686 |
+
"grad_norm": 781.9612426757812,
|
22687 |
+
"learning_rate": 4.986667187882186e-05,
|
22688 |
+
"loss": 115.7585,
|
22689 |
+
"step": 32400
|
22690 |
+
},
|
22691 |
+
{
|
22692 |
+
"epoch": 0.1309405010564931,
|
22693 |
+
"grad_norm": 1456.1153564453125,
|
22694 |
+
"learning_rate": 4.986631161079272e-05,
|
22695 |
+
"loss": 119.8207,
|
22696 |
+
"step": 32410
|
22697 |
+
},
|
22698 |
+
{
|
22699 |
+
"epoch": 0.13098090232186071,
|
22700 |
+
"grad_norm": 767.140625,
|
22701 |
+
"learning_rate": 4.986595085798204e-05,
|
22702 |
+
"loss": 79.3004,
|
22703 |
+
"step": 32420
|
22704 |
+
},
|
22705 |
+
{
|
22706 |
+
"epoch": 0.13102130358722835,
|
22707 |
+
"grad_norm": 1090.2476806640625,
|
22708 |
+
"learning_rate": 4.9865589620396837e-05,
|
22709 |
+
"loss": 104.849,
|
22710 |
+
"step": 32430
|
22711 |
+
},
|
22712 |
+
{
|
22713 |
+
"epoch": 0.131061704852596,
|
22714 |
+
"grad_norm": 618.4625854492188,
|
22715 |
+
"learning_rate": 4.986522789804417e-05,
|
22716 |
+
"loss": 88.606,
|
22717 |
+
"step": 32440
|
22718 |
+
},
|
22719 |
+
{
|
22720 |
+
"epoch": 0.1311021061179636,
|
22721 |
+
"grad_norm": 1400.4354248046875,
|
22722 |
+
"learning_rate": 4.9864865690931086e-05,
|
22723 |
+
"loss": 112.4108,
|
22724 |
+
"step": 32450
|
22725 |
+
},
|
22726 |
+
{
|
22727 |
+
"epoch": 0.13114250738333125,
|
22728 |
+
"grad_norm": 1226.9744873046875,
|
22729 |
+
"learning_rate": 4.986450299906464e-05,
|
22730 |
+
"loss": 110.0555,
|
22731 |
+
"step": 32460
|
22732 |
+
},
|
22733 |
+
{
|
22734 |
+
"epoch": 0.1311829086486989,
|
22735 |
+
"grad_norm": 1217.69970703125,
|
22736 |
+
"learning_rate": 4.9864139822451905e-05,
|
22737 |
+
"loss": 105.7761,
|
22738 |
+
"step": 32470
|
22739 |
+
},
|
22740 |
+
{
|
22741 |
+
"epoch": 0.1312233099140665,
|
22742 |
+
"grad_norm": 944.0647583007812,
|
22743 |
+
"learning_rate": 4.9863776161099964e-05,
|
22744 |
+
"loss": 68.9442,
|
22745 |
+
"step": 32480
|
22746 |
+
},
|
22747 |
+
{
|
22748 |
+
"epoch": 0.13126371117943414,
|
22749 |
+
"grad_norm": 1241.845458984375,
|
22750 |
+
"learning_rate": 4.986341201501591e-05,
|
22751 |
+
"loss": 95.3951,
|
22752 |
+
"step": 32490
|
22753 |
+
},
|
22754 |
+
{
|
22755 |
+
"epoch": 0.13130411244480178,
|
22756 |
+
"grad_norm": 1810.86572265625,
|
22757 |
+
"learning_rate": 4.9863047384206835e-05,
|
22758 |
+
"loss": 132.1521,
|
22759 |
+
"step": 32500
|
22760 |
+
},
|
22761 |
+
{
|
22762 |
+
"epoch": 0.1313445137101694,
|
22763 |
+
"grad_norm": 1479.045166015625,
|
22764 |
+
"learning_rate": 4.986268226867985e-05,
|
22765 |
+
"loss": 109.1159,
|
22766 |
+
"step": 32510
|
22767 |
+
},
|
22768 |
+
{
|
22769 |
+
"epoch": 0.13138491497553703,
|
22770 |
+
"grad_norm": 958.012451171875,
|
22771 |
+
"learning_rate": 4.986231666844208e-05,
|
22772 |
+
"loss": 84.0408,
|
22773 |
+
"step": 32520
|
22774 |
+
},
|
22775 |
+
{
|
22776 |
+
"epoch": 0.13142531624090467,
|
22777 |
+
"grad_norm": 1029.1376953125,
|
22778 |
+
"learning_rate": 4.9861950583500636e-05,
|
22779 |
+
"loss": 161.0161,
|
22780 |
+
"step": 32530
|
22781 |
+
},
|
22782 |
+
{
|
22783 |
+
"epoch": 0.13146571750627228,
|
22784 |
+
"grad_norm": 2178.518798828125,
|
22785 |
+
"learning_rate": 4.986158401386268e-05,
|
22786 |
+
"loss": 125.5231,
|
22787 |
+
"step": 32540
|
22788 |
+
},
|
22789 |
+
{
|
22790 |
+
"epoch": 0.13150611877163992,
|
22791 |
+
"grad_norm": 873.2677612304688,
|
22792 |
+
"learning_rate": 4.9861216959535335e-05,
|
22793 |
+
"loss": 82.9297,
|
22794 |
+
"step": 32550
|
22795 |
+
},
|
22796 |
+
{
|
22797 |
+
"epoch": 0.13154652003700756,
|
22798 |
+
"grad_norm": 3118.9599609375,
|
22799 |
+
"learning_rate": 4.9860849420525766e-05,
|
22800 |
+
"loss": 133.3087,
|
22801 |
+
"step": 32560
|
22802 |
+
},
|
22803 |
+
{
|
22804 |
+
"epoch": 0.1315869213023752,
|
22805 |
+
"grad_norm": 342.8658752441406,
|
22806 |
+
"learning_rate": 4.986048139684114e-05,
|
22807 |
+
"loss": 77.4298,
|
22808 |
+
"step": 32570
|
22809 |
+
},
|
22810 |
+
{
|
22811 |
+
"epoch": 0.13162732256774282,
|
22812 |
+
"grad_norm": 1096.45458984375,
|
22813 |
+
"learning_rate": 4.986011288848863e-05,
|
22814 |
+
"loss": 64.3582,
|
22815 |
+
"step": 32580
|
22816 |
+
},
|
22817 |
+
{
|
22818 |
+
"epoch": 0.13166772383311046,
|
22819 |
+
"grad_norm": 802.4067993164062,
|
22820 |
+
"learning_rate": 4.9859743895475416e-05,
|
22821 |
+
"loss": 106.3808,
|
22822 |
+
"step": 32590
|
22823 |
+
},
|
22824 |
+
{
|
22825 |
+
"epoch": 0.1317081250984781,
|
22826 |
+
"grad_norm": 1518.53173828125,
|
22827 |
+
"learning_rate": 4.98593744178087e-05,
|
22828 |
+
"loss": 86.2783,
|
22829 |
+
"step": 32600
|
22830 |
+
},
|
22831 |
+
{
|
22832 |
+
"epoch": 0.1317485263638457,
|
22833 |
+
"grad_norm": 662.360107421875,
|
22834 |
+
"learning_rate": 4.985900445549568e-05,
|
22835 |
+
"loss": 127.7222,
|
22836 |
+
"step": 32610
|
22837 |
+
},
|
22838 |
+
{
|
22839 |
+
"epoch": 0.13178892762921335,
|
22840 |
+
"grad_norm": 1433.412109375,
|
22841 |
+
"learning_rate": 4.985863400854358e-05,
|
22842 |
+
"loss": 109.9828,
|
22843 |
+
"step": 32620
|
22844 |
+
},
|
22845 |
+
{
|
22846 |
+
"epoch": 0.131829328894581,
|
22847 |
+
"grad_norm": 524.9939575195312,
|
22848 |
+
"learning_rate": 4.98582630769596e-05,
|
22849 |
+
"loss": 114.5297,
|
22850 |
+
"step": 32630
|
22851 |
+
},
|
22852 |
+
{
|
22853 |
+
"epoch": 0.1318697301599486,
|
22854 |
+
"grad_norm": 432.99395751953125,
|
22855 |
+
"learning_rate": 4.9857891660750986e-05,
|
22856 |
+
"loss": 74.3833,
|
22857 |
+
"step": 32640
|
22858 |
+
},
|
22859 |
+
{
|
22860 |
+
"epoch": 0.13191013142531624,
|
22861 |
+
"grad_norm": 1059.305908203125,
|
22862 |
+
"learning_rate": 4.9857519759924974e-05,
|
22863 |
+
"loss": 103.7896,
|
22864 |
+
"step": 32650
|
22865 |
+
},
|
22866 |
+
{
|
22867 |
+
"epoch": 0.13195053269068388,
|
22868 |
+
"grad_norm": 1521.5408935546875,
|
22869 |
+
"learning_rate": 4.985714737448882e-05,
|
22870 |
+
"loss": 110.8204,
|
22871 |
+
"step": 32660
|
22872 |
+
},
|
22873 |
+
{
|
22874 |
+
"epoch": 0.1319909339560515,
|
22875 |
+
"grad_norm": 712.7562255859375,
|
22876 |
+
"learning_rate": 4.9856774504449776e-05,
|
22877 |
+
"loss": 77.0656,
|
22878 |
+
"step": 32670
|
22879 |
+
},
|
22880 |
+
{
|
22881 |
+
"epoch": 0.13203133522141913,
|
22882 |
+
"grad_norm": 738.4374389648438,
|
22883 |
+
"learning_rate": 4.9856401149815126e-05,
|
22884 |
+
"loss": 76.575,
|
22885 |
+
"step": 32680
|
22886 |
+
},
|
22887 |
+
{
|
22888 |
+
"epoch": 0.13207173648678677,
|
22889 |
+
"grad_norm": 521.0355834960938,
|
22890 |
+
"learning_rate": 4.9856027310592134e-05,
|
22891 |
+
"loss": 79.7847,
|
22892 |
+
"step": 32690
|
22893 |
+
},
|
22894 |
+
{
|
22895 |
+
"epoch": 0.13211213775215438,
|
22896 |
+
"grad_norm": 738.5291137695312,
|
22897 |
+
"learning_rate": 4.985565298678809e-05,
|
22898 |
+
"loss": 74.041,
|
22899 |
+
"step": 32700
|
22900 |
+
},
|
22901 |
+
{
|
22902 |
+
"epoch": 0.13215253901752202,
|
22903 |
+
"grad_norm": 1271.053955078125,
|
22904 |
+
"learning_rate": 4.985527817841029e-05,
|
22905 |
+
"loss": 87.5718,
|
22906 |
+
"step": 32710
|
22907 |
+
},
|
22908 |
+
{
|
22909 |
+
"epoch": 0.13219294028288966,
|
22910 |
+
"grad_norm": 1135.2099609375,
|
22911 |
+
"learning_rate": 4.985490288546606e-05,
|
22912 |
+
"loss": 121.2673,
|
22913 |
+
"step": 32720
|
22914 |
+
},
|
22915 |
+
{
|
22916 |
+
"epoch": 0.1322333415482573,
|
22917 |
+
"grad_norm": 866.3362426757812,
|
22918 |
+
"learning_rate": 4.9854527107962686e-05,
|
22919 |
+
"loss": 107.6865,
|
22920 |
+
"step": 32730
|
22921 |
+
},
|
22922 |
+
{
|
22923 |
+
"epoch": 0.13227374281362492,
|
22924 |
+
"grad_norm": 1969.1063232421875,
|
22925 |
+
"learning_rate": 4.985415084590752e-05,
|
22926 |
+
"loss": 79.8334,
|
22927 |
+
"step": 32740
|
22928 |
+
},
|
22929 |
+
{
|
22930 |
+
"epoch": 0.13231414407899256,
|
22931 |
+
"grad_norm": 1149.1275634765625,
|
22932 |
+
"learning_rate": 4.985377409930789e-05,
|
22933 |
+
"loss": 109.8118,
|
22934 |
+
"step": 32750
|
22935 |
+
},
|
22936 |
+
{
|
22937 |
+
"epoch": 0.1323545453443602,
|
22938 |
+
"grad_norm": 756.9971313476562,
|
22939 |
+
"learning_rate": 4.985339686817113e-05,
|
22940 |
+
"loss": 118.7679,
|
22941 |
+
"step": 32760
|
22942 |
+
},
|
22943 |
+
{
|
22944 |
+
"epoch": 0.1323949466097278,
|
22945 |
+
"grad_norm": 795.5819702148438,
|
22946 |
+
"learning_rate": 4.9853019152504607e-05,
|
22947 |
+
"loss": 91.6352,
|
22948 |
+
"step": 32770
|
22949 |
+
},
|
22950 |
+
{
|
22951 |
+
"epoch": 0.13243534787509545,
|
22952 |
+
"grad_norm": 1227.4793701171875,
|
22953 |
+
"learning_rate": 4.9852640952315674e-05,
|
22954 |
+
"loss": 60.6576,
|
22955 |
+
"step": 32780
|
22956 |
+
},
|
22957 |
+
{
|
22958 |
+
"epoch": 0.1324757491404631,
|
22959 |
+
"grad_norm": 1278.319091796875,
|
22960 |
+
"learning_rate": 4.985226226761172e-05,
|
22961 |
+
"loss": 113.7857,
|
22962 |
+
"step": 32790
|
22963 |
+
},
|
22964 |
+
{
|
22965 |
+
"epoch": 0.1325161504058307,
|
22966 |
+
"grad_norm": 610.24560546875,
|
22967 |
+
"learning_rate": 4.985188309840012e-05,
|
22968 |
+
"loss": 78.6529,
|
22969 |
+
"step": 32800
|
22970 |
+
},
|
22971 |
+
{
|
22972 |
+
"epoch": 0.13255655167119834,
|
22973 |
+
"grad_norm": 1682.79736328125,
|
22974 |
+
"learning_rate": 4.9851503444688255e-05,
|
22975 |
+
"loss": 107.8541,
|
22976 |
+
"step": 32810
|
22977 |
+
},
|
22978 |
+
{
|
22979 |
+
"epoch": 0.13259695293656598,
|
22980 |
+
"grad_norm": 493.7384033203125,
|
22981 |
+
"learning_rate": 4.985112330648354e-05,
|
22982 |
+
"loss": 79.7233,
|
22983 |
+
"step": 32820
|
22984 |
+
},
|
22985 |
+
{
|
22986 |
+
"epoch": 0.1326373542019336,
|
22987 |
+
"grad_norm": 790.5695190429688,
|
22988 |
+
"learning_rate": 4.985074268379338e-05,
|
22989 |
+
"loss": 91.9399,
|
22990 |
+
"step": 32830
|
22991 |
+
},
|
22992 |
+
{
|
22993 |
+
"epoch": 0.13267775546730123,
|
22994 |
+
"grad_norm": 1110.0252685546875,
|
22995 |
+
"learning_rate": 4.985036157662521e-05,
|
22996 |
+
"loss": 160.5353,
|
22997 |
+
"step": 32840
|
22998 |
+
},
|
22999 |
+
{
|
23000 |
+
"epoch": 0.13271815673266887,
|
23001 |
+
"grad_norm": 1280.8482666015625,
|
23002 |
+
"learning_rate": 4.9849979984986426e-05,
|
23003 |
+
"loss": 139.881,
|
23004 |
+
"step": 32850
|
23005 |
+
},
|
23006 |
+
{
|
23007 |
+
"epoch": 0.13275855799803649,
|
23008 |
+
"grad_norm": 973.560302734375,
|
23009 |
+
"learning_rate": 4.98495979088845e-05,
|
23010 |
+
"loss": 124.0516,
|
23011 |
+
"step": 32860
|
23012 |
+
},
|
23013 |
+
{
|
23014 |
+
"epoch": 0.13279895926340413,
|
23015 |
+
"grad_norm": 496.84893798828125,
|
23016 |
+
"learning_rate": 4.9849215348326875e-05,
|
23017 |
+
"loss": 90.3565,
|
23018 |
+
"step": 32870
|
23019 |
+
},
|
23020 |
+
{
|
23021 |
+
"epoch": 0.13283936052877177,
|
23022 |
+
"grad_norm": 1748.6497802734375,
|
23023 |
+
"learning_rate": 4.984883230332099e-05,
|
23024 |
+
"loss": 84.9742,
|
23025 |
+
"step": 32880
|
23026 |
+
},
|
23027 |
+
{
|
23028 |
+
"epoch": 0.1328797617941394,
|
23029 |
+
"grad_norm": 1350.8636474609375,
|
23030 |
+
"learning_rate": 4.984844877387433e-05,
|
23031 |
+
"loss": 115.242,
|
23032 |
+
"step": 32890
|
23033 |
+
},
|
23034 |
+
{
|
23035 |
+
"epoch": 0.13292016305950702,
|
23036 |
+
"grad_norm": 776.8285522460938,
|
23037 |
+
"learning_rate": 4.984806475999437e-05,
|
23038 |
+
"loss": 77.2372,
|
23039 |
+
"step": 32900
|
23040 |
+
},
|
23041 |
+
{
|
23042 |
+
"epoch": 0.13296056432487466,
|
23043 |
+
"grad_norm": 863.73583984375,
|
23044 |
+
"learning_rate": 4.98476802616886e-05,
|
23045 |
+
"loss": 74.2184,
|
23046 |
+
"step": 32910
|
23047 |
+
},
|
23048 |
+
{
|
23049 |
+
"epoch": 0.1330009655902423,
|
23050 |
+
"grad_norm": 793.2598876953125,
|
23051 |
+
"learning_rate": 4.9847295278964514e-05,
|
23052 |
+
"loss": 67.6055,
|
23053 |
+
"step": 32920
|
23054 |
+
},
|
23055 |
+
{
|
23056 |
+
"epoch": 0.1330413668556099,
|
23057 |
+
"grad_norm": 1486.9317626953125,
|
23058 |
+
"learning_rate": 4.9846909811829604e-05,
|
23059 |
+
"loss": 127.2083,
|
23060 |
+
"step": 32930
|
23061 |
+
},
|
23062 |
+
{
|
23063 |
+
"epoch": 0.13308176812097755,
|
23064 |
+
"grad_norm": 773.4334716796875,
|
23065 |
+
"learning_rate": 4.984652386029139e-05,
|
23066 |
+
"loss": 116.8202,
|
23067 |
+
"step": 32940
|
23068 |
+
},
|
23069 |
+
{
|
23070 |
+
"epoch": 0.1331221693863452,
|
23071 |
+
"grad_norm": 1226.421630859375,
|
23072 |
+
"learning_rate": 4.984613742435742e-05,
|
23073 |
+
"loss": 112.299,
|
23074 |
+
"step": 32950
|
23075 |
+
},
|
23076 |
+
{
|
23077 |
+
"epoch": 0.1331625706517128,
|
23078 |
+
"grad_norm": 445.5863342285156,
|
23079 |
+
"learning_rate": 4.9845750504035195e-05,
|
23080 |
+
"loss": 87.6496,
|
23081 |
+
"step": 32960
|
23082 |
+
},
|
23083 |
+
{
|
23084 |
+
"epoch": 0.13320297191708044,
|
23085 |
+
"grad_norm": 894.6140747070312,
|
23086 |
+
"learning_rate": 4.984536309933227e-05,
|
23087 |
+
"loss": 95.4575,
|
23088 |
+
"step": 32970
|
23089 |
+
},
|
23090 |
+
{
|
23091 |
+
"epoch": 0.13324337318244808,
|
23092 |
+
"grad_norm": 823.8974609375,
|
23093 |
+
"learning_rate": 4.9844975210256217e-05,
|
23094 |
+
"loss": 93.921,
|
23095 |
+
"step": 32980
|
23096 |
+
},
|
23097 |
+
{
|
23098 |
+
"epoch": 0.1332837744478157,
|
23099 |
+
"grad_norm": 755.107177734375,
|
23100 |
+
"learning_rate": 4.984458683681457e-05,
|
23101 |
+
"loss": 84.3962,
|
23102 |
+
"step": 32990
|
23103 |
+
},
|
23104 |
+
{
|
23105 |
+
"epoch": 0.13332417571318333,
|
23106 |
+
"grad_norm": 885.6873779296875,
|
23107 |
+
"learning_rate": 4.984419797901491e-05,
|
23108 |
+
"loss": 105.6254,
|
23109 |
+
"step": 33000
|
23110 |
+
},
|
23111 |
+
{
|
23112 |
+
"epoch": 0.13336457697855097,
|
23113 |
+
"grad_norm": 411.86053466796875,
|
23114 |
+
"learning_rate": 4.984380863686482e-05,
|
23115 |
+
"loss": 97.5172,
|
23116 |
+
"step": 33010
|
23117 |
+
},
|
23118 |
+
{
|
23119 |
+
"epoch": 0.1334049782439186,
|
23120 |
+
"grad_norm": 563.2962646484375,
|
23121 |
+
"learning_rate": 4.98434188103719e-05,
|
23122 |
+
"loss": 80.259,
|
23123 |
+
"step": 33020
|
23124 |
+
},
|
23125 |
+
{
|
23126 |
+
"epoch": 0.13344537950928623,
|
23127 |
+
"grad_norm": 1379.732666015625,
|
23128 |
+
"learning_rate": 4.984302849954373e-05,
|
23129 |
+
"loss": 73.214,
|
23130 |
+
"step": 33030
|
23131 |
+
},
|
23132 |
+
{
|
23133 |
+
"epoch": 0.13348578077465387,
|
23134 |
+
"grad_norm": 750.8110961914062,
|
23135 |
+
"learning_rate": 4.984263770438793e-05,
|
23136 |
+
"loss": 106.9937,
|
23137 |
+
"step": 33040
|
23138 |
+
},
|
23139 |
+
{
|
23140 |
+
"epoch": 0.1335261820400215,
|
23141 |
+
"grad_norm": 1013.02392578125,
|
23142 |
+
"learning_rate": 4.984224642491212e-05,
|
23143 |
+
"loss": 75.5326,
|
23144 |
+
"step": 33050
|
23145 |
+
},
|
23146 |
+
{
|
23147 |
+
"epoch": 0.13356658330538912,
|
23148 |
+
"grad_norm": 1022.1004028320312,
|
23149 |
+
"learning_rate": 4.9841854661123936e-05,
|
23150 |
+
"loss": 108.553,
|
23151 |
+
"step": 33060
|
23152 |
+
},
|
23153 |
+
{
|
23154 |
+
"epoch": 0.13360698457075676,
|
23155 |
+
"grad_norm": 809.585205078125,
|
23156 |
+
"learning_rate": 4.9841462413030995e-05,
|
23157 |
+
"loss": 84.0129,
|
23158 |
+
"step": 33070
|
23159 |
+
},
|
23160 |
+
{
|
23161 |
+
"epoch": 0.1336473858361244,
|
23162 |
+
"grad_norm": 842.181884765625,
|
23163 |
+
"learning_rate": 4.984106968064095e-05,
|
23164 |
+
"loss": 112.834,
|
23165 |
+
"step": 33080
|
23166 |
+
},
|
23167 |
+
{
|
23168 |
+
"epoch": 0.133687787101492,
|
23169 |
+
"grad_norm": 467.4346618652344,
|
23170 |
+
"learning_rate": 4.984067646396147e-05,
|
23171 |
+
"loss": 100.528,
|
23172 |
+
"step": 33090
|
23173 |
+
},
|
23174 |
+
{
|
23175 |
+
"epoch": 0.13372818836685965,
|
23176 |
+
"grad_norm": 714.903076171875,
|
23177 |
+
"learning_rate": 4.984028276300021e-05,
|
23178 |
+
"loss": 129.6565,
|
23179 |
+
"step": 33100
|
23180 |
+
},
|
23181 |
+
{
|
23182 |
+
"epoch": 0.1337685896322273,
|
23183 |
+
"grad_norm": 709.3721923828125,
|
23184 |
+
"learning_rate": 4.983988857776486e-05,
|
23185 |
+
"loss": 101.3018,
|
23186 |
+
"step": 33110
|
23187 |
+
},
|
23188 |
+
{
|
23189 |
+
"epoch": 0.1338089908975949,
|
23190 |
+
"grad_norm": 988.5148315429688,
|
23191 |
+
"learning_rate": 4.983949390826308e-05,
|
23192 |
+
"loss": 86.796,
|
23193 |
+
"step": 33120
|
23194 |
+
},
|
23195 |
+
{
|
23196 |
+
"epoch": 0.13384939216296254,
|
23197 |
+
"grad_norm": 1472.410400390625,
|
23198 |
+
"learning_rate": 4.983909875450258e-05,
|
23199 |
+
"loss": 94.508,
|
23200 |
+
"step": 33130
|
23201 |
+
},
|
23202 |
+
{
|
23203 |
+
"epoch": 0.13388979342833018,
|
23204 |
+
"grad_norm": 900.3340454101562,
|
23205 |
+
"learning_rate": 4.983870311649107e-05,
|
23206 |
+
"loss": 95.947,
|
23207 |
+
"step": 33140
|
23208 |
+
},
|
23209 |
+
{
|
23210 |
+
"epoch": 0.1339301946936978,
|
23211 |
+
"grad_norm": 1466.88134765625,
|
23212 |
+
"learning_rate": 4.983830699423625e-05,
|
23213 |
+
"loss": 74.9536,
|
23214 |
+
"step": 33150
|
23215 |
+
},
|
23216 |
+
{
|
23217 |
+
"epoch": 0.13397059595906544,
|
23218 |
+
"grad_norm": 576.2639770507812,
|
23219 |
+
"learning_rate": 4.9837910387745845e-05,
|
23220 |
+
"loss": 115.5023,
|
23221 |
+
"step": 33160
|
23222 |
+
},
|
23223 |
+
{
|
23224 |
+
"epoch": 0.13401099722443308,
|
23225 |
+
"grad_norm": 1318.8853759765625,
|
23226 |
+
"learning_rate": 4.9837513297027594e-05,
|
23227 |
+
"loss": 84.5853,
|
23228 |
+
"step": 33170
|
23229 |
+
},
|
23230 |
+
{
|
23231 |
+
"epoch": 0.1340513984898007,
|
23232 |
+
"grad_norm": 934.3829345703125,
|
23233 |
+
"learning_rate": 4.983711572208924e-05,
|
23234 |
+
"loss": 75.3381,
|
23235 |
+
"step": 33180
|
23236 |
+
},
|
23237 |
+
{
|
23238 |
+
"epoch": 0.13409179975516833,
|
23239 |
+
"grad_norm": 1154.966796875,
|
23240 |
+
"learning_rate": 4.983671766293851e-05,
|
23241 |
+
"loss": 83.0238,
|
23242 |
+
"step": 33190
|
23243 |
+
},
|
23244 |
+
{
|
23245 |
+
"epoch": 0.13413220102053597,
|
23246 |
+
"grad_norm": 1403.0806884765625,
|
23247 |
+
"learning_rate": 4.983631911958319e-05,
|
23248 |
+
"loss": 82.2895,
|
23249 |
+
"step": 33200
|
23250 |
+
},
|
23251 |
+
{
|
23252 |
+
"epoch": 0.1341726022859036,
|
23253 |
+
"grad_norm": 1025.5306396484375,
|
23254 |
+
"learning_rate": 4.983592009203105e-05,
|
23255 |
+
"loss": 64.9099,
|
23256 |
+
"step": 33210
|
23257 |
+
},
|
23258 |
+
{
|
23259 |
+
"epoch": 0.13421300355127122,
|
23260 |
+
"grad_norm": 570.6565551757812,
|
23261 |
+
"learning_rate": 4.9835520580289854e-05,
|
23262 |
+
"loss": 60.6472,
|
23263 |
+
"step": 33220
|
23264 |
+
},
|
23265 |
+
{
|
23266 |
+
"epoch": 0.13425340481663886,
|
23267 |
+
"grad_norm": 241.88272094726562,
|
23268 |
+
"learning_rate": 4.98351205843674e-05,
|
23269 |
+
"loss": 101.3504,
|
23270 |
+
"step": 33230
|
23271 |
+
},
|
23272 |
+
{
|
23273 |
+
"epoch": 0.1342938060820065,
|
23274 |
+
"grad_norm": 786.84912109375,
|
23275 |
+
"learning_rate": 4.9834720104271484e-05,
|
23276 |
+
"loss": 80.2996,
|
23277 |
+
"step": 33240
|
23278 |
+
},
|
23279 |
+
{
|
23280 |
+
"epoch": 0.1343342073473741,
|
23281 |
+
"grad_norm": 1382.0052490234375,
|
23282 |
+
"learning_rate": 4.983431914000991e-05,
|
23283 |
+
"loss": 90.2512,
|
23284 |
+
"step": 33250
|
23285 |
+
},
|
23286 |
+
{
|
23287 |
+
"epoch": 0.13437460861274175,
|
23288 |
+
"grad_norm": 1485.1060791015625,
|
23289 |
+
"learning_rate": 4.9833917691590506e-05,
|
23290 |
+
"loss": 80.0044,
|
23291 |
+
"step": 33260
|
23292 |
+
},
|
23293 |
+
{
|
23294 |
+
"epoch": 0.1344150098781094,
|
23295 |
+
"grad_norm": 698.7577514648438,
|
23296 |
+
"learning_rate": 4.9833515759021085e-05,
|
23297 |
+
"loss": 102.9575,
|
23298 |
+
"step": 33270
|
23299 |
+
},
|
23300 |
+
{
|
23301 |
+
"epoch": 0.134455411143477,
|
23302 |
+
"grad_norm": 693.4671020507812,
|
23303 |
+
"learning_rate": 4.98331133423095e-05,
|
23304 |
+
"loss": 94.7776,
|
23305 |
+
"step": 33280
|
23306 |
+
},
|
23307 |
+
{
|
23308 |
+
"epoch": 0.13449581240884464,
|
23309 |
+
"grad_norm": 664.5126342773438,
|
23310 |
+
"learning_rate": 4.983271044146357e-05,
|
23311 |
+
"loss": 85.3929,
|
23312 |
+
"step": 33290
|
23313 |
+
},
|
23314 |
+
{
|
23315 |
+
"epoch": 0.13453621367421228,
|
23316 |
+
"grad_norm": 841.61962890625,
|
23317 |
+
"learning_rate": 4.983230705649118e-05,
|
23318 |
+
"loss": 92.7831,
|
23319 |
+
"step": 33300
|
23320 |
+
},
|
23321 |
+
{
|
23322 |
+
"epoch": 0.1345766149395799,
|
23323 |
+
"grad_norm": 1524.328125,
|
23324 |
+
"learning_rate": 4.9831903187400166e-05,
|
23325 |
+
"loss": 85.1477,
|
23326 |
+
"step": 33310
|
23327 |
+
},
|
23328 |
+
{
|
23329 |
+
"epoch": 0.13461701620494754,
|
23330 |
+
"grad_norm": 551.5435180664062,
|
23331 |
+
"learning_rate": 4.983149883419842e-05,
|
23332 |
+
"loss": 128.3945,
|
23333 |
+
"step": 33320
|
23334 |
+
},
|
23335 |
+
{
|
23336 |
+
"epoch": 0.13465741747031518,
|
23337 |
+
"grad_norm": 387.754638671875,
|
23338 |
+
"learning_rate": 4.983109399689382e-05,
|
23339 |
+
"loss": 75.982,
|
23340 |
+
"step": 33330
|
23341 |
+
},
|
23342 |
+
{
|
23343 |
+
"epoch": 0.1346978187356828,
|
23344 |
+
"grad_norm": 2052.11669921875,
|
23345 |
+
"learning_rate": 4.9830688675494265e-05,
|
23346 |
+
"loss": 166.9485,
|
23347 |
+
"step": 33340
|
23348 |
+
},
|
23349 |
+
{
|
23350 |
+
"epoch": 0.13473822000105043,
|
23351 |
+
"grad_norm": 656.3280639648438,
|
23352 |
+
"learning_rate": 4.9830282870007646e-05,
|
23353 |
+
"loss": 98.0935,
|
23354 |
+
"step": 33350
|
23355 |
+
},
|
23356 |
+
{
|
23357 |
+
"epoch": 0.13477862126641807,
|
23358 |
+
"grad_norm": 960.6358032226562,
|
23359 |
+
"learning_rate": 4.982987658044188e-05,
|
23360 |
+
"loss": 88.8502,
|
23361 |
+
"step": 33360
|
23362 |
+
},
|
23363 |
+
{
|
23364 |
+
"epoch": 0.1348190225317857,
|
23365 |
+
"grad_norm": 303.2107238769531,
|
23366 |
+
"learning_rate": 4.982946980680488e-05,
|
23367 |
+
"loss": 82.7864,
|
23368 |
+
"step": 33370
|
23369 |
+
},
|
23370 |
+
{
|
23371 |
+
"epoch": 0.13485942379715332,
|
23372 |
+
"grad_norm": 786.0392456054688,
|
23373 |
+
"learning_rate": 4.982906254910459e-05,
|
23374 |
+
"loss": 75.8299,
|
23375 |
+
"step": 33380
|
23376 |
+
},
|
23377 |
+
{
|
23378 |
+
"epoch": 0.13489982506252096,
|
23379 |
+
"grad_norm": 554.2486572265625,
|
23380 |
+
"learning_rate": 4.982865480734894e-05,
|
23381 |
+
"loss": 71.6516,
|
23382 |
+
"step": 33390
|
23383 |
+
},
|
23384 |
+
{
|
23385 |
+
"epoch": 0.1349402263278886,
|
23386 |
+
"grad_norm": 901.5972900390625,
|
23387 |
+
"learning_rate": 4.982824658154589e-05,
|
23388 |
+
"loss": 106.4935,
|
23389 |
+
"step": 33400
|
23390 |
+
},
|
23391 |
+
{
|
23392 |
+
"epoch": 0.1349806275932562,
|
23393 |
+
"grad_norm": 363.12115478515625,
|
23394 |
+
"learning_rate": 4.982783787170338e-05,
|
23395 |
+
"loss": 76.4278,
|
23396 |
+
"step": 33410
|
23397 |
+
},
|
23398 |
+
{
|
23399 |
+
"epoch": 0.13502102885862385,
|
23400 |
+
"grad_norm": 3618.558349609375,
|
23401 |
+
"learning_rate": 4.982742867782939e-05,
|
23402 |
+
"loss": 130.8146,
|
23403 |
+
"step": 33420
|
23404 |
+
},
|
23405 |
+
{
|
23406 |
+
"epoch": 0.1350614301239915,
|
23407 |
+
"grad_norm": 810.9340209960938,
|
23408 |
+
"learning_rate": 4.982701899993189e-05,
|
23409 |
+
"loss": 109.7431,
|
23410 |
+
"step": 33430
|
23411 |
+
},
|
23412 |
+
{
|
23413 |
+
"epoch": 0.1351018313893591,
|
23414 |
+
"grad_norm": 1115.0909423828125,
|
23415 |
+
"learning_rate": 4.982660883801889e-05,
|
23416 |
+
"loss": 97.727,
|
23417 |
+
"step": 33440
|
23418 |
+
},
|
23419 |
+
{
|
23420 |
+
"epoch": 0.13514223265472675,
|
23421 |
+
"grad_norm": 726.3258056640625,
|
23422 |
+
"learning_rate": 4.982619819209836e-05,
|
23423 |
+
"loss": 129.2327,
|
23424 |
+
"step": 33450
|
23425 |
+
},
|
23426 |
+
{
|
23427 |
+
"epoch": 0.13518263392009439,
|
23428 |
+
"grad_norm": 0.0,
|
23429 |
+
"learning_rate": 4.9825787062178315e-05,
|
23430 |
+
"loss": 62.2287,
|
23431 |
+
"step": 33460
|
23432 |
+
},
|
23433 |
+
{
|
23434 |
+
"epoch": 0.135223035185462,
|
23435 |
+
"grad_norm": 2895.0556640625,
|
23436 |
+
"learning_rate": 4.982537544826677e-05,
|
23437 |
+
"loss": 99.4745,
|
23438 |
+
"step": 33470
|
23439 |
+
},
|
23440 |
+
{
|
23441 |
+
"epoch": 0.13526343645082964,
|
23442 |
+
"grad_norm": 496.2646789550781,
|
23443 |
+
"learning_rate": 4.982496335037175e-05,
|
23444 |
+
"loss": 76.2467,
|
23445 |
+
"step": 33480
|
23446 |
+
},
|
23447 |
+
{
|
23448 |
+
"epoch": 0.13530383771619728,
|
23449 |
+
"grad_norm": 1723.678955078125,
|
23450 |
+
"learning_rate": 4.982455076850129e-05,
|
23451 |
+
"loss": 87.5003,
|
23452 |
+
"step": 33490
|
23453 |
+
},
|
23454 |
+
{
|
23455 |
+
"epoch": 0.1353442389815649,
|
23456 |
+
"grad_norm": 622.0546875,
|
23457 |
+
"learning_rate": 4.982413770266342e-05,
|
23458 |
+
"loss": 60.5188,
|
23459 |
+
"step": 33500
|
23460 |
+
},
|
23461 |
+
{
|
23462 |
+
"epoch": 0.13538464024693253,
|
23463 |
+
"grad_norm": 579.060302734375,
|
23464 |
+
"learning_rate": 4.9823724152866226e-05,
|
23465 |
+
"loss": 121.2273,
|
23466 |
+
"step": 33510
|
23467 |
+
},
|
23468 |
+
{
|
23469 |
+
"epoch": 0.13542504151230017,
|
23470 |
+
"grad_norm": 638.8704833984375,
|
23471 |
+
"learning_rate": 4.982331011911774e-05,
|
23472 |
+
"loss": 139.4159,
|
23473 |
+
"step": 33520
|
23474 |
+
},
|
23475 |
+
{
|
23476 |
+
"epoch": 0.13546544277766778,
|
23477 |
+
"grad_norm": 684.1176147460938,
|
23478 |
+
"learning_rate": 4.9822895601426034e-05,
|
23479 |
+
"loss": 107.1239,
|
23480 |
+
"step": 33530
|
23481 |
+
},
|
23482 |
+
{
|
23483 |
+
"epoch": 0.13550584404303542,
|
23484 |
+
"grad_norm": 626.8060302734375,
|
23485 |
+
"learning_rate": 4.982248059979921e-05,
|
23486 |
+
"loss": 86.9168,
|
23487 |
+
"step": 33540
|
23488 |
+
},
|
23489 |
+
{
|
23490 |
+
"epoch": 0.13554624530840306,
|
23491 |
+
"grad_norm": 539.0343017578125,
|
23492 |
+
"learning_rate": 4.982206511424534e-05,
|
23493 |
+
"loss": 69.0915,
|
23494 |
+
"step": 33550
|
23495 |
+
},
|
23496 |
+
{
|
23497 |
+
"epoch": 0.1355866465737707,
|
23498 |
+
"grad_norm": 2398.862548828125,
|
23499 |
+
"learning_rate": 4.9821649144772545e-05,
|
23500 |
+
"loss": 70.2374,
|
23501 |
+
"step": 33560
|
23502 |
+
},
|
23503 |
+
{
|
23504 |
+
"epoch": 0.13562704783913831,
|
23505 |
+
"grad_norm": 586.1361694335938,
|
23506 |
+
"learning_rate": 4.9821232691388906e-05,
|
23507 |
+
"loss": 70.2269,
|
23508 |
+
"step": 33570
|
23509 |
+
},
|
23510 |
+
{
|
23511 |
+
"epoch": 0.13566744910450595,
|
23512 |
+
"grad_norm": 850.7914428710938,
|
23513 |
+
"learning_rate": 4.982081575410256e-05,
|
23514 |
+
"loss": 113.3011,
|
23515 |
+
"step": 33580
|
23516 |
+
},
|
23517 |
+
{
|
23518 |
+
"epoch": 0.1357078503698736,
|
23519 |
+
"grad_norm": 642.4421997070312,
|
23520 |
+
"learning_rate": 4.9820398332921634e-05,
|
23521 |
+
"loss": 95.5486,
|
23522 |
+
"step": 33590
|
23523 |
+
},
|
23524 |
+
{
|
23525 |
+
"epoch": 0.1357482516352412,
|
23526 |
+
"grad_norm": 706.2947998046875,
|
23527 |
+
"learning_rate": 4.981998042785427e-05,
|
23528 |
+
"loss": 97.3085,
|
23529 |
+
"step": 33600
|
23530 |
+
},
|
23531 |
+
{
|
23532 |
+
"epoch": 0.13578865290060885,
|
23533 |
+
"grad_norm": 1023.1542358398438,
|
23534 |
+
"learning_rate": 4.98195620389086e-05,
|
23535 |
+
"loss": 117.8821,
|
23536 |
+
"step": 33610
|
23537 |
+
},
|
23538 |
+
{
|
23539 |
+
"epoch": 0.1358290541659765,
|
23540 |
+
"grad_norm": 480.221923828125,
|
23541 |
+
"learning_rate": 4.9819143166092796e-05,
|
23542 |
+
"loss": 93.8683,
|
23543 |
+
"step": 33620
|
23544 |
+
},
|
23545 |
+
{
|
23546 |
+
"epoch": 0.1358694554313441,
|
23547 |
+
"grad_norm": 520.1914672851562,
|
23548 |
+
"learning_rate": 4.9818723809415016e-05,
|
23549 |
+
"loss": 83.5975,
|
23550 |
+
"step": 33630
|
23551 |
+
},
|
23552 |
+
{
|
23553 |
+
"epoch": 0.13590985669671174,
|
23554 |
+
"grad_norm": 2167.59619140625,
|
23555 |
+
"learning_rate": 4.981830396888344e-05,
|
23556 |
+
"loss": 103.9532,
|
23557 |
+
"step": 33640
|
23558 |
+
},
|
23559 |
+
{
|
23560 |
+
"epoch": 0.13595025796207938,
|
23561 |
+
"grad_norm": 801.8353271484375,
|
23562 |
+
"learning_rate": 4.981788364450625e-05,
|
23563 |
+
"loss": 146.1724,
|
23564 |
+
"step": 33650
|
23565 |
+
},
|
23566 |
+
{
|
23567 |
+
"epoch": 0.135990659227447,
|
23568 |
+
"grad_norm": 862.556884765625,
|
23569 |
+
"learning_rate": 4.981746283629164e-05,
|
23570 |
+
"loss": 92.8243,
|
23571 |
+
"step": 33660
|
23572 |
+
},
|
23573 |
+
{
|
23574 |
+
"epoch": 0.13603106049281463,
|
23575 |
+
"grad_norm": 1226.8758544921875,
|
23576 |
+
"learning_rate": 4.981704154424781e-05,
|
23577 |
+
"loss": 107.6284,
|
23578 |
+
"step": 33670
|
23579 |
+
},
|
23580 |
+
{
|
23581 |
+
"epoch": 0.13607146175818227,
|
23582 |
+
"grad_norm": 1550.146484375,
|
23583 |
+
"learning_rate": 4.981661976838299e-05,
|
23584 |
+
"loss": 109.7946,
|
23585 |
+
"step": 33680
|
23586 |
+
},
|
23587 |
+
{
|
23588 |
+
"epoch": 0.13611186302354988,
|
23589 |
+
"grad_norm": 619.8020629882812,
|
23590 |
+
"learning_rate": 4.981619750870537e-05,
|
23591 |
+
"loss": 108.0941,
|
23592 |
+
"step": 33690
|
23593 |
+
},
|
23594 |
+
{
|
23595 |
+
"epoch": 0.13615226428891752,
|
23596 |
+
"grad_norm": 551.2567749023438,
|
23597 |
+
"learning_rate": 4.9815774765223226e-05,
|
23598 |
+
"loss": 75.5076,
|
23599 |
+
"step": 33700
|
23600 |
+
},
|
23601 |
+
{
|
23602 |
+
"epoch": 0.13619266555428516,
|
23603 |
+
"grad_norm": 1045.827880859375,
|
23604 |
+
"learning_rate": 4.9815351537944774e-05,
|
23605 |
+
"loss": 64.202,
|
23606 |
+
"step": 33710
|
23607 |
+
},
|
23608 |
+
{
|
23609 |
+
"epoch": 0.1362330668196528,
|
23610 |
+
"grad_norm": 435.85296630859375,
|
23611 |
+
"learning_rate": 4.9814927826878256e-05,
|
23612 |
+
"loss": 123.6437,
|
23613 |
+
"step": 33720
|
23614 |
+
},
|
23615 |
+
{
|
23616 |
+
"epoch": 0.13627346808502042,
|
23617 |
+
"grad_norm": 1007.7003784179688,
|
23618 |
+
"learning_rate": 4.9814503632031954e-05,
|
23619 |
+
"loss": 133.8719,
|
23620 |
+
"step": 33730
|
23621 |
+
},
|
23622 |
+
{
|
23623 |
+
"epoch": 0.13631386935038806,
|
23624 |
+
"grad_norm": 1092.2225341796875,
|
23625 |
+
"learning_rate": 4.981407895341412e-05,
|
23626 |
+
"loss": 91.8184,
|
23627 |
+
"step": 33740
|
23628 |
+
},
|
23629 |
+
{
|
23630 |
+
"epoch": 0.1363542706157557,
|
23631 |
+
"grad_norm": 656.14990234375,
|
23632 |
+
"learning_rate": 4.9813653791033057e-05,
|
23633 |
+
"loss": 68.1897,
|
23634 |
+
"step": 33750
|
23635 |
+
},
|
23636 |
+
{
|
23637 |
+
"epoch": 0.1363946718811233,
|
23638 |
+
"grad_norm": 1275.6177978515625,
|
23639 |
+
"learning_rate": 4.981322814489703e-05,
|
23640 |
+
"loss": 91.9896,
|
23641 |
+
"step": 33760
|
23642 |
+
},
|
23643 |
+
{
|
23644 |
+
"epoch": 0.13643507314649095,
|
23645 |
+
"grad_norm": 900.5950927734375,
|
23646 |
+
"learning_rate": 4.9812802015014334e-05,
|
23647 |
+
"loss": 90.7882,
|
23648 |
+
"step": 33770
|
23649 |
+
},
|
23650 |
+
{
|
23651 |
+
"epoch": 0.1364754744118586,
|
23652 |
+
"grad_norm": 0.0,
|
23653 |
+
"learning_rate": 4.981237540139331e-05,
|
23654 |
+
"loss": 62.0134,
|
23655 |
+
"step": 33780
|
23656 |
+
},
|
23657 |
+
{
|
23658 |
+
"epoch": 0.1365158756772262,
|
23659 |
+
"grad_norm": 1441.5318603515625,
|
23660 |
+
"learning_rate": 4.9811948304042234e-05,
|
23661 |
+
"loss": 75.5356,
|
23662 |
+
"step": 33790
|
23663 |
+
},
|
23664 |
+
{
|
23665 |
+
"epoch": 0.13655627694259384,
|
23666 |
+
"grad_norm": 783.706787109375,
|
23667 |
+
"learning_rate": 4.9811520722969465e-05,
|
23668 |
+
"loss": 109.1127,
|
23669 |
+
"step": 33800
|
23670 |
+
},
|
23671 |
+
{
|
23672 |
+
"epoch": 0.13659667820796148,
|
23673 |
+
"grad_norm": 1493.0281982421875,
|
23674 |
+
"learning_rate": 4.981109265818332e-05,
|
23675 |
+
"loss": 150.1899,
|
23676 |
+
"step": 33810
|
23677 |
+
},
|
23678 |
+
{
|
23679 |
+
"epoch": 0.1366370794733291,
|
23680 |
+
"grad_norm": 728.7564086914062,
|
23681 |
+
"learning_rate": 4.981066410969215e-05,
|
23682 |
+
"loss": 64.3308,
|
23683 |
+
"step": 33820
|
23684 |
+
},
|
23685 |
+
{
|
23686 |
+
"epoch": 0.13667748073869673,
|
23687 |
+
"grad_norm": 1312.0137939453125,
|
23688 |
+
"learning_rate": 4.981023507750431e-05,
|
23689 |
+
"loss": 119.9241,
|
23690 |
+
"step": 33830
|
23691 |
+
},
|
23692 |
+
{
|
23693 |
+
"epoch": 0.13671788200406437,
|
23694 |
+
"grad_norm": 805.4586181640625,
|
23695 |
+
"learning_rate": 4.980980556162816e-05,
|
23696 |
+
"loss": 107.8721,
|
23697 |
+
"step": 33840
|
23698 |
+
},
|
23699 |
+
{
|
23700 |
+
"epoch": 0.13675828326943198,
|
23701 |
+
"grad_norm": 609.840087890625,
|
23702 |
+
"learning_rate": 4.980937556207208e-05,
|
23703 |
+
"loss": 89.4216,
|
23704 |
+
"step": 33850
|
23705 |
+
},
|
23706 |
+
{
|
23707 |
+
"epoch": 0.13679868453479962,
|
23708 |
+
"grad_norm": 973.7212524414062,
|
23709 |
+
"learning_rate": 4.9808945078844456e-05,
|
23710 |
+
"loss": 81.5434,
|
23711 |
+
"step": 33860
|
23712 |
+
},
|
23713 |
+
{
|
23714 |
+
"epoch": 0.13683908580016726,
|
23715 |
+
"grad_norm": 982.0657348632812,
|
23716 |
+
"learning_rate": 4.9808514111953674e-05,
|
23717 |
+
"loss": 67.6982,
|
23718 |
+
"step": 33870
|
23719 |
+
},
|
23720 |
+
{
|
23721 |
+
"epoch": 0.1368794870655349,
|
23722 |
+
"grad_norm": 524.6974487304688,
|
23723 |
+
"learning_rate": 4.980808266140813e-05,
|
23724 |
+
"loss": 150.7908,
|
23725 |
+
"step": 33880
|
23726 |
+
},
|
23727 |
+
{
|
23728 |
+
"epoch": 0.13691988833090252,
|
23729 |
+
"grad_norm": 600.6156005859375,
|
23730 |
+
"learning_rate": 4.980765072721625e-05,
|
23731 |
+
"loss": 107.8641,
|
23732 |
+
"step": 33890
|
23733 |
+
},
|
23734 |
+
{
|
23735 |
+
"epoch": 0.13696028959627016,
|
23736 |
+
"grad_norm": 464.95550537109375,
|
23737 |
+
"learning_rate": 4.9807218309386444e-05,
|
23738 |
+
"loss": 81.4719,
|
23739 |
+
"step": 33900
|
23740 |
+
},
|
23741 |
+
{
|
23742 |
+
"epoch": 0.1370006908616378,
|
23743 |
+
"grad_norm": 810.7647094726562,
|
23744 |
+
"learning_rate": 4.980678540792715e-05,
|
23745 |
+
"loss": 91.2513,
|
23746 |
+
"step": 33910
|
23747 |
+
},
|
23748 |
+
{
|
23749 |
+
"epoch": 0.1370410921270054,
|
23750 |
+
"grad_norm": 904.2017211914062,
|
23751 |
+
"learning_rate": 4.980635202284679e-05,
|
23752 |
+
"loss": 128.9432,
|
23753 |
+
"step": 33920
|
23754 |
+
},
|
23755 |
+
{
|
23756 |
+
"epoch": 0.13708149339237305,
|
23757 |
+
"grad_norm": 751.0636596679688,
|
23758 |
+
"learning_rate": 4.980591815415384e-05,
|
23759 |
+
"loss": 67.3943,
|
23760 |
+
"step": 33930
|
23761 |
+
},
|
23762 |
+
{
|
23763 |
+
"epoch": 0.1371218946577407,
|
23764 |
+
"grad_norm": 999.1458740234375,
|
23765 |
+
"learning_rate": 4.980548380185674e-05,
|
23766 |
+
"loss": 122.1365,
|
23767 |
+
"step": 33940
|
23768 |
+
},
|
23769 |
+
{
|
23770 |
+
"epoch": 0.1371622959231083,
|
23771 |
+
"grad_norm": 1589.1033935546875,
|
23772 |
+
"learning_rate": 4.980504896596396e-05,
|
23773 |
+
"loss": 77.5396,
|
23774 |
+
"step": 33950
|
23775 |
+
},
|
23776 |
+
{
|
23777 |
+
"epoch": 0.13720269718847594,
|
23778 |
+
"grad_norm": 1546.7073974609375,
|
23779 |
+
"learning_rate": 4.980461364648398e-05,
|
23780 |
+
"loss": 117.3774,
|
23781 |
+
"step": 33960
|
23782 |
+
},
|
23783 |
+
{
|
23784 |
+
"epoch": 0.13724309845384358,
|
23785 |
+
"grad_norm": 920.711669921875,
|
23786 |
+
"learning_rate": 4.9804177843425295e-05,
|
23787 |
+
"loss": 112.9565,
|
23788 |
+
"step": 33970
|
23789 |
+
},
|
23790 |
+
{
|
23791 |
+
"epoch": 0.1372834997192112,
|
23792 |
+
"grad_norm": 1206.650146484375,
|
23793 |
+
"learning_rate": 4.980374155679639e-05,
|
23794 |
+
"loss": 111.5966,
|
23795 |
+
"step": 33980
|
23796 |
+
},
|
23797 |
+
{
|
23798 |
+
"epoch": 0.13732390098457883,
|
23799 |
+
"grad_norm": 1469.6834716796875,
|
23800 |
+
"learning_rate": 4.980330478660576e-05,
|
23801 |
+
"loss": 107.332,
|
23802 |
+
"step": 33990
|
23803 |
+
},
|
23804 |
+
{
|
23805 |
+
"epoch": 0.13736430224994647,
|
23806 |
+
"grad_norm": 1957.3115234375,
|
23807 |
+
"learning_rate": 4.980286753286195e-05,
|
23808 |
+
"loss": 128.1132,
|
23809 |
+
"step": 34000
|
23810 |
}
|
23811 |
],
|
23812 |
"logging_steps": 10,
|