Upload folder using huggingface_hub
Browse files- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +1402 -2
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1871270621
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f55fb89421ce83ff461080187492d84798fbcd76e79543c89089fd9d51cb34c5
|
3 |
size 1871270621
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 950390010
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06b87bf1f744c71408a95b3614f57bf77ef130c54d78fd1a5a1342034e74ce64
|
3 |
size 950390010
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2363b22c92eeda8138f40ea2564836453f9aa4624feb2c1d81714c1a7fa53ac9
|
3 |
size 14244
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0088ae0ce76fd10e325045aebd9a3af74d0eba5e0d4a404ec097dbca5014f9e
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -21007,6 +21007,1406 @@
|
|
21007 |
"learning_rate": 4.993910125649561e-05,
|
21008 |
"loss": 147.1198,
|
21009 |
"step": 30000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21010 |
}
|
21011 |
],
|
21012 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.1292840491764202,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 32000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
21007 |
"learning_rate": 4.993910125649561e-05,
|
21008 |
"loss": 147.1198,
|
21009 |
"step": 30000
|
21010 |
+
},
|
21011 |
+
{
|
21012 |
+
"epoch": 0.12124419736826157,
|
21013 |
+
"grad_norm": 364.9338684082031,
|
21014 |
+
"learning_rate": 4.993885751736781e-05,
|
21015 |
+
"loss": 84.0477,
|
21016 |
+
"step": 30010
|
21017 |
+
},
|
21018 |
+
{
|
21019 |
+
"epoch": 0.12128459863362921,
|
21020 |
+
"grad_norm": 719.6060180664062,
|
21021 |
+
"learning_rate": 4.993861329204414e-05,
|
21022 |
+
"loss": 115.0903,
|
21023 |
+
"step": 30020
|
21024 |
+
},
|
21025 |
+
{
|
21026 |
+
"epoch": 0.12132499989899684,
|
21027 |
+
"grad_norm": 1294.3851318359375,
|
21028 |
+
"learning_rate": 4.993836858052937e-05,
|
21029 |
+
"loss": 151.7286,
|
21030 |
+
"step": 30030
|
21031 |
+
},
|
21032 |
+
{
|
21033 |
+
"epoch": 0.12136540116436446,
|
21034 |
+
"grad_norm": 711.005126953125,
|
21035 |
+
"learning_rate": 4.993812338282826e-05,
|
21036 |
+
"loss": 86.3682,
|
21037 |
+
"step": 30040
|
21038 |
+
},
|
21039 |
+
{
|
21040 |
+
"epoch": 0.1214058024297321,
|
21041 |
+
"grad_norm": 723.4884643554688,
|
21042 |
+
"learning_rate": 4.993787769894559e-05,
|
21043 |
+
"loss": 120.8179,
|
21044 |
+
"step": 30050
|
21045 |
+
},
|
21046 |
+
{
|
21047 |
+
"epoch": 0.12144620369509973,
|
21048 |
+
"grad_norm": 1060.3338623046875,
|
21049 |
+
"learning_rate": 4.993763152888617e-05,
|
21050 |
+
"loss": 134.1443,
|
21051 |
+
"step": 30060
|
21052 |
+
},
|
21053 |
+
{
|
21054 |
+
"epoch": 0.12148660496046736,
|
21055 |
+
"grad_norm": 829.1416625976562,
|
21056 |
+
"learning_rate": 4.993738487265478e-05,
|
21057 |
+
"loss": 119.1408,
|
21058 |
+
"step": 30070
|
21059 |
+
},
|
21060 |
+
{
|
21061 |
+
"epoch": 0.121527006225835,
|
21062 |
+
"grad_norm": 267.49755859375,
|
21063 |
+
"learning_rate": 4.993713773025623e-05,
|
21064 |
+
"loss": 112.0577,
|
21065 |
+
"step": 30080
|
21066 |
+
},
|
21067 |
+
{
|
21068 |
+
"epoch": 0.12156740749120262,
|
21069 |
+
"grad_norm": 2977.7763671875,
|
21070 |
+
"learning_rate": 4.993689010169534e-05,
|
21071 |
+
"loss": 115.676,
|
21072 |
+
"step": 30090
|
21073 |
+
},
|
21074 |
+
{
|
21075 |
+
"epoch": 0.12160780875657026,
|
21076 |
+
"grad_norm": 927.7879638671875,
|
21077 |
+
"learning_rate": 4.993664198697694e-05,
|
21078 |
+
"loss": 80.4086,
|
21079 |
+
"step": 30100
|
21080 |
+
},
|
21081 |
+
{
|
21082 |
+
"epoch": 0.12164821002193789,
|
21083 |
+
"grad_norm": 1482.4476318359375,
|
21084 |
+
"learning_rate": 4.993639338610587e-05,
|
21085 |
+
"loss": 90.1057,
|
21086 |
+
"step": 30110
|
21087 |
+
},
|
21088 |
+
{
|
21089 |
+
"epoch": 0.12168861128730551,
|
21090 |
+
"grad_norm": 1078.0257568359375,
|
21091 |
+
"learning_rate": 4.993614429908697e-05,
|
21092 |
+
"loss": 112.5462,
|
21093 |
+
"step": 30120
|
21094 |
+
},
|
21095 |
+
{
|
21096 |
+
"epoch": 0.12172901255267315,
|
21097 |
+
"grad_norm": 1431.45947265625,
|
21098 |
+
"learning_rate": 4.99358947259251e-05,
|
21099 |
+
"loss": 83.491,
|
21100 |
+
"step": 30130
|
21101 |
+
},
|
21102 |
+
{
|
21103 |
+
"epoch": 0.12176941381804078,
|
21104 |
+
"grad_norm": 792.9324340820312,
|
21105 |
+
"learning_rate": 4.9935644666625125e-05,
|
21106 |
+
"loss": 82.8931,
|
21107 |
+
"step": 30140
|
21108 |
+
},
|
21109 |
+
{
|
21110 |
+
"epoch": 0.12180981508340841,
|
21111 |
+
"grad_norm": 1509.551025390625,
|
21112 |
+
"learning_rate": 4.9935394121191915e-05,
|
21113 |
+
"loss": 103.0119,
|
21114 |
+
"step": 30150
|
21115 |
+
},
|
21116 |
+
{
|
21117 |
+
"epoch": 0.12185021634877605,
|
21118 |
+
"grad_norm": 1039.8441162109375,
|
21119 |
+
"learning_rate": 4.993514308963036e-05,
|
21120 |
+
"loss": 119.8,
|
21121 |
+
"step": 30160
|
21122 |
+
},
|
21123 |
+
{
|
21124 |
+
"epoch": 0.12189061761414367,
|
21125 |
+
"grad_norm": 399.6064758300781,
|
21126 |
+
"learning_rate": 4.993489157194536e-05,
|
21127 |
+
"loss": 115.0279,
|
21128 |
+
"step": 30170
|
21129 |
+
},
|
21130 |
+
{
|
21131 |
+
"epoch": 0.12193101887951131,
|
21132 |
+
"grad_norm": 794.4744873046875,
|
21133 |
+
"learning_rate": 4.993463956814181e-05,
|
21134 |
+
"loss": 101.3931,
|
21135 |
+
"step": 30180
|
21136 |
+
},
|
21137 |
+
{
|
21138 |
+
"epoch": 0.12197142014487894,
|
21139 |
+
"grad_norm": 2071.046142578125,
|
21140 |
+
"learning_rate": 4.993438707822462e-05,
|
21141 |
+
"loss": 114.3679,
|
21142 |
+
"step": 30190
|
21143 |
+
},
|
21144 |
+
{
|
21145 |
+
"epoch": 0.12201182141024657,
|
21146 |
+
"grad_norm": 996.6055297851562,
|
21147 |
+
"learning_rate": 4.993413410219871e-05,
|
21148 |
+
"loss": 84.8786,
|
21149 |
+
"step": 30200
|
21150 |
+
},
|
21151 |
+
{
|
21152 |
+
"epoch": 0.1220522226756142,
|
21153 |
+
"grad_norm": 1139.685302734375,
|
21154 |
+
"learning_rate": 4.993388064006903e-05,
|
21155 |
+
"loss": 78.988,
|
21156 |
+
"step": 30210
|
21157 |
+
},
|
21158 |
+
{
|
21159 |
+
"epoch": 0.12209262394098183,
|
21160 |
+
"grad_norm": 622.7205200195312,
|
21161 |
+
"learning_rate": 4.993362669184051e-05,
|
21162 |
+
"loss": 88.1808,
|
21163 |
+
"step": 30220
|
21164 |
+
},
|
21165 |
+
{
|
21166 |
+
"epoch": 0.12213302520634946,
|
21167 |
+
"grad_norm": 7026.68359375,
|
21168 |
+
"learning_rate": 4.9933372257518096e-05,
|
21169 |
+
"loss": 126.5632,
|
21170 |
+
"step": 30230
|
21171 |
+
},
|
21172 |
+
{
|
21173 |
+
"epoch": 0.1221734264717171,
|
21174 |
+
"grad_norm": 945.2219848632812,
|
21175 |
+
"learning_rate": 4.993311733710676e-05,
|
21176 |
+
"loss": 78.7985,
|
21177 |
+
"step": 30240
|
21178 |
+
},
|
21179 |
+
{
|
21180 |
+
"epoch": 0.12221382773708472,
|
21181 |
+
"grad_norm": 826.4041137695312,
|
21182 |
+
"learning_rate": 4.9932861930611454e-05,
|
21183 |
+
"loss": 72.6881,
|
21184 |
+
"step": 30250
|
21185 |
+
},
|
21186 |
+
{
|
21187 |
+
"epoch": 0.12225422900245236,
|
21188 |
+
"grad_norm": 714.5296630859375,
|
21189 |
+
"learning_rate": 4.993260603803718e-05,
|
21190 |
+
"loss": 94.0831,
|
21191 |
+
"step": 30260
|
21192 |
+
},
|
21193 |
+
{
|
21194 |
+
"epoch": 0.12229463026781999,
|
21195 |
+
"grad_norm": 692.105224609375,
|
21196 |
+
"learning_rate": 4.99323496593889e-05,
|
21197 |
+
"loss": 60.1789,
|
21198 |
+
"step": 30270
|
21199 |
+
},
|
21200 |
+
{
|
21201 |
+
"epoch": 0.12233503153318762,
|
21202 |
+
"grad_norm": 534.4913330078125,
|
21203 |
+
"learning_rate": 4.993209279467164e-05,
|
21204 |
+
"loss": 69.5706,
|
21205 |
+
"step": 30280
|
21206 |
+
},
|
21207 |
+
{
|
21208 |
+
"epoch": 0.12237543279855526,
|
21209 |
+
"grad_norm": 261.96044921875,
|
21210 |
+
"learning_rate": 4.99318354438904e-05,
|
21211 |
+
"loss": 77.032,
|
21212 |
+
"step": 30290
|
21213 |
+
},
|
21214 |
+
{
|
21215 |
+
"epoch": 0.12241583406392288,
|
21216 |
+
"grad_norm": 1099.048095703125,
|
21217 |
+
"learning_rate": 4.9931577607050175e-05,
|
21218 |
+
"loss": 92.2519,
|
21219 |
+
"step": 30300
|
21220 |
+
},
|
21221 |
+
{
|
21222 |
+
"epoch": 0.12245623532929051,
|
21223 |
+
"grad_norm": 1755.97705078125,
|
21224 |
+
"learning_rate": 4.993131928415602e-05,
|
21225 |
+
"loss": 159.9417,
|
21226 |
+
"step": 30310
|
21227 |
+
},
|
21228 |
+
{
|
21229 |
+
"epoch": 0.12249663659465815,
|
21230 |
+
"grad_norm": 1634.8353271484375,
|
21231 |
+
"learning_rate": 4.993106047521296e-05,
|
21232 |
+
"loss": 91.0221,
|
21233 |
+
"step": 30320
|
21234 |
+
},
|
21235 |
+
{
|
21236 |
+
"epoch": 0.12253703786002577,
|
21237 |
+
"grad_norm": 980.1278686523438,
|
21238 |
+
"learning_rate": 4.993080118022604e-05,
|
21239 |
+
"loss": 97.5692,
|
21240 |
+
"step": 30330
|
21241 |
+
},
|
21242 |
+
{
|
21243 |
+
"epoch": 0.12257743912539341,
|
21244 |
+
"grad_norm": 2488.35009765625,
|
21245 |
+
"learning_rate": 4.993054139920032e-05,
|
21246 |
+
"loss": 114.5058,
|
21247 |
+
"step": 30340
|
21248 |
+
},
|
21249 |
+
{
|
21250 |
+
"epoch": 0.12261784039076104,
|
21251 |
+
"grad_norm": 1487.991455078125,
|
21252 |
+
"learning_rate": 4.9930281132140846e-05,
|
21253 |
+
"loss": 82.5714,
|
21254 |
+
"step": 30350
|
21255 |
+
},
|
21256 |
+
{
|
21257 |
+
"epoch": 0.12265824165612867,
|
21258 |
+
"grad_norm": 513.70654296875,
|
21259 |
+
"learning_rate": 4.993002037905272e-05,
|
21260 |
+
"loss": 127.0465,
|
21261 |
+
"step": 30360
|
21262 |
+
},
|
21263 |
+
{
|
21264 |
+
"epoch": 0.1226986429214963,
|
21265 |
+
"grad_norm": 2360.14208984375,
|
21266 |
+
"learning_rate": 4.9929759139941e-05,
|
21267 |
+
"loss": 128.6292,
|
21268 |
+
"step": 30370
|
21269 |
+
},
|
21270 |
+
{
|
21271 |
+
"epoch": 0.12273904418686393,
|
21272 |
+
"grad_norm": 537.433349609375,
|
21273 |
+
"learning_rate": 4.99294974148108e-05,
|
21274 |
+
"loss": 84.4188,
|
21275 |
+
"step": 30380
|
21276 |
+
},
|
21277 |
+
{
|
21278 |
+
"epoch": 0.12277944545223156,
|
21279 |
+
"grad_norm": 339.3077697753906,
|
21280 |
+
"learning_rate": 4.9929235203667214e-05,
|
21281 |
+
"loss": 84.647,
|
21282 |
+
"step": 30390
|
21283 |
+
},
|
21284 |
+
{
|
21285 |
+
"epoch": 0.1228198467175992,
|
21286 |
+
"grad_norm": 697.3407592773438,
|
21287 |
+
"learning_rate": 4.992897250651535e-05,
|
21288 |
+
"loss": 96.315,
|
21289 |
+
"step": 30400
|
21290 |
+
},
|
21291 |
+
{
|
21292 |
+
"epoch": 0.12286024798296682,
|
21293 |
+
"grad_norm": 637.2626342773438,
|
21294 |
+
"learning_rate": 4.9928709323360337e-05,
|
21295 |
+
"loss": 87.7648,
|
21296 |
+
"step": 30410
|
21297 |
+
},
|
21298 |
+
{
|
21299 |
+
"epoch": 0.12290064924833445,
|
21300 |
+
"grad_norm": 591.4133911132812,
|
21301 |
+
"learning_rate": 4.99284456542073e-05,
|
21302 |
+
"loss": 57.4863,
|
21303 |
+
"step": 30420
|
21304 |
+
},
|
21305 |
+
{
|
21306 |
+
"epoch": 0.12294105051370209,
|
21307 |
+
"grad_norm": 1242.639404296875,
|
21308 |
+
"learning_rate": 4.992818149906138e-05,
|
21309 |
+
"loss": 105.4673,
|
21310 |
+
"step": 30430
|
21311 |
+
},
|
21312 |
+
{
|
21313 |
+
"epoch": 0.12298145177906972,
|
21314 |
+
"grad_norm": 946.1543579101562,
|
21315 |
+
"learning_rate": 4.992791685792772e-05,
|
21316 |
+
"loss": 101.4292,
|
21317 |
+
"step": 30440
|
21318 |
+
},
|
21319 |
+
{
|
21320 |
+
"epoch": 0.12302185304443736,
|
21321 |
+
"grad_norm": 554.2642211914062,
|
21322 |
+
"learning_rate": 4.9927651730811495e-05,
|
21323 |
+
"loss": 55.5201,
|
21324 |
+
"step": 30450
|
21325 |
+
},
|
21326 |
+
{
|
21327 |
+
"epoch": 0.12306225430980498,
|
21328 |
+
"grad_norm": 1658.3990478515625,
|
21329 |
+
"learning_rate": 4.992738611771787e-05,
|
21330 |
+
"loss": 90.6332,
|
21331 |
+
"step": 30460
|
21332 |
+
},
|
21333 |
+
{
|
21334 |
+
"epoch": 0.12310265557517261,
|
21335 |
+
"grad_norm": 1110.182373046875,
|
21336 |
+
"learning_rate": 4.9927120018652004e-05,
|
21337 |
+
"loss": 112.0799,
|
21338 |
+
"step": 30470
|
21339 |
+
},
|
21340 |
+
{
|
21341 |
+
"epoch": 0.12314305684054025,
|
21342 |
+
"grad_norm": 2135.968505859375,
|
21343 |
+
"learning_rate": 4.992685343361911e-05,
|
21344 |
+
"loss": 124.2809,
|
21345 |
+
"step": 30480
|
21346 |
+
},
|
21347 |
+
{
|
21348 |
+
"epoch": 0.12318345810590788,
|
21349 |
+
"grad_norm": 0.0,
|
21350 |
+
"learning_rate": 4.992658636262438e-05,
|
21351 |
+
"loss": 130.0038,
|
21352 |
+
"step": 30490
|
21353 |
+
},
|
21354 |
+
{
|
21355 |
+
"epoch": 0.1232238593712755,
|
21356 |
+
"grad_norm": 897.2399291992188,
|
21357 |
+
"learning_rate": 4.992631880567301e-05,
|
21358 |
+
"loss": 110.8611,
|
21359 |
+
"step": 30500
|
21360 |
+
},
|
21361 |
+
{
|
21362 |
+
"epoch": 0.12326426063664314,
|
21363 |
+
"grad_norm": 1721.460693359375,
|
21364 |
+
"learning_rate": 4.9926050762770224e-05,
|
21365 |
+
"loss": 110.5094,
|
21366 |
+
"step": 30510
|
21367 |
+
},
|
21368 |
+
{
|
21369 |
+
"epoch": 0.12330466190201077,
|
21370 |
+
"grad_norm": 994.7217407226562,
|
21371 |
+
"learning_rate": 4.992578223392124e-05,
|
21372 |
+
"loss": 95.7169,
|
21373 |
+
"step": 30520
|
21374 |
+
},
|
21375 |
+
{
|
21376 |
+
"epoch": 0.12334506316737841,
|
21377 |
+
"grad_norm": 1169.87939453125,
|
21378 |
+
"learning_rate": 4.9925513219131303e-05,
|
21379 |
+
"loss": 82.8276,
|
21380 |
+
"step": 30530
|
21381 |
+
},
|
21382 |
+
{
|
21383 |
+
"epoch": 0.12338546443274603,
|
21384 |
+
"grad_norm": 677.4993286132812,
|
21385 |
+
"learning_rate": 4.992524371840566e-05,
|
21386 |
+
"loss": 105.6593,
|
21387 |
+
"step": 30540
|
21388 |
+
},
|
21389 |
+
{
|
21390 |
+
"epoch": 0.12342586569811366,
|
21391 |
+
"grad_norm": 885.8995361328125,
|
21392 |
+
"learning_rate": 4.992497373174955e-05,
|
21393 |
+
"loss": 114.9273,
|
21394 |
+
"step": 30550
|
21395 |
+
},
|
21396 |
+
{
|
21397 |
+
"epoch": 0.1234662669634813,
|
21398 |
+
"grad_norm": 1765.774658203125,
|
21399 |
+
"learning_rate": 4.9924703259168244e-05,
|
21400 |
+
"loss": 73.1132,
|
21401 |
+
"step": 30560
|
21402 |
+
},
|
21403 |
+
{
|
21404 |
+
"epoch": 0.12350666822884893,
|
21405 |
+
"grad_norm": 1306.00244140625,
|
21406 |
+
"learning_rate": 4.992443230066701e-05,
|
21407 |
+
"loss": 96.6844,
|
21408 |
+
"step": 30570
|
21409 |
+
},
|
21410 |
+
{
|
21411 |
+
"epoch": 0.12354706949421655,
|
21412 |
+
"grad_norm": 2164.46044921875,
|
21413 |
+
"learning_rate": 4.992416085625115e-05,
|
21414 |
+
"loss": 101.1072,
|
21415 |
+
"step": 30580
|
21416 |
+
},
|
21417 |
+
{
|
21418 |
+
"epoch": 0.12358747075958419,
|
21419 |
+
"grad_norm": 1472.5419921875,
|
21420 |
+
"learning_rate": 4.992388892592594e-05,
|
21421 |
+
"loss": 93.9604,
|
21422 |
+
"step": 30590
|
21423 |
+
},
|
21424 |
+
{
|
21425 |
+
"epoch": 0.12362787202495182,
|
21426 |
+
"grad_norm": 376.5589294433594,
|
21427 |
+
"learning_rate": 4.9923616509696683e-05,
|
21428 |
+
"loss": 89.4311,
|
21429 |
+
"step": 30600
|
21430 |
+
},
|
21431 |
+
{
|
21432 |
+
"epoch": 0.12366827329031946,
|
21433 |
+
"grad_norm": 809.4692993164062,
|
21434 |
+
"learning_rate": 4.9923343607568684e-05,
|
21435 |
+
"loss": 90.6958,
|
21436 |
+
"step": 30610
|
21437 |
+
},
|
21438 |
+
{
|
21439 |
+
"epoch": 0.12370867455568708,
|
21440 |
+
"grad_norm": 1123.7208251953125,
|
21441 |
+
"learning_rate": 4.9923070219547275e-05,
|
21442 |
+
"loss": 113.3677,
|
21443 |
+
"step": 30620
|
21444 |
+
},
|
21445 |
+
{
|
21446 |
+
"epoch": 0.12374907582105471,
|
21447 |
+
"grad_norm": 1361.58056640625,
|
21448 |
+
"learning_rate": 4.9922796345637776e-05,
|
21449 |
+
"loss": 109.5225,
|
21450 |
+
"step": 30630
|
21451 |
+
},
|
21452 |
+
{
|
21453 |
+
"epoch": 0.12378947708642235,
|
21454 |
+
"grad_norm": 1546.5401611328125,
|
21455 |
+
"learning_rate": 4.992252198584554e-05,
|
21456 |
+
"loss": 156.2372,
|
21457 |
+
"step": 30640
|
21458 |
+
},
|
21459 |
+
{
|
21460 |
+
"epoch": 0.12382987835178998,
|
21461 |
+
"grad_norm": 1252.5113525390625,
|
21462 |
+
"learning_rate": 4.99222471401759e-05,
|
21463 |
+
"loss": 98.325,
|
21464 |
+
"step": 30650
|
21465 |
+
},
|
21466 |
+
{
|
21467 |
+
"epoch": 0.1238702796171576,
|
21468 |
+
"grad_norm": 587.108642578125,
|
21469 |
+
"learning_rate": 4.992197180863422e-05,
|
21470 |
+
"loss": 83.3731,
|
21471 |
+
"step": 30660
|
21472 |
+
},
|
21473 |
+
{
|
21474 |
+
"epoch": 0.12391068088252524,
|
21475 |
+
"grad_norm": 546.9779663085938,
|
21476 |
+
"learning_rate": 4.992169599122587e-05,
|
21477 |
+
"loss": 69.5896,
|
21478 |
+
"step": 30670
|
21479 |
+
},
|
21480 |
+
{
|
21481 |
+
"epoch": 0.12395108214789287,
|
21482 |
+
"grad_norm": 533.7169189453125,
|
21483 |
+
"learning_rate": 4.992141968795623e-05,
|
21484 |
+
"loss": 59.3518,
|
21485 |
+
"step": 30680
|
21486 |
+
},
|
21487 |
+
{
|
21488 |
+
"epoch": 0.12399148341326051,
|
21489 |
+
"grad_norm": 609.4190673828125,
|
21490 |
+
"learning_rate": 4.992114289883068e-05,
|
21491 |
+
"loss": 132.897,
|
21492 |
+
"step": 30690
|
21493 |
+
},
|
21494 |
+
{
|
21495 |
+
"epoch": 0.12403188467862813,
|
21496 |
+
"grad_norm": 1489.8643798828125,
|
21497 |
+
"learning_rate": 4.9920865623854615e-05,
|
21498 |
+
"loss": 84.5661,
|
21499 |
+
"step": 30700
|
21500 |
+
},
|
21501 |
+
{
|
21502 |
+
"epoch": 0.12407228594399576,
|
21503 |
+
"grad_norm": 1392.722900390625,
|
21504 |
+
"learning_rate": 4.992058786303345e-05,
|
21505 |
+
"loss": 80.1483,
|
21506 |
+
"step": 30710
|
21507 |
+
},
|
21508 |
+
{
|
21509 |
+
"epoch": 0.1241126872093634,
|
21510 |
+
"grad_norm": 1709.162841796875,
|
21511 |
+
"learning_rate": 4.9920309616372596e-05,
|
21512 |
+
"loss": 78.6077,
|
21513 |
+
"step": 30720
|
21514 |
+
},
|
21515 |
+
{
|
21516 |
+
"epoch": 0.12415308847473103,
|
21517 |
+
"grad_norm": 319.7249450683594,
|
21518 |
+
"learning_rate": 4.9920030883877476e-05,
|
21519 |
+
"loss": 49.0658,
|
21520 |
+
"step": 30730
|
21521 |
+
},
|
21522 |
+
{
|
21523 |
+
"epoch": 0.12419348974009865,
|
21524 |
+
"grad_norm": 2077.73095703125,
|
21525 |
+
"learning_rate": 4.9919751665553525e-05,
|
21526 |
+
"loss": 119.2495,
|
21527 |
+
"step": 30740
|
21528 |
+
},
|
21529 |
+
{
|
21530 |
+
"epoch": 0.1242338910054663,
|
21531 |
+
"grad_norm": 1575.6136474609375,
|
21532 |
+
"learning_rate": 4.991947196140618e-05,
|
21533 |
+
"loss": 65.2858,
|
21534 |
+
"step": 30750
|
21535 |
+
},
|
21536 |
+
{
|
21537 |
+
"epoch": 0.12427429227083392,
|
21538 |
+
"grad_norm": 891.6358032226562,
|
21539 |
+
"learning_rate": 4.9919191771440905e-05,
|
21540 |
+
"loss": 76.9454,
|
21541 |
+
"step": 30760
|
21542 |
+
},
|
21543 |
+
{
|
21544 |
+
"epoch": 0.12431469353620156,
|
21545 |
+
"grad_norm": 836.199951171875,
|
21546 |
+
"learning_rate": 4.991891109566316e-05,
|
21547 |
+
"loss": 86.9683,
|
21548 |
+
"step": 30770
|
21549 |
+
},
|
21550 |
+
{
|
21551 |
+
"epoch": 0.12435509480156919,
|
21552 |
+
"grad_norm": 1238.918701171875,
|
21553 |
+
"learning_rate": 4.99186299340784e-05,
|
21554 |
+
"loss": 95.3734,
|
21555 |
+
"step": 30780
|
21556 |
+
},
|
21557 |
+
{
|
21558 |
+
"epoch": 0.12439549606693681,
|
21559 |
+
"grad_norm": 1337.448974609375,
|
21560 |
+
"learning_rate": 4.991834828669213e-05,
|
21561 |
+
"loss": 103.7555,
|
21562 |
+
"step": 30790
|
21563 |
+
},
|
21564 |
+
{
|
21565 |
+
"epoch": 0.12443589733230445,
|
21566 |
+
"grad_norm": 2682.815673828125,
|
21567 |
+
"learning_rate": 4.9918066153509834e-05,
|
21568 |
+
"loss": 157.3261,
|
21569 |
+
"step": 30800
|
21570 |
+
},
|
21571 |
+
{
|
21572 |
+
"epoch": 0.12447629859767208,
|
21573 |
+
"grad_norm": 2427.59033203125,
|
21574 |
+
"learning_rate": 4.9917783534537006e-05,
|
21575 |
+
"loss": 79.6278,
|
21576 |
+
"step": 30810
|
21577 |
+
},
|
21578 |
+
{
|
21579 |
+
"epoch": 0.1245166998630397,
|
21580 |
+
"grad_norm": 668.7774658203125,
|
21581 |
+
"learning_rate": 4.991750042977916e-05,
|
21582 |
+
"loss": 77.9551,
|
21583 |
+
"step": 30820
|
21584 |
+
},
|
21585 |
+
{
|
21586 |
+
"epoch": 0.12455710112840734,
|
21587 |
+
"grad_norm": 1772.7213134765625,
|
21588 |
+
"learning_rate": 4.991721683924182e-05,
|
21589 |
+
"loss": 109.0242,
|
21590 |
+
"step": 30830
|
21591 |
+
},
|
21592 |
+
{
|
21593 |
+
"epoch": 0.12459750239377497,
|
21594 |
+
"grad_norm": 896.2188110351562,
|
21595 |
+
"learning_rate": 4.99169327629305e-05,
|
21596 |
+
"loss": 103.7983,
|
21597 |
+
"step": 30840
|
21598 |
+
},
|
21599 |
+
{
|
21600 |
+
"epoch": 0.12463790365914261,
|
21601 |
+
"grad_norm": 1777.919677734375,
|
21602 |
+
"learning_rate": 4.991664820085074e-05,
|
21603 |
+
"loss": 69.7507,
|
21604 |
+
"step": 30850
|
21605 |
+
},
|
21606 |
+
{
|
21607 |
+
"epoch": 0.12467830492451024,
|
21608 |
+
"grad_norm": 414.7098693847656,
|
21609 |
+
"learning_rate": 4.9916363153008114e-05,
|
21610 |
+
"loss": 137.439,
|
21611 |
+
"step": 30860
|
21612 |
+
},
|
21613 |
+
{
|
21614 |
+
"epoch": 0.12471870618987786,
|
21615 |
+
"grad_norm": 598.8345947265625,
|
21616 |
+
"learning_rate": 4.9916077619408155e-05,
|
21617 |
+
"loss": 120.2765,
|
21618 |
+
"step": 30870
|
21619 |
+
},
|
21620 |
+
{
|
21621 |
+
"epoch": 0.1247591074552455,
|
21622 |
+
"grad_norm": 1064.3018798828125,
|
21623 |
+
"learning_rate": 4.991579160005644e-05,
|
21624 |
+
"loss": 84.4768,
|
21625 |
+
"step": 30880
|
21626 |
+
},
|
21627 |
+
{
|
21628 |
+
"epoch": 0.12479950872061313,
|
21629 |
+
"grad_norm": 666.4903564453125,
|
21630 |
+
"learning_rate": 4.9915505094958526e-05,
|
21631 |
+
"loss": 136.433,
|
21632 |
+
"step": 30890
|
21633 |
+
},
|
21634 |
+
{
|
21635 |
+
"epoch": 0.12483990998598075,
|
21636 |
+
"grad_norm": 1181.445556640625,
|
21637 |
+
"learning_rate": 4.991521810412002e-05,
|
21638 |
+
"loss": 63.556,
|
21639 |
+
"step": 30900
|
21640 |
+
},
|
21641 |
+
{
|
21642 |
+
"epoch": 0.1248803112513484,
|
21643 |
+
"grad_norm": 605.0240478515625,
|
21644 |
+
"learning_rate": 4.991493062754651e-05,
|
21645 |
+
"loss": 94.3815,
|
21646 |
+
"step": 30910
|
21647 |
+
},
|
21648 |
+
{
|
21649 |
+
"epoch": 0.12492071251671602,
|
21650 |
+
"grad_norm": 794.223388671875,
|
21651 |
+
"learning_rate": 4.99146426652436e-05,
|
21652 |
+
"loss": 76.5061,
|
21653 |
+
"step": 30920
|
21654 |
+
},
|
21655 |
+
{
|
21656 |
+
"epoch": 0.12496111378208366,
|
21657 |
+
"grad_norm": 1127.827392578125,
|
21658 |
+
"learning_rate": 4.991435421721691e-05,
|
21659 |
+
"loss": 80.6668,
|
21660 |
+
"step": 30930
|
21661 |
+
},
|
21662 |
+
{
|
21663 |
+
"epoch": 0.12500151504745127,
|
21664 |
+
"grad_norm": 494.918212890625,
|
21665 |
+
"learning_rate": 4.991406528347206e-05,
|
21666 |
+
"loss": 137.6075,
|
21667 |
+
"step": 30940
|
21668 |
+
},
|
21669 |
+
{
|
21670 |
+
"epoch": 0.1250419163128189,
|
21671 |
+
"grad_norm": 688.9254760742188,
|
21672 |
+
"learning_rate": 4.9913775864014665e-05,
|
21673 |
+
"loss": 77.563,
|
21674 |
+
"step": 30950
|
21675 |
+
},
|
21676 |
+
{
|
21677 |
+
"epoch": 0.12508231757818655,
|
21678 |
+
"grad_norm": 1798.6043701171875,
|
21679 |
+
"learning_rate": 4.991348595885039e-05,
|
21680 |
+
"loss": 172.5805,
|
21681 |
+
"step": 30960
|
21682 |
+
},
|
21683 |
+
{
|
21684 |
+
"epoch": 0.1251227188435542,
|
21685 |
+
"grad_norm": 629.1405639648438,
|
21686 |
+
"learning_rate": 4.991319556798488e-05,
|
21687 |
+
"loss": 94.1099,
|
21688 |
+
"step": 30970
|
21689 |
+
},
|
21690 |
+
{
|
21691 |
+
"epoch": 0.1251631201089218,
|
21692 |
+
"grad_norm": 831.2258911132812,
|
21693 |
+
"learning_rate": 4.99129046914238e-05,
|
21694 |
+
"loss": 82.6184,
|
21695 |
+
"step": 30980
|
21696 |
+
},
|
21697 |
+
{
|
21698 |
+
"epoch": 0.12520352137428944,
|
21699 |
+
"grad_norm": 899.6831665039062,
|
21700 |
+
"learning_rate": 4.991261332917282e-05,
|
21701 |
+
"loss": 89.8317,
|
21702 |
+
"step": 30990
|
21703 |
+
},
|
21704 |
+
{
|
21705 |
+
"epoch": 0.12524392263965708,
|
21706 |
+
"grad_norm": 1243.12255859375,
|
21707 |
+
"learning_rate": 4.991232148123761e-05,
|
21708 |
+
"loss": 106.8137,
|
21709 |
+
"step": 31000
|
21710 |
+
},
|
21711 |
+
{
|
21712 |
+
"epoch": 0.1252843239050247,
|
21713 |
+
"grad_norm": 708.5358276367188,
|
21714 |
+
"learning_rate": 4.9912029147623875e-05,
|
21715 |
+
"loss": 71.765,
|
21716 |
+
"step": 31010
|
21717 |
+
},
|
21718 |
+
{
|
21719 |
+
"epoch": 0.12532472517039234,
|
21720 |
+
"grad_norm": 1078.65185546875,
|
21721 |
+
"learning_rate": 4.9911736328337296e-05,
|
21722 |
+
"loss": 98.1934,
|
21723 |
+
"step": 31020
|
21724 |
+
},
|
21725 |
+
{
|
21726 |
+
"epoch": 0.12536512643575998,
|
21727 |
+
"grad_norm": 863.23193359375,
|
21728 |
+
"learning_rate": 4.99114430233836e-05,
|
21729 |
+
"loss": 110.7916,
|
21730 |
+
"step": 31030
|
21731 |
+
},
|
21732 |
+
{
|
21733 |
+
"epoch": 0.1254055277011276,
|
21734 |
+
"grad_norm": 948.326416015625,
|
21735 |
+
"learning_rate": 4.991114923276849e-05,
|
21736 |
+
"loss": 128.7791,
|
21737 |
+
"step": 31040
|
21738 |
+
},
|
21739 |
+
{
|
21740 |
+
"epoch": 0.12544592896649523,
|
21741 |
+
"grad_norm": 1923.48681640625,
|
21742 |
+
"learning_rate": 4.9910854956497696e-05,
|
21743 |
+
"loss": 81.3787,
|
21744 |
+
"step": 31050
|
21745 |
+
},
|
21746 |
+
{
|
21747 |
+
"epoch": 0.12548633023186287,
|
21748 |
+
"grad_norm": 1134.7197265625,
|
21749 |
+
"learning_rate": 4.991056019457697e-05,
|
21750 |
+
"loss": 97.8454,
|
21751 |
+
"step": 31060
|
21752 |
+
},
|
21753 |
+
{
|
21754 |
+
"epoch": 0.12552673149723048,
|
21755 |
+
"grad_norm": 1076.4554443359375,
|
21756 |
+
"learning_rate": 4.991026494701205e-05,
|
21757 |
+
"loss": 93.0418,
|
21758 |
+
"step": 31070
|
21759 |
+
},
|
21760 |
+
{
|
21761 |
+
"epoch": 0.12556713276259812,
|
21762 |
+
"grad_norm": 1976.7174072265625,
|
21763 |
+
"learning_rate": 4.9909969213808683e-05,
|
21764 |
+
"loss": 87.8301,
|
21765 |
+
"step": 31080
|
21766 |
+
},
|
21767 |
+
{
|
21768 |
+
"epoch": 0.12560753402796576,
|
21769 |
+
"grad_norm": 1298.41162109375,
|
21770 |
+
"learning_rate": 4.990967299497264e-05,
|
21771 |
+
"loss": 125.1484,
|
21772 |
+
"step": 31090
|
21773 |
+
},
|
21774 |
+
{
|
21775 |
+
"epoch": 0.12564793529333337,
|
21776 |
+
"grad_norm": 532.9257202148438,
|
21777 |
+
"learning_rate": 4.990937629050971e-05,
|
21778 |
+
"loss": 101.9732,
|
21779 |
+
"step": 31100
|
21780 |
+
},
|
21781 |
+
{
|
21782 |
+
"epoch": 0.125688336558701,
|
21783 |
+
"grad_norm": 1158.4649658203125,
|
21784 |
+
"learning_rate": 4.990907910042566e-05,
|
21785 |
+
"loss": 88.4633,
|
21786 |
+
"step": 31110
|
21787 |
+
},
|
21788 |
+
{
|
21789 |
+
"epoch": 0.12572873782406865,
|
21790 |
+
"grad_norm": 1732.9954833984375,
|
21791 |
+
"learning_rate": 4.990878142472628e-05,
|
21792 |
+
"loss": 80.0504,
|
21793 |
+
"step": 31120
|
21794 |
+
},
|
21795 |
+
{
|
21796 |
+
"epoch": 0.1257691390894363,
|
21797 |
+
"grad_norm": 432.7400207519531,
|
21798 |
+
"learning_rate": 4.990848326341739e-05,
|
21799 |
+
"loss": 92.5247,
|
21800 |
+
"step": 31130
|
21801 |
+
},
|
21802 |
+
{
|
21803 |
+
"epoch": 0.1258095403548039,
|
21804 |
+
"grad_norm": 985.176025390625,
|
21805 |
+
"learning_rate": 4.990818461650479e-05,
|
21806 |
+
"loss": 100.5391,
|
21807 |
+
"step": 31140
|
21808 |
+
},
|
21809 |
+
{
|
21810 |
+
"epoch": 0.12584994162017155,
|
21811 |
+
"grad_norm": 789.0088500976562,
|
21812 |
+
"learning_rate": 4.990788548399432e-05,
|
21813 |
+
"loss": 88.8649,
|
21814 |
+
"step": 31150
|
21815 |
+
},
|
21816 |
+
{
|
21817 |
+
"epoch": 0.12589034288553919,
|
21818 |
+
"grad_norm": 1790.52978515625,
|
21819 |
+
"learning_rate": 4.990758586589178e-05,
|
21820 |
+
"loss": 108.7384,
|
21821 |
+
"step": 31160
|
21822 |
+
},
|
21823 |
+
{
|
21824 |
+
"epoch": 0.1259307441509068,
|
21825 |
+
"grad_norm": 1069.7830810546875,
|
21826 |
+
"learning_rate": 4.9907285762203046e-05,
|
21827 |
+
"loss": 104.5124,
|
21828 |
+
"step": 31170
|
21829 |
+
},
|
21830 |
+
{
|
21831 |
+
"epoch": 0.12597114541627444,
|
21832 |
+
"grad_norm": 766.6657104492188,
|
21833 |
+
"learning_rate": 4.990698517293395e-05,
|
21834 |
+
"loss": 59.5578,
|
21835 |
+
"step": 31180
|
21836 |
+
},
|
21837 |
+
{
|
21838 |
+
"epoch": 0.12601154668164208,
|
21839 |
+
"grad_norm": 1233.1490478515625,
|
21840 |
+
"learning_rate": 4.990668409809034e-05,
|
21841 |
+
"loss": 63.7087,
|
21842 |
+
"step": 31190
|
21843 |
+
},
|
21844 |
+
{
|
21845 |
+
"epoch": 0.1260519479470097,
|
21846 |
+
"grad_norm": 1039.3233642578125,
|
21847 |
+
"learning_rate": 4.990638253767812e-05,
|
21848 |
+
"loss": 110.4396,
|
21849 |
+
"step": 31200
|
21850 |
+
},
|
21851 |
+
{
|
21852 |
+
"epoch": 0.12609234921237733,
|
21853 |
+
"grad_norm": 1719.74267578125,
|
21854 |
+
"learning_rate": 4.9906080491703146e-05,
|
21855 |
+
"loss": 84.7901,
|
21856 |
+
"step": 31210
|
21857 |
+
},
|
21858 |
+
{
|
21859 |
+
"epoch": 0.12613275047774497,
|
21860 |
+
"grad_norm": 2833.946533203125,
|
21861 |
+
"learning_rate": 4.9905777960171304e-05,
|
21862 |
+
"loss": 146.1511,
|
21863 |
+
"step": 31220
|
21864 |
+
},
|
21865 |
+
{
|
21866 |
+
"epoch": 0.12617315174311258,
|
21867 |
+
"grad_norm": 820.8349609375,
|
21868 |
+
"learning_rate": 4.99054749430885e-05,
|
21869 |
+
"loss": 120.1305,
|
21870 |
+
"step": 31230
|
21871 |
+
},
|
21872 |
+
{
|
21873 |
+
"epoch": 0.12621355300848022,
|
21874 |
+
"grad_norm": 782.0814208984375,
|
21875 |
+
"learning_rate": 4.990517144046064e-05,
|
21876 |
+
"loss": 68.8798,
|
21877 |
+
"step": 31240
|
21878 |
+
},
|
21879 |
+
{
|
21880 |
+
"epoch": 0.12625395427384786,
|
21881 |
+
"grad_norm": 1526.1461181640625,
|
21882 |
+
"learning_rate": 4.990486745229364e-05,
|
21883 |
+
"loss": 109.5388,
|
21884 |
+
"step": 31250
|
21885 |
+
},
|
21886 |
+
{
|
21887 |
+
"epoch": 0.12629435553921547,
|
21888 |
+
"grad_norm": 2700.19677734375,
|
21889 |
+
"learning_rate": 4.9904562978593426e-05,
|
21890 |
+
"loss": 141.1139,
|
21891 |
+
"step": 31260
|
21892 |
+
},
|
21893 |
+
{
|
21894 |
+
"epoch": 0.12633475680458311,
|
21895 |
+
"grad_norm": 578.1785278320312,
|
21896 |
+
"learning_rate": 4.990425801936594e-05,
|
21897 |
+
"loss": 63.9474,
|
21898 |
+
"step": 31270
|
21899 |
+
},
|
21900 |
+
{
|
21901 |
+
"epoch": 0.12637515806995075,
|
21902 |
+
"grad_norm": 1284.421630859375,
|
21903 |
+
"learning_rate": 4.990395257461712e-05,
|
21904 |
+
"loss": 64.6959,
|
21905 |
+
"step": 31280
|
21906 |
+
},
|
21907 |
+
{
|
21908 |
+
"epoch": 0.1264155593353184,
|
21909 |
+
"grad_norm": 722.0364379882812,
|
21910 |
+
"learning_rate": 4.9903646644352925e-05,
|
21911 |
+
"loss": 136.6368,
|
21912 |
+
"step": 31290
|
21913 |
+
},
|
21914 |
+
{
|
21915 |
+
"epoch": 0.126455960600686,
|
21916 |
+
"grad_norm": 639.962646484375,
|
21917 |
+
"learning_rate": 4.990334022857932e-05,
|
21918 |
+
"loss": 85.277,
|
21919 |
+
"step": 31300
|
21920 |
+
},
|
21921 |
+
{
|
21922 |
+
"epoch": 0.12649636186605365,
|
21923 |
+
"grad_norm": 1369.47265625,
|
21924 |
+
"learning_rate": 4.990303332730226e-05,
|
21925 |
+
"loss": 95.2965,
|
21926 |
+
"step": 31310
|
21927 |
+
},
|
21928 |
+
{
|
21929 |
+
"epoch": 0.1265367631314213,
|
21930 |
+
"grad_norm": 710.7479858398438,
|
21931 |
+
"learning_rate": 4.990272594052776e-05,
|
21932 |
+
"loss": 103.4021,
|
21933 |
+
"step": 31320
|
21934 |
+
},
|
21935 |
+
{
|
21936 |
+
"epoch": 0.1265771643967889,
|
21937 |
+
"grad_norm": 967.9599609375,
|
21938 |
+
"learning_rate": 4.990241806826179e-05,
|
21939 |
+
"loss": 55.237,
|
21940 |
+
"step": 31330
|
21941 |
+
},
|
21942 |
+
{
|
21943 |
+
"epoch": 0.12661756566215654,
|
21944 |
+
"grad_norm": 1026.2791748046875,
|
21945 |
+
"learning_rate": 4.990210971051037e-05,
|
21946 |
+
"loss": 76.5986,
|
21947 |
+
"step": 31340
|
21948 |
+
},
|
21949 |
+
{
|
21950 |
+
"epoch": 0.12665796692752418,
|
21951 |
+
"grad_norm": 1351.9713134765625,
|
21952 |
+
"learning_rate": 4.990180086727949e-05,
|
21953 |
+
"loss": 78.9581,
|
21954 |
+
"step": 31350
|
21955 |
+
},
|
21956 |
+
{
|
21957 |
+
"epoch": 0.1266983681928918,
|
21958 |
+
"grad_norm": 1424.3463134765625,
|
21959 |
+
"learning_rate": 4.9901491538575185e-05,
|
21960 |
+
"loss": 72.768,
|
21961 |
+
"step": 31360
|
21962 |
+
},
|
21963 |
+
{
|
21964 |
+
"epoch": 0.12673876945825943,
|
21965 |
+
"grad_norm": 251.47360229492188,
|
21966 |
+
"learning_rate": 4.990118172440348e-05,
|
21967 |
+
"loss": 97.3149,
|
21968 |
+
"step": 31370
|
21969 |
+
},
|
21970 |
+
{
|
21971 |
+
"epoch": 0.12677917072362707,
|
21972 |
+
"grad_norm": 1101.8797607421875,
|
21973 |
+
"learning_rate": 4.9900871424770424e-05,
|
21974 |
+
"loss": 113.3635,
|
21975 |
+
"step": 31380
|
21976 |
+
},
|
21977 |
+
{
|
21978 |
+
"epoch": 0.12681957198899468,
|
21979 |
+
"grad_norm": 3568.4501953125,
|
21980 |
+
"learning_rate": 4.9900560639682045e-05,
|
21981 |
+
"loss": 123.9855,
|
21982 |
+
"step": 31390
|
21983 |
+
},
|
21984 |
+
{
|
21985 |
+
"epoch": 0.12685997325436232,
|
21986 |
+
"grad_norm": 1042.5748291015625,
|
21987 |
+
"learning_rate": 4.9900249369144434e-05,
|
21988 |
+
"loss": 74.6795,
|
21989 |
+
"step": 31400
|
21990 |
+
},
|
21991 |
+
{
|
21992 |
+
"epoch": 0.12690037451972996,
|
21993 |
+
"grad_norm": 2803.200439453125,
|
21994 |
+
"learning_rate": 4.9899937613163635e-05,
|
21995 |
+
"loss": 122.4246,
|
21996 |
+
"step": 31410
|
21997 |
+
},
|
21998 |
+
{
|
21999 |
+
"epoch": 0.12694077578509758,
|
22000 |
+
"grad_norm": 999.986083984375,
|
22001 |
+
"learning_rate": 4.9899625371745726e-05,
|
22002 |
+
"loss": 103.829,
|
22003 |
+
"step": 31420
|
22004 |
+
},
|
22005 |
+
{
|
22006 |
+
"epoch": 0.12698117705046522,
|
22007 |
+
"grad_norm": 420.03607177734375,
|
22008 |
+
"learning_rate": 4.989931264489681e-05,
|
22009 |
+
"loss": 109.0738,
|
22010 |
+
"step": 31430
|
22011 |
+
},
|
22012 |
+
{
|
22013 |
+
"epoch": 0.12702157831583286,
|
22014 |
+
"grad_norm": 679.0191040039062,
|
22015 |
+
"learning_rate": 4.9898999432622974e-05,
|
22016 |
+
"loss": 63.1434,
|
22017 |
+
"step": 31440
|
22018 |
+
},
|
22019 |
+
{
|
22020 |
+
"epoch": 0.1270619795812005,
|
22021 |
+
"grad_norm": 1536.908203125,
|
22022 |
+
"learning_rate": 4.989868573493032e-05,
|
22023 |
+
"loss": 114.7305,
|
22024 |
+
"step": 31450
|
22025 |
+
},
|
22026 |
+
{
|
22027 |
+
"epoch": 0.1271023808465681,
|
22028 |
+
"grad_norm": 2571.055419921875,
|
22029 |
+
"learning_rate": 4.9898371551824974e-05,
|
22030 |
+
"loss": 91.3618,
|
22031 |
+
"step": 31460
|
22032 |
+
},
|
22033 |
+
{
|
22034 |
+
"epoch": 0.12714278211193575,
|
22035 |
+
"grad_norm": 1286.792724609375,
|
22036 |
+
"learning_rate": 4.989805688331306e-05,
|
22037 |
+
"loss": 96.6507,
|
22038 |
+
"step": 31470
|
22039 |
+
},
|
22040 |
+
{
|
22041 |
+
"epoch": 0.1271831833773034,
|
22042 |
+
"grad_norm": 1014.9740600585938,
|
22043 |
+
"learning_rate": 4.9897741729400705e-05,
|
22044 |
+
"loss": 82.7385,
|
22045 |
+
"step": 31480
|
22046 |
+
},
|
22047 |
+
{
|
22048 |
+
"epoch": 0.127223584642671,
|
22049 |
+
"grad_norm": 3557.023681640625,
|
22050 |
+
"learning_rate": 4.989742609009405e-05,
|
22051 |
+
"loss": 105.2578,
|
22052 |
+
"step": 31490
|
22053 |
+
},
|
22054 |
+
{
|
22055 |
+
"epoch": 0.12726398590803864,
|
22056 |
+
"grad_norm": 1261.7489013671875,
|
22057 |
+
"learning_rate": 4.989710996539926e-05,
|
22058 |
+
"loss": 110.2257,
|
22059 |
+
"step": 31500
|
22060 |
+
},
|
22061 |
+
{
|
22062 |
+
"epoch": 0.12730438717340628,
|
22063 |
+
"grad_norm": 620.3264770507812,
|
22064 |
+
"learning_rate": 4.9896793355322495e-05,
|
22065 |
+
"loss": 130.9313,
|
22066 |
+
"step": 31510
|
22067 |
+
},
|
22068 |
+
{
|
22069 |
+
"epoch": 0.1273447884387739,
|
22070 |
+
"grad_norm": 1063.13623046875,
|
22071 |
+
"learning_rate": 4.989647625986993e-05,
|
22072 |
+
"loss": 84.6815,
|
22073 |
+
"step": 31520
|
22074 |
+
},
|
22075 |
+
{
|
22076 |
+
"epoch": 0.12738518970414153,
|
22077 |
+
"grad_norm": 1258.741943359375,
|
22078 |
+
"learning_rate": 4.989615867904773e-05,
|
22079 |
+
"loss": 101.0329,
|
22080 |
+
"step": 31530
|
22081 |
+
},
|
22082 |
+
{
|
22083 |
+
"epoch": 0.12742559096950917,
|
22084 |
+
"grad_norm": 483.7554931640625,
|
22085 |
+
"learning_rate": 4.989584061286211e-05,
|
22086 |
+
"loss": 115.0386,
|
22087 |
+
"step": 31540
|
22088 |
+
},
|
22089 |
+
{
|
22090 |
+
"epoch": 0.12746599223487678,
|
22091 |
+
"grad_norm": 1390.3809814453125,
|
22092 |
+
"learning_rate": 4.989552206131925e-05,
|
22093 |
+
"loss": 95.0824,
|
22094 |
+
"step": 31550
|
22095 |
+
},
|
22096 |
+
{
|
22097 |
+
"epoch": 0.12750639350024442,
|
22098 |
+
"grad_norm": 474.9691162109375,
|
22099 |
+
"learning_rate": 4.9895203024425385e-05,
|
22100 |
+
"loss": 123.0268,
|
22101 |
+
"step": 31560
|
22102 |
+
},
|
22103 |
+
{
|
22104 |
+
"epoch": 0.12754679476561206,
|
22105 |
+
"grad_norm": 953.542236328125,
|
22106 |
+
"learning_rate": 4.989488350218671e-05,
|
22107 |
+
"loss": 132.4043,
|
22108 |
+
"step": 31570
|
22109 |
+
},
|
22110 |
+
{
|
22111 |
+
"epoch": 0.12758719603097968,
|
22112 |
+
"grad_norm": 602.45751953125,
|
22113 |
+
"learning_rate": 4.989456349460947e-05,
|
22114 |
+
"loss": 81.3055,
|
22115 |
+
"step": 31580
|
22116 |
+
},
|
22117 |
+
{
|
22118 |
+
"epoch": 0.12762759729634732,
|
22119 |
+
"grad_norm": 1298.2872314453125,
|
22120 |
+
"learning_rate": 4.989424300169989e-05,
|
22121 |
+
"loss": 85.7651,
|
22122 |
+
"step": 31590
|
22123 |
+
},
|
22124 |
+
{
|
22125 |
+
"epoch": 0.12766799856171496,
|
22126 |
+
"grad_norm": 539.2799072265625,
|
22127 |
+
"learning_rate": 4.9893922023464236e-05,
|
22128 |
+
"loss": 73.1652,
|
22129 |
+
"step": 31600
|
22130 |
+
},
|
22131 |
+
{
|
22132 |
+
"epoch": 0.1277083998270826,
|
22133 |
+
"grad_norm": 506.7164001464844,
|
22134 |
+
"learning_rate": 4.989360055990875e-05,
|
22135 |
+
"loss": 85.522,
|
22136 |
+
"step": 31610
|
22137 |
+
},
|
22138 |
+
{
|
22139 |
+
"epoch": 0.1277488010924502,
|
22140 |
+
"grad_norm": 1420.2366943359375,
|
22141 |
+
"learning_rate": 4.98932786110397e-05,
|
22142 |
+
"loss": 101.5808,
|
22143 |
+
"step": 31620
|
22144 |
+
},
|
22145 |
+
{
|
22146 |
+
"epoch": 0.12778920235781785,
|
22147 |
+
"grad_norm": 931.4535522460938,
|
22148 |
+
"learning_rate": 4.989295617686337e-05,
|
22149 |
+
"loss": 104.2542,
|
22150 |
+
"step": 31630
|
22151 |
+
},
|
22152 |
+
{
|
22153 |
+
"epoch": 0.1278296036231855,
|
22154 |
+
"grad_norm": 1208.561767578125,
|
22155 |
+
"learning_rate": 4.989263325738605e-05,
|
22156 |
+
"loss": 118.4887,
|
22157 |
+
"step": 31640
|
22158 |
+
},
|
22159 |
+
{
|
22160 |
+
"epoch": 0.1278700048885531,
|
22161 |
+
"grad_norm": 1164.8492431640625,
|
22162 |
+
"learning_rate": 4.989230985261403e-05,
|
22163 |
+
"loss": 133.7977,
|
22164 |
+
"step": 31650
|
22165 |
+
},
|
22166 |
+
{
|
22167 |
+
"epoch": 0.12791040615392074,
|
22168 |
+
"grad_norm": 2086.97900390625,
|
22169 |
+
"learning_rate": 4.9891985962553606e-05,
|
22170 |
+
"loss": 138.2947,
|
22171 |
+
"step": 31660
|
22172 |
+
},
|
22173 |
+
{
|
22174 |
+
"epoch": 0.12795080741928838,
|
22175 |
+
"grad_norm": 896.4519653320312,
|
22176 |
+
"learning_rate": 4.98916615872111e-05,
|
22177 |
+
"loss": 108.6655,
|
22178 |
+
"step": 31670
|
22179 |
+
},
|
22180 |
+
{
|
22181 |
+
"epoch": 0.127991208684656,
|
22182 |
+
"grad_norm": 921.4371337890625,
|
22183 |
+
"learning_rate": 4.9891336726592844e-05,
|
22184 |
+
"loss": 72.2636,
|
22185 |
+
"step": 31680
|
22186 |
+
},
|
22187 |
+
{
|
22188 |
+
"epoch": 0.12803160995002363,
|
22189 |
+
"grad_norm": 2175.284423828125,
|
22190 |
+
"learning_rate": 4.989101138070516e-05,
|
22191 |
+
"loss": 108.8024,
|
22192 |
+
"step": 31690
|
22193 |
+
},
|
22194 |
+
{
|
22195 |
+
"epoch": 0.12807201121539127,
|
22196 |
+
"grad_norm": 456.76068115234375,
|
22197 |
+
"learning_rate": 4.989068554955439e-05,
|
22198 |
+
"loss": 124.3368,
|
22199 |
+
"step": 31700
|
22200 |
+
},
|
22201 |
+
{
|
22202 |
+
"epoch": 0.12811241248075889,
|
22203 |
+
"grad_norm": 2323.61962890625,
|
22204 |
+
"learning_rate": 4.9890359233146897e-05,
|
22205 |
+
"loss": 83.3551,
|
22206 |
+
"step": 31710
|
22207 |
+
},
|
22208 |
+
{
|
22209 |
+
"epoch": 0.12815281374612653,
|
22210 |
+
"grad_norm": 630.8578491210938,
|
22211 |
+
"learning_rate": 4.989003243148904e-05,
|
22212 |
+
"loss": 112.8626,
|
22213 |
+
"step": 31720
|
22214 |
+
},
|
22215 |
+
{
|
22216 |
+
"epoch": 0.12819321501149417,
|
22217 |
+
"grad_norm": 1055.7919921875,
|
22218 |
+
"learning_rate": 4.988970514458718e-05,
|
22219 |
+
"loss": 85.2178,
|
22220 |
+
"step": 31730
|
22221 |
+
},
|
22222 |
+
{
|
22223 |
+
"epoch": 0.12823361627686178,
|
22224 |
+
"grad_norm": 4026.397705078125,
|
22225 |
+
"learning_rate": 4.9889377372447706e-05,
|
22226 |
+
"loss": 136.7049,
|
22227 |
+
"step": 31740
|
22228 |
+
},
|
22229 |
+
{
|
22230 |
+
"epoch": 0.12827401754222942,
|
22231 |
+
"grad_norm": 1399.34130859375,
|
22232 |
+
"learning_rate": 4.9889049115077005e-05,
|
22233 |
+
"loss": 90.4039,
|
22234 |
+
"step": 31750
|
22235 |
+
},
|
22236 |
+
{
|
22237 |
+
"epoch": 0.12831441880759706,
|
22238 |
+
"grad_norm": 718.3490600585938,
|
22239 |
+
"learning_rate": 4.988872037248148e-05,
|
22240 |
+
"loss": 122.6609,
|
22241 |
+
"step": 31760
|
22242 |
+
},
|
22243 |
+
{
|
22244 |
+
"epoch": 0.1283548200729647,
|
22245 |
+
"grad_norm": 1984.33740234375,
|
22246 |
+
"learning_rate": 4.988839114466753e-05,
|
22247 |
+
"loss": 118.7678,
|
22248 |
+
"step": 31770
|
22249 |
+
},
|
22250 |
+
{
|
22251 |
+
"epoch": 0.1283952213383323,
|
22252 |
+
"grad_norm": 732.53662109375,
|
22253 |
+
"learning_rate": 4.988806143164159e-05,
|
22254 |
+
"loss": 59.4425,
|
22255 |
+
"step": 31780
|
22256 |
+
},
|
22257 |
+
{
|
22258 |
+
"epoch": 0.12843562260369995,
|
22259 |
+
"grad_norm": 559.675537109375,
|
22260 |
+
"learning_rate": 4.988773123341007e-05,
|
22261 |
+
"loss": 67.2548,
|
22262 |
+
"step": 31790
|
22263 |
+
},
|
22264 |
+
{
|
22265 |
+
"epoch": 0.1284760238690676,
|
22266 |
+
"grad_norm": 1089.072998046875,
|
22267 |
+
"learning_rate": 4.988740054997943e-05,
|
22268 |
+
"loss": 96.6428,
|
22269 |
+
"step": 31800
|
22270 |
+
},
|
22271 |
+
{
|
22272 |
+
"epoch": 0.1285164251344352,
|
22273 |
+
"grad_norm": 2197.211181640625,
|
22274 |
+
"learning_rate": 4.9887069381356094e-05,
|
22275 |
+
"loss": 116.156,
|
22276 |
+
"step": 31810
|
22277 |
+
},
|
22278 |
+
{
|
22279 |
+
"epoch": 0.12855682639980284,
|
22280 |
+
"grad_norm": 459.337158203125,
|
22281 |
+
"learning_rate": 4.988673772754653e-05,
|
22282 |
+
"loss": 105.2984,
|
22283 |
+
"step": 31820
|
22284 |
+
},
|
22285 |
+
{
|
22286 |
+
"epoch": 0.12859722766517048,
|
22287 |
+
"grad_norm": 1142.6385498046875,
|
22288 |
+
"learning_rate": 4.98864055885572e-05,
|
22289 |
+
"loss": 97.5586,
|
22290 |
+
"step": 31830
|
22291 |
+
},
|
22292 |
+
{
|
22293 |
+
"epoch": 0.1286376289305381,
|
22294 |
+
"grad_norm": 952.4442749023438,
|
22295 |
+
"learning_rate": 4.988607296439458e-05,
|
22296 |
+
"loss": 112.1516,
|
22297 |
+
"step": 31840
|
22298 |
+
},
|
22299 |
+
{
|
22300 |
+
"epoch": 0.12867803019590573,
|
22301 |
+
"grad_norm": 633.4367065429688,
|
22302 |
+
"learning_rate": 4.988573985506516e-05,
|
22303 |
+
"loss": 82.8759,
|
22304 |
+
"step": 31850
|
22305 |
+
},
|
22306 |
+
{
|
22307 |
+
"epoch": 0.12871843146127337,
|
22308 |
+
"grad_norm": 341.5099792480469,
|
22309 |
+
"learning_rate": 4.988540626057543e-05,
|
22310 |
+
"loss": 79.2266,
|
22311 |
+
"step": 31860
|
22312 |
+
},
|
22313 |
+
{
|
22314 |
+
"epoch": 0.128758832726641,
|
22315 |
+
"grad_norm": 521.6610717773438,
|
22316 |
+
"learning_rate": 4.988507218093189e-05,
|
22317 |
+
"loss": 74.551,
|
22318 |
+
"step": 31870
|
22319 |
+
},
|
22320 |
+
{
|
22321 |
+
"epoch": 0.12879923399200863,
|
22322 |
+
"grad_norm": 848.8890991210938,
|
22323 |
+
"learning_rate": 4.988473761614105e-05,
|
22324 |
+
"loss": 68.6997,
|
22325 |
+
"step": 31880
|
22326 |
+
},
|
22327 |
+
{
|
22328 |
+
"epoch": 0.12883963525737627,
|
22329 |
+
"grad_norm": 1142.2823486328125,
|
22330 |
+
"learning_rate": 4.9884402566209445e-05,
|
22331 |
+
"loss": 84.3612,
|
22332 |
+
"step": 31890
|
22333 |
+
},
|
22334 |
+
{
|
22335 |
+
"epoch": 0.12888003652274388,
|
22336 |
+
"grad_norm": 1104.7357177734375,
|
22337 |
+
"learning_rate": 4.98840670311436e-05,
|
22338 |
+
"loss": 114.3409,
|
22339 |
+
"step": 31900
|
22340 |
+
},
|
22341 |
+
{
|
22342 |
+
"epoch": 0.12892043778811152,
|
22343 |
+
"grad_norm": 1430.170654296875,
|
22344 |
+
"learning_rate": 4.988373101095006e-05,
|
22345 |
+
"loss": 66.6711,
|
22346 |
+
"step": 31910
|
22347 |
+
},
|
22348 |
+
{
|
22349 |
+
"epoch": 0.12896083905347916,
|
22350 |
+
"grad_norm": 962.3909301757812,
|
22351 |
+
"learning_rate": 4.9883394505635364e-05,
|
22352 |
+
"loss": 103.3977,
|
22353 |
+
"step": 31920
|
22354 |
+
},
|
22355 |
+
{
|
22356 |
+
"epoch": 0.1290012403188468,
|
22357 |
+
"grad_norm": 1590.525634765625,
|
22358 |
+
"learning_rate": 4.988305751520609e-05,
|
22359 |
+
"loss": 107.3967,
|
22360 |
+
"step": 31930
|
22361 |
+
},
|
22362 |
+
{
|
22363 |
+
"epoch": 0.1290416415842144,
|
22364 |
+
"grad_norm": 373.5509033203125,
|
22365 |
+
"learning_rate": 4.988272003966879e-05,
|
22366 |
+
"loss": 61.0093,
|
22367 |
+
"step": 31940
|
22368 |
+
},
|
22369 |
+
{
|
22370 |
+
"epoch": 0.12908204284958205,
|
22371 |
+
"grad_norm": 845.4537353515625,
|
22372 |
+
"learning_rate": 4.9882382079030064e-05,
|
22373 |
+
"loss": 87.1438,
|
22374 |
+
"step": 31950
|
22375 |
+
},
|
22376 |
+
{
|
22377 |
+
"epoch": 0.1291224441149497,
|
22378 |
+
"grad_norm": 945.7670288085938,
|
22379 |
+
"learning_rate": 4.988204363329648e-05,
|
22380 |
+
"loss": 67.9447,
|
22381 |
+
"step": 31960
|
22382 |
+
},
|
22383 |
+
{
|
22384 |
+
"epoch": 0.1291628453803173,
|
22385 |
+
"grad_norm": 398.878173828125,
|
22386 |
+
"learning_rate": 4.988170470247465e-05,
|
22387 |
+
"loss": 81.1569,
|
22388 |
+
"step": 31970
|
22389 |
+
},
|
22390 |
+
{
|
22391 |
+
"epoch": 0.12920324664568494,
|
22392 |
+
"grad_norm": 329.99859619140625,
|
22393 |
+
"learning_rate": 4.988136528657118e-05,
|
22394 |
+
"loss": 59.9264,
|
22395 |
+
"step": 31980
|
22396 |
+
},
|
22397 |
+
{
|
22398 |
+
"epoch": 0.12924364791105258,
|
22399 |
+
"grad_norm": 875.9232177734375,
|
22400 |
+
"learning_rate": 4.988102538559268e-05,
|
22401 |
+
"loss": 118.8401,
|
22402 |
+
"step": 31990
|
22403 |
+
},
|
22404 |
+
{
|
22405 |
+
"epoch": 0.1292840491764202,
|
22406 |
+
"grad_norm": 1466.2398681640625,
|
22407 |
+
"learning_rate": 4.988068499954578e-05,
|
22408 |
+
"loss": 106.1032,
|
22409 |
+
"step": 32000
|
22410 |
}
|
22411 |
],
|
22412 |
"logging_steps": 10,
|