JacobLinCool
commited on
Commit
•
de40eeb
1
Parent(s):
c8c3f07
Training in progress, epoch 9, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 111475752
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba5554d5c0715b9e6b7677d2adf0db3ee133b93813eab49858c366530a9186bb
|
3 |
size 111475752
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 223212738
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08e06a402e5c3ded87b9c1e4249f92f8a11eed4177315e3126d76f94d7a07a58
|
3 |
size 223212738
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a9f98afb0211ede3e469085bcd7ffb48239431e6596713b1e711384e5b986da
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:709b689cdaa470cfc575e643ff39b4e6938bba001e0031274a8872524520efb6
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 66.13745980707395,
|
3 |
"best_model_checkpoint": "./exp/whisper-large-v3-turbo-common_voice_16_1-zh-TW-pissa/checkpoint-3397",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -23896,6 +23896,2617 @@
|
|
23896 |
"eval_steps_per_second": 3.799,
|
23897 |
"eval_wer": 66.13745980707395,
|
23898 |
"step": 3397
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23899 |
}
|
23900 |
],
|
23901 |
"logging_steps": 1,
|
@@ -23910,12 +26521,12 @@
|
|
23910 |
"should_evaluate": false,
|
23911 |
"should_log": false,
|
23912 |
"should_save": true,
|
23913 |
-
"should_training_stop":
|
23914 |
},
|
23915 |
"attributes": {}
|
23916 |
}
|
23917 |
},
|
23918 |
-
"total_flos":
|
23919 |
"train_batch_size": 4,
|
23920 |
"trial_name": null,
|
23921 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 66.13745980707395,
|
3 |
"best_model_checkpoint": "./exp/whisper-large-v3-turbo-common_voice_16_1-zh-TW-pissa/checkpoint-3397",
|
4 |
+
"epoch": 9.986754966887418,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 3770,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
23896 |
"eval_steps_per_second": 3.799,
|
23897 |
"eval_wer": 66.13745980707395,
|
23898 |
"step": 3397
|
23899 |
+
},
|
23900 |
+
{
|
23901 |
+
"epoch": 9.001324503311258,
|
23902 |
+
"grad_norm": 5.766251564025879,
|
23903 |
+
"learning_rate": 4.93368700265252e-05,
|
23904 |
+
"loss": 0.0689,
|
23905 |
+
"step": 3398
|
23906 |
+
},
|
23907 |
+
{
|
23908 |
+
"epoch": 9.003973509933775,
|
23909 |
+
"grad_norm": 1.6587156057357788,
|
23910 |
+
"learning_rate": 4.9204244031830244e-05,
|
23911 |
+
"loss": 0.0418,
|
23912 |
+
"step": 3399
|
23913 |
+
},
|
23914 |
+
{
|
23915 |
+
"epoch": 9.006622516556291,
|
23916 |
+
"grad_norm": 1.2178778648376465,
|
23917 |
+
"learning_rate": 4.907161803713528e-05,
|
23918 |
+
"loss": 0.0401,
|
23919 |
+
"step": 3400
|
23920 |
+
},
|
23921 |
+
{
|
23922 |
+
"epoch": 9.009271523178809,
|
23923 |
+
"grad_norm": 1.448103427886963,
|
23924 |
+
"learning_rate": 4.893899204244032e-05,
|
23925 |
+
"loss": 0.0372,
|
23926 |
+
"step": 3401
|
23927 |
+
},
|
23928 |
+
{
|
23929 |
+
"epoch": 9.011920529801325,
|
23930 |
+
"grad_norm": 8.003567695617676,
|
23931 |
+
"learning_rate": 4.880636604774536e-05,
|
23932 |
+
"loss": 0.1565,
|
23933 |
+
"step": 3402
|
23934 |
+
},
|
23935 |
+
{
|
23936 |
+
"epoch": 9.01456953642384,
|
23937 |
+
"grad_norm": 2.3756167888641357,
|
23938 |
+
"learning_rate": 4.8673740053050396e-05,
|
23939 |
+
"loss": 0.0492,
|
23940 |
+
"step": 3403
|
23941 |
+
},
|
23942 |
+
{
|
23943 |
+
"epoch": 9.017218543046358,
|
23944 |
+
"grad_norm": 2.2185049057006836,
|
23945 |
+
"learning_rate": 4.8541114058355434e-05,
|
23946 |
+
"loss": 0.0467,
|
23947 |
+
"step": 3404
|
23948 |
+
},
|
23949 |
+
{
|
23950 |
+
"epoch": 9.019867549668874,
|
23951 |
+
"grad_norm": 4.058760643005371,
|
23952 |
+
"learning_rate": 4.840848806366048e-05,
|
23953 |
+
"loss": 0.0783,
|
23954 |
+
"step": 3405
|
23955 |
+
},
|
23956 |
+
{
|
23957 |
+
"epoch": 9.022516556291391,
|
23958 |
+
"grad_norm": 6.136806011199951,
|
23959 |
+
"learning_rate": 4.8275862068965517e-05,
|
23960 |
+
"loss": 0.0763,
|
23961 |
+
"step": 3406
|
23962 |
+
},
|
23963 |
+
{
|
23964 |
+
"epoch": 9.025165562913907,
|
23965 |
+
"grad_norm": 6.241557598114014,
|
23966 |
+
"learning_rate": 4.8143236074270555e-05,
|
23967 |
+
"loss": 0.08,
|
23968 |
+
"step": 3407
|
23969 |
+
},
|
23970 |
+
{
|
23971 |
+
"epoch": 9.027814569536424,
|
23972 |
+
"grad_norm": 1.459482192993164,
|
23973 |
+
"learning_rate": 4.80106100795756e-05,
|
23974 |
+
"loss": 0.039,
|
23975 |
+
"step": 3408
|
23976 |
+
},
|
23977 |
+
{
|
23978 |
+
"epoch": 9.03046357615894,
|
23979 |
+
"grad_norm": 1.8090442419052124,
|
23980 |
+
"learning_rate": 4.787798408488064e-05,
|
23981 |
+
"loss": 0.0444,
|
23982 |
+
"step": 3409
|
23983 |
+
},
|
23984 |
+
{
|
23985 |
+
"epoch": 9.033112582781458,
|
23986 |
+
"grad_norm": 1.2890164852142334,
|
23987 |
+
"learning_rate": 4.7745358090185675e-05,
|
23988 |
+
"loss": 0.0374,
|
23989 |
+
"step": 3410
|
23990 |
+
},
|
23991 |
+
{
|
23992 |
+
"epoch": 9.035761589403974,
|
23993 |
+
"grad_norm": 3.0802345275878906,
|
23994 |
+
"learning_rate": 4.761273209549072e-05,
|
23995 |
+
"loss": 0.1104,
|
23996 |
+
"step": 3411
|
23997 |
+
},
|
23998 |
+
{
|
23999 |
+
"epoch": 9.03841059602649,
|
24000 |
+
"grad_norm": 1.5208879709243774,
|
24001 |
+
"learning_rate": 4.748010610079576e-05,
|
24002 |
+
"loss": 0.0327,
|
24003 |
+
"step": 3412
|
24004 |
+
},
|
24005 |
+
{
|
24006 |
+
"epoch": 9.041059602649007,
|
24007 |
+
"grad_norm": 3.8549258708953857,
|
24008 |
+
"learning_rate": 4.73474801061008e-05,
|
24009 |
+
"loss": 0.054,
|
24010 |
+
"step": 3413
|
24011 |
+
},
|
24012 |
+
{
|
24013 |
+
"epoch": 9.043708609271523,
|
24014 |
+
"grad_norm": 7.270555019378662,
|
24015 |
+
"learning_rate": 4.721485411140584e-05,
|
24016 |
+
"loss": 0.0849,
|
24017 |
+
"step": 3414
|
24018 |
+
},
|
24019 |
+
{
|
24020 |
+
"epoch": 9.04635761589404,
|
24021 |
+
"grad_norm": 0.8044090867042542,
|
24022 |
+
"learning_rate": 4.708222811671087e-05,
|
24023 |
+
"loss": 0.0271,
|
24024 |
+
"step": 3415
|
24025 |
+
},
|
24026 |
+
{
|
24027 |
+
"epoch": 9.049006622516556,
|
24028 |
+
"grad_norm": 0.7606043219566345,
|
24029 |
+
"learning_rate": 4.694960212201592e-05,
|
24030 |
+
"loss": 0.0252,
|
24031 |
+
"step": 3416
|
24032 |
+
},
|
24033 |
+
{
|
24034 |
+
"epoch": 9.051655629139074,
|
24035 |
+
"grad_norm": 1.188653588294983,
|
24036 |
+
"learning_rate": 4.6816976127320955e-05,
|
24037 |
+
"loss": 0.0322,
|
24038 |
+
"step": 3417
|
24039 |
+
},
|
24040 |
+
{
|
24041 |
+
"epoch": 9.05430463576159,
|
24042 |
+
"grad_norm": 0.7948738932609558,
|
24043 |
+
"learning_rate": 4.668435013262599e-05,
|
24044 |
+
"loss": 0.0284,
|
24045 |
+
"step": 3418
|
24046 |
+
},
|
24047 |
+
{
|
24048 |
+
"epoch": 9.056953642384107,
|
24049 |
+
"grad_norm": 0.723395049571991,
|
24050 |
+
"learning_rate": 4.655172413793104e-05,
|
24051 |
+
"loss": 0.0268,
|
24052 |
+
"step": 3419
|
24053 |
+
},
|
24054 |
+
{
|
24055 |
+
"epoch": 9.059602649006623,
|
24056 |
+
"grad_norm": 1.129440188407898,
|
24057 |
+
"learning_rate": 4.6419098143236075e-05,
|
24058 |
+
"loss": 0.031,
|
24059 |
+
"step": 3420
|
24060 |
+
},
|
24061 |
+
{
|
24062 |
+
"epoch": 9.062251655629138,
|
24063 |
+
"grad_norm": 1.1582543849945068,
|
24064 |
+
"learning_rate": 4.628647214854111e-05,
|
24065 |
+
"loss": 0.0327,
|
24066 |
+
"step": 3421
|
24067 |
+
},
|
24068 |
+
{
|
24069 |
+
"epoch": 9.064900662251656,
|
24070 |
+
"grad_norm": 1.3705034255981445,
|
24071 |
+
"learning_rate": 4.615384615384616e-05,
|
24072 |
+
"loss": 0.029,
|
24073 |
+
"step": 3422
|
24074 |
+
},
|
24075 |
+
{
|
24076 |
+
"epoch": 9.067549668874172,
|
24077 |
+
"grad_norm": 2.787898063659668,
|
24078 |
+
"learning_rate": 4.6021220159151196e-05,
|
24079 |
+
"loss": 0.0547,
|
24080 |
+
"step": 3423
|
24081 |
+
},
|
24082 |
+
{
|
24083 |
+
"epoch": 9.07019867549669,
|
24084 |
+
"grad_norm": 0.8864904046058655,
|
24085 |
+
"learning_rate": 4.5888594164456234e-05,
|
24086 |
+
"loss": 0.029,
|
24087 |
+
"step": 3424
|
24088 |
+
},
|
24089 |
+
{
|
24090 |
+
"epoch": 9.072847682119205,
|
24091 |
+
"grad_norm": 5.607481956481934,
|
24092 |
+
"learning_rate": 4.575596816976128e-05,
|
24093 |
+
"loss": 0.0438,
|
24094 |
+
"step": 3425
|
24095 |
+
},
|
24096 |
+
{
|
24097 |
+
"epoch": 9.075496688741723,
|
24098 |
+
"grad_norm": 1.700960397720337,
|
24099 |
+
"learning_rate": 4.562334217506632e-05,
|
24100 |
+
"loss": 0.031,
|
24101 |
+
"step": 3426
|
24102 |
+
},
|
24103 |
+
{
|
24104 |
+
"epoch": 9.078145695364238,
|
24105 |
+
"grad_norm": 3.932368040084839,
|
24106 |
+
"learning_rate": 4.549071618037135e-05,
|
24107 |
+
"loss": 0.0757,
|
24108 |
+
"step": 3427
|
24109 |
+
},
|
24110 |
+
{
|
24111 |
+
"epoch": 9.080794701986754,
|
24112 |
+
"grad_norm": 1.0943644046783447,
|
24113 |
+
"learning_rate": 4.535809018567639e-05,
|
24114 |
+
"loss": 0.0258,
|
24115 |
+
"step": 3428
|
24116 |
+
},
|
24117 |
+
{
|
24118 |
+
"epoch": 9.083443708609272,
|
24119 |
+
"grad_norm": 4.466647148132324,
|
24120 |
+
"learning_rate": 4.522546419098143e-05,
|
24121 |
+
"loss": 0.2949,
|
24122 |
+
"step": 3429
|
24123 |
+
},
|
24124 |
+
{
|
24125 |
+
"epoch": 9.086092715231787,
|
24126 |
+
"grad_norm": 4.996673107147217,
|
24127 |
+
"learning_rate": 4.509283819628647e-05,
|
24128 |
+
"loss": 0.0885,
|
24129 |
+
"step": 3430
|
24130 |
+
},
|
24131 |
+
{
|
24132 |
+
"epoch": 9.088741721854305,
|
24133 |
+
"grad_norm": 1.7828238010406494,
|
24134 |
+
"learning_rate": 4.4960212201591514e-05,
|
24135 |
+
"loss": 0.037,
|
24136 |
+
"step": 3431
|
24137 |
+
},
|
24138 |
+
{
|
24139 |
+
"epoch": 9.09139072847682,
|
24140 |
+
"grad_norm": 1.6101155281066895,
|
24141 |
+
"learning_rate": 4.482758620689655e-05,
|
24142 |
+
"loss": 0.034,
|
24143 |
+
"step": 3432
|
24144 |
+
},
|
24145 |
+
{
|
24146 |
+
"epoch": 9.094039735099338,
|
24147 |
+
"grad_norm": 0.8809574246406555,
|
24148 |
+
"learning_rate": 4.469496021220159e-05,
|
24149 |
+
"loss": 0.0228,
|
24150 |
+
"step": 3433
|
24151 |
+
},
|
24152 |
+
{
|
24153 |
+
"epoch": 9.096688741721854,
|
24154 |
+
"grad_norm": 1.3178520202636719,
|
24155 |
+
"learning_rate": 4.4562334217506634e-05,
|
24156 |
+
"loss": 0.0365,
|
24157 |
+
"step": 3434
|
24158 |
+
},
|
24159 |
+
{
|
24160 |
+
"epoch": 9.099337748344372,
|
24161 |
+
"grad_norm": 0.8021741509437561,
|
24162 |
+
"learning_rate": 4.442970822281167e-05,
|
24163 |
+
"loss": 0.0275,
|
24164 |
+
"step": 3435
|
24165 |
+
},
|
24166 |
+
{
|
24167 |
+
"epoch": 9.101986754966887,
|
24168 |
+
"grad_norm": 1.3528051376342773,
|
24169 |
+
"learning_rate": 4.429708222811671e-05,
|
24170 |
+
"loss": 0.0397,
|
24171 |
+
"step": 3436
|
24172 |
+
},
|
24173 |
+
{
|
24174 |
+
"epoch": 9.104635761589403,
|
24175 |
+
"grad_norm": 1.8863712549209595,
|
24176 |
+
"learning_rate": 4.4164456233421755e-05,
|
24177 |
+
"loss": 0.0407,
|
24178 |
+
"step": 3437
|
24179 |
+
},
|
24180 |
+
{
|
24181 |
+
"epoch": 9.10728476821192,
|
24182 |
+
"grad_norm": 1.1496036052703857,
|
24183 |
+
"learning_rate": 4.403183023872679e-05,
|
24184 |
+
"loss": 0.0356,
|
24185 |
+
"step": 3438
|
24186 |
+
},
|
24187 |
+
{
|
24188 |
+
"epoch": 9.109933774834436,
|
24189 |
+
"grad_norm": 1.4267545938491821,
|
24190 |
+
"learning_rate": 4.389920424403184e-05,
|
24191 |
+
"loss": 0.0343,
|
24192 |
+
"step": 3439
|
24193 |
+
},
|
24194 |
+
{
|
24195 |
+
"epoch": 9.112582781456954,
|
24196 |
+
"grad_norm": 1.2767385244369507,
|
24197 |
+
"learning_rate": 4.376657824933687e-05,
|
24198 |
+
"loss": 0.0341,
|
24199 |
+
"step": 3440
|
24200 |
+
},
|
24201 |
+
{
|
24202 |
+
"epoch": 9.11523178807947,
|
24203 |
+
"grad_norm": 6.66063117980957,
|
24204 |
+
"learning_rate": 4.363395225464191e-05,
|
24205 |
+
"loss": 0.068,
|
24206 |
+
"step": 3441
|
24207 |
+
},
|
24208 |
+
{
|
24209 |
+
"epoch": 9.117880794701987,
|
24210 |
+
"grad_norm": 0.7400714159011841,
|
24211 |
+
"learning_rate": 4.350132625994695e-05,
|
24212 |
+
"loss": 0.0248,
|
24213 |
+
"step": 3442
|
24214 |
+
},
|
24215 |
+
{
|
24216 |
+
"epoch": 9.120529801324503,
|
24217 |
+
"grad_norm": 2.861544132232666,
|
24218 |
+
"learning_rate": 4.336870026525199e-05,
|
24219 |
+
"loss": 0.0409,
|
24220 |
+
"step": 3443
|
24221 |
+
},
|
24222 |
+
{
|
24223 |
+
"epoch": 9.12317880794702,
|
24224 |
+
"grad_norm": 4.9321818351745605,
|
24225 |
+
"learning_rate": 4.323607427055703e-05,
|
24226 |
+
"loss": 0.0798,
|
24227 |
+
"step": 3444
|
24228 |
+
},
|
24229 |
+
{
|
24230 |
+
"epoch": 9.125827814569536,
|
24231 |
+
"grad_norm": 6.0288472175598145,
|
24232 |
+
"learning_rate": 4.310344827586207e-05,
|
24233 |
+
"loss": 0.1004,
|
24234 |
+
"step": 3445
|
24235 |
+
},
|
24236 |
+
{
|
24237 |
+
"epoch": 9.128476821192052,
|
24238 |
+
"grad_norm": 0.8959963321685791,
|
24239 |
+
"learning_rate": 4.297082228116711e-05,
|
24240 |
+
"loss": 0.0344,
|
24241 |
+
"step": 3446
|
24242 |
+
},
|
24243 |
+
{
|
24244 |
+
"epoch": 9.13112582781457,
|
24245 |
+
"grad_norm": 1.036258578300476,
|
24246 |
+
"learning_rate": 4.283819628647215e-05,
|
24247 |
+
"loss": 0.032,
|
24248 |
+
"step": 3447
|
24249 |
+
},
|
24250 |
+
{
|
24251 |
+
"epoch": 9.133774834437085,
|
24252 |
+
"grad_norm": 4.999405860900879,
|
24253 |
+
"learning_rate": 4.270557029177719e-05,
|
24254 |
+
"loss": 0.1644,
|
24255 |
+
"step": 3448
|
24256 |
+
},
|
24257 |
+
{
|
24258 |
+
"epoch": 9.136423841059603,
|
24259 |
+
"grad_norm": 0.8372552990913391,
|
24260 |
+
"learning_rate": 4.257294429708223e-05,
|
24261 |
+
"loss": 0.0286,
|
24262 |
+
"step": 3449
|
24263 |
+
},
|
24264 |
+
{
|
24265 |
+
"epoch": 9.139072847682119,
|
24266 |
+
"grad_norm": 2.185940742492676,
|
24267 |
+
"learning_rate": 4.244031830238727e-05,
|
24268 |
+
"loss": 0.0588,
|
24269 |
+
"step": 3450
|
24270 |
+
},
|
24271 |
+
{
|
24272 |
+
"epoch": 9.141721854304636,
|
24273 |
+
"grad_norm": 0.920491099357605,
|
24274 |
+
"learning_rate": 4.2307692307692314e-05,
|
24275 |
+
"loss": 0.0298,
|
24276 |
+
"step": 3451
|
24277 |
+
},
|
24278 |
+
{
|
24279 |
+
"epoch": 9.144370860927152,
|
24280 |
+
"grad_norm": 0.9999523162841797,
|
24281 |
+
"learning_rate": 4.2175066312997345e-05,
|
24282 |
+
"loss": 0.0332,
|
24283 |
+
"step": 3452
|
24284 |
+
},
|
24285 |
+
{
|
24286 |
+
"epoch": 9.14701986754967,
|
24287 |
+
"grad_norm": 3.0254335403442383,
|
24288 |
+
"learning_rate": 4.204244031830238e-05,
|
24289 |
+
"loss": 0.0526,
|
24290 |
+
"step": 3453
|
24291 |
+
},
|
24292 |
+
{
|
24293 |
+
"epoch": 9.149668874172185,
|
24294 |
+
"grad_norm": 0.9677553176879883,
|
24295 |
+
"learning_rate": 4.190981432360743e-05,
|
24296 |
+
"loss": 0.0318,
|
24297 |
+
"step": 3454
|
24298 |
+
},
|
24299 |
+
{
|
24300 |
+
"epoch": 9.152317880794701,
|
24301 |
+
"grad_norm": 2.253899335861206,
|
24302 |
+
"learning_rate": 4.1777188328912466e-05,
|
24303 |
+
"loss": 0.0633,
|
24304 |
+
"step": 3455
|
24305 |
+
},
|
24306 |
+
{
|
24307 |
+
"epoch": 9.154966887417219,
|
24308 |
+
"grad_norm": 4.3012566566467285,
|
24309 |
+
"learning_rate": 4.1644562334217504e-05,
|
24310 |
+
"loss": 0.0874,
|
24311 |
+
"step": 3456
|
24312 |
+
},
|
24313 |
+
{
|
24314 |
+
"epoch": 9.157615894039735,
|
24315 |
+
"grad_norm": 1.316737413406372,
|
24316 |
+
"learning_rate": 4.151193633952255e-05,
|
24317 |
+
"loss": 0.0281,
|
24318 |
+
"step": 3457
|
24319 |
+
},
|
24320 |
+
{
|
24321 |
+
"epoch": 9.160264900662252,
|
24322 |
+
"grad_norm": 1.0094130039215088,
|
24323 |
+
"learning_rate": 4.1379310344827587e-05,
|
24324 |
+
"loss": 0.0299,
|
24325 |
+
"step": 3458
|
24326 |
+
},
|
24327 |
+
{
|
24328 |
+
"epoch": 9.162913907284768,
|
24329 |
+
"grad_norm": 1.0587879419326782,
|
24330 |
+
"learning_rate": 4.1246684350132625e-05,
|
24331 |
+
"loss": 0.0231,
|
24332 |
+
"step": 3459
|
24333 |
+
},
|
24334 |
+
{
|
24335 |
+
"epoch": 9.165562913907285,
|
24336 |
+
"grad_norm": 0.7891072630882263,
|
24337 |
+
"learning_rate": 4.111405835543767e-05,
|
24338 |
+
"loss": 0.0187,
|
24339 |
+
"step": 3460
|
24340 |
+
},
|
24341 |
+
{
|
24342 |
+
"epoch": 9.168211920529801,
|
24343 |
+
"grad_norm": 2.456876277923584,
|
24344 |
+
"learning_rate": 4.098143236074271e-05,
|
24345 |
+
"loss": 0.0495,
|
24346 |
+
"step": 3461
|
24347 |
+
},
|
24348 |
+
{
|
24349 |
+
"epoch": 9.170860927152319,
|
24350 |
+
"grad_norm": 0.6091766357421875,
|
24351 |
+
"learning_rate": 4.0848806366047745e-05,
|
24352 |
+
"loss": 0.0249,
|
24353 |
+
"step": 3462
|
24354 |
+
},
|
24355 |
+
{
|
24356 |
+
"epoch": 9.173509933774834,
|
24357 |
+
"grad_norm": 0.9004129767417908,
|
24358 |
+
"learning_rate": 4.071618037135279e-05,
|
24359 |
+
"loss": 0.0253,
|
24360 |
+
"step": 3463
|
24361 |
+
},
|
24362 |
+
{
|
24363 |
+
"epoch": 9.17615894039735,
|
24364 |
+
"grad_norm": 2.2924208641052246,
|
24365 |
+
"learning_rate": 4.058355437665783e-05,
|
24366 |
+
"loss": 0.0446,
|
24367 |
+
"step": 3464
|
24368 |
+
},
|
24369 |
+
{
|
24370 |
+
"epoch": 9.178807947019868,
|
24371 |
+
"grad_norm": 4.593645095825195,
|
24372 |
+
"learning_rate": 4.0450928381962866e-05,
|
24373 |
+
"loss": 0.0562,
|
24374 |
+
"step": 3465
|
24375 |
+
},
|
24376 |
+
{
|
24377 |
+
"epoch": 9.181456953642384,
|
24378 |
+
"grad_norm": 2.3926522731781006,
|
24379 |
+
"learning_rate": 4.0318302387267904e-05,
|
24380 |
+
"loss": 0.0398,
|
24381 |
+
"step": 3466
|
24382 |
+
},
|
24383 |
+
{
|
24384 |
+
"epoch": 9.184105960264901,
|
24385 |
+
"grad_norm": 5.846258640289307,
|
24386 |
+
"learning_rate": 4.018567639257294e-05,
|
24387 |
+
"loss": 0.079,
|
24388 |
+
"step": 3467
|
24389 |
+
},
|
24390 |
+
{
|
24391 |
+
"epoch": 9.186754966887417,
|
24392 |
+
"grad_norm": 0.941461443901062,
|
24393 |
+
"learning_rate": 4.005305039787799e-05,
|
24394 |
+
"loss": 0.0262,
|
24395 |
+
"step": 3468
|
24396 |
+
},
|
24397 |
+
{
|
24398 |
+
"epoch": 9.189403973509934,
|
24399 |
+
"grad_norm": 2.4534494876861572,
|
24400 |
+
"learning_rate": 3.9920424403183025e-05,
|
24401 |
+
"loss": 0.07,
|
24402 |
+
"step": 3469
|
24403 |
+
},
|
24404 |
+
{
|
24405 |
+
"epoch": 9.19205298013245,
|
24406 |
+
"grad_norm": 1.650627851486206,
|
24407 |
+
"learning_rate": 3.978779840848806e-05,
|
24408 |
+
"loss": 0.0415,
|
24409 |
+
"step": 3470
|
24410 |
+
},
|
24411 |
+
{
|
24412 |
+
"epoch": 9.194701986754968,
|
24413 |
+
"grad_norm": 5.541948318481445,
|
24414 |
+
"learning_rate": 3.965517241379311e-05,
|
24415 |
+
"loss": 0.1524,
|
24416 |
+
"step": 3471
|
24417 |
+
},
|
24418 |
+
{
|
24419 |
+
"epoch": 9.197350993377484,
|
24420 |
+
"grad_norm": 0.9407292604446411,
|
24421 |
+
"learning_rate": 3.9522546419098145e-05,
|
24422 |
+
"loss": 0.0247,
|
24423 |
+
"step": 3472
|
24424 |
+
},
|
24425 |
+
{
|
24426 |
+
"epoch": 9.2,
|
24427 |
+
"grad_norm": 1.3381452560424805,
|
24428 |
+
"learning_rate": 3.9389920424403183e-05,
|
24429 |
+
"loss": 0.0237,
|
24430 |
+
"step": 3473
|
24431 |
+
},
|
24432 |
+
{
|
24433 |
+
"epoch": 9.202649006622517,
|
24434 |
+
"grad_norm": 1.1741251945495605,
|
24435 |
+
"learning_rate": 3.925729442970823e-05,
|
24436 |
+
"loss": 0.05,
|
24437 |
+
"step": 3474
|
24438 |
+
},
|
24439 |
+
{
|
24440 |
+
"epoch": 9.205298013245033,
|
24441 |
+
"grad_norm": 1.5772945880889893,
|
24442 |
+
"learning_rate": 3.9124668435013266e-05,
|
24443 |
+
"loss": 0.0384,
|
24444 |
+
"step": 3475
|
24445 |
+
},
|
24446 |
+
{
|
24447 |
+
"epoch": 9.20794701986755,
|
24448 |
+
"grad_norm": 1.4000322818756104,
|
24449 |
+
"learning_rate": 3.8992042440318304e-05,
|
24450 |
+
"loss": 0.0268,
|
24451 |
+
"step": 3476
|
24452 |
+
},
|
24453 |
+
{
|
24454 |
+
"epoch": 9.210596026490066,
|
24455 |
+
"grad_norm": 2.115095376968384,
|
24456 |
+
"learning_rate": 3.885941644562334e-05,
|
24457 |
+
"loss": 0.0326,
|
24458 |
+
"step": 3477
|
24459 |
+
},
|
24460 |
+
{
|
24461 |
+
"epoch": 9.213245033112583,
|
24462 |
+
"grad_norm": 1.862501621246338,
|
24463 |
+
"learning_rate": 3.872679045092838e-05,
|
24464 |
+
"loss": 0.0373,
|
24465 |
+
"step": 3478
|
24466 |
+
},
|
24467 |
+
{
|
24468 |
+
"epoch": 9.2158940397351,
|
24469 |
+
"grad_norm": 2.3054730892181396,
|
24470 |
+
"learning_rate": 3.859416445623342e-05,
|
24471 |
+
"loss": 0.0411,
|
24472 |
+
"step": 3479
|
24473 |
+
},
|
24474 |
+
{
|
24475 |
+
"epoch": 9.218543046357617,
|
24476 |
+
"grad_norm": 4.3630852699279785,
|
24477 |
+
"learning_rate": 3.846153846153846e-05,
|
24478 |
+
"loss": 0.0989,
|
24479 |
+
"step": 3480
|
24480 |
+
},
|
24481 |
+
{
|
24482 |
+
"epoch": 9.221192052980133,
|
24483 |
+
"grad_norm": 2.056546926498413,
|
24484 |
+
"learning_rate": 3.83289124668435e-05,
|
24485 |
+
"loss": 0.047,
|
24486 |
+
"step": 3481
|
24487 |
+
},
|
24488 |
+
{
|
24489 |
+
"epoch": 9.223841059602648,
|
24490 |
+
"grad_norm": 1.5721278190612793,
|
24491 |
+
"learning_rate": 3.819628647214854e-05,
|
24492 |
+
"loss": 0.0338,
|
24493 |
+
"step": 3482
|
24494 |
+
},
|
24495 |
+
{
|
24496 |
+
"epoch": 9.226490066225166,
|
24497 |
+
"grad_norm": 7.689934253692627,
|
24498 |
+
"learning_rate": 3.8063660477453584e-05,
|
24499 |
+
"loss": 0.0972,
|
24500 |
+
"step": 3483
|
24501 |
+
},
|
24502 |
+
{
|
24503 |
+
"epoch": 9.229139072847682,
|
24504 |
+
"grad_norm": 0.7729197144508362,
|
24505 |
+
"learning_rate": 3.793103448275862e-05,
|
24506 |
+
"loss": 0.0208,
|
24507 |
+
"step": 3484
|
24508 |
+
},
|
24509 |
+
{
|
24510 |
+
"epoch": 9.2317880794702,
|
24511 |
+
"grad_norm": 0.8965718746185303,
|
24512 |
+
"learning_rate": 3.779840848806366e-05,
|
24513 |
+
"loss": 0.0277,
|
24514 |
+
"step": 3485
|
24515 |
+
},
|
24516 |
+
{
|
24517 |
+
"epoch": 9.234437086092715,
|
24518 |
+
"grad_norm": 0.7785213589668274,
|
24519 |
+
"learning_rate": 3.7665782493368704e-05,
|
24520 |
+
"loss": 0.0256,
|
24521 |
+
"step": 3486
|
24522 |
+
},
|
24523 |
+
{
|
24524 |
+
"epoch": 9.237086092715233,
|
24525 |
+
"grad_norm": 1.1779735088348389,
|
24526 |
+
"learning_rate": 3.753315649867374e-05,
|
24527 |
+
"loss": 0.0316,
|
24528 |
+
"step": 3487
|
24529 |
+
},
|
24530 |
+
{
|
24531 |
+
"epoch": 9.239735099337748,
|
24532 |
+
"grad_norm": 3.612835645675659,
|
24533 |
+
"learning_rate": 3.740053050397878e-05,
|
24534 |
+
"loss": 0.0408,
|
24535 |
+
"step": 3488
|
24536 |
+
},
|
24537 |
+
{
|
24538 |
+
"epoch": 9.242384105960264,
|
24539 |
+
"grad_norm": 2.256551504135132,
|
24540 |
+
"learning_rate": 3.7267904509283825e-05,
|
24541 |
+
"loss": 0.0331,
|
24542 |
+
"step": 3489
|
24543 |
+
},
|
24544 |
+
{
|
24545 |
+
"epoch": 9.245033112582782,
|
24546 |
+
"grad_norm": 1.0893174409866333,
|
24547 |
+
"learning_rate": 3.7135278514588856e-05,
|
24548 |
+
"loss": 0.0282,
|
24549 |
+
"step": 3490
|
24550 |
+
},
|
24551 |
+
{
|
24552 |
+
"epoch": 9.247682119205297,
|
24553 |
+
"grad_norm": 0.5613586902618408,
|
24554 |
+
"learning_rate": 3.7002652519893894e-05,
|
24555 |
+
"loss": 0.0176,
|
24556 |
+
"step": 3491
|
24557 |
+
},
|
24558 |
+
{
|
24559 |
+
"epoch": 9.250331125827815,
|
24560 |
+
"grad_norm": 2.4138083457946777,
|
24561 |
+
"learning_rate": 3.687002652519894e-05,
|
24562 |
+
"loss": 0.0366,
|
24563 |
+
"step": 3492
|
24564 |
+
},
|
24565 |
+
{
|
24566 |
+
"epoch": 9.25298013245033,
|
24567 |
+
"grad_norm": 1.4984666109085083,
|
24568 |
+
"learning_rate": 3.673740053050398e-05,
|
24569 |
+
"loss": 0.0278,
|
24570 |
+
"step": 3493
|
24571 |
+
},
|
24572 |
+
{
|
24573 |
+
"epoch": 9.255629139072848,
|
24574 |
+
"grad_norm": 1.4163844585418701,
|
24575 |
+
"learning_rate": 3.660477453580902e-05,
|
24576 |
+
"loss": 0.0367,
|
24577 |
+
"step": 3494
|
24578 |
+
},
|
24579 |
+
{
|
24580 |
+
"epoch": 9.258278145695364,
|
24581 |
+
"grad_norm": 1.6141103506088257,
|
24582 |
+
"learning_rate": 3.647214854111406e-05,
|
24583 |
+
"loss": 0.0353,
|
24584 |
+
"step": 3495
|
24585 |
+
},
|
24586 |
+
{
|
24587 |
+
"epoch": 9.260927152317882,
|
24588 |
+
"grad_norm": 0.8612403273582458,
|
24589 |
+
"learning_rate": 3.63395225464191e-05,
|
24590 |
+
"loss": 0.0299,
|
24591 |
+
"step": 3496
|
24592 |
+
},
|
24593 |
+
{
|
24594 |
+
"epoch": 9.263576158940397,
|
24595 |
+
"grad_norm": 13.050005912780762,
|
24596 |
+
"learning_rate": 3.620689655172414e-05,
|
24597 |
+
"loss": 0.2024,
|
24598 |
+
"step": 3497
|
24599 |
+
},
|
24600 |
+
{
|
24601 |
+
"epoch": 9.266225165562913,
|
24602 |
+
"grad_norm": 2.03233003616333,
|
24603 |
+
"learning_rate": 3.607427055702918e-05,
|
24604 |
+
"loss": 0.054,
|
24605 |
+
"step": 3498
|
24606 |
+
},
|
24607 |
+
{
|
24608 |
+
"epoch": 9.26887417218543,
|
24609 |
+
"grad_norm": 1.06806218624115,
|
24610 |
+
"learning_rate": 3.594164456233422e-05,
|
24611 |
+
"loss": 0.0244,
|
24612 |
+
"step": 3499
|
24613 |
+
},
|
24614 |
+
{
|
24615 |
+
"epoch": 9.271523178807946,
|
24616 |
+
"grad_norm": 2.528519630432129,
|
24617 |
+
"learning_rate": 3.580901856763926e-05,
|
24618 |
+
"loss": 0.0431,
|
24619 |
+
"step": 3500
|
24620 |
+
},
|
24621 |
+
{
|
24622 |
+
"epoch": 9.274172185430464,
|
24623 |
+
"grad_norm": 13.738266944885254,
|
24624 |
+
"learning_rate": 3.56763925729443e-05,
|
24625 |
+
"loss": 0.1754,
|
24626 |
+
"step": 3501
|
24627 |
+
},
|
24628 |
+
{
|
24629 |
+
"epoch": 9.27682119205298,
|
24630 |
+
"grad_norm": 3.9392900466918945,
|
24631 |
+
"learning_rate": 3.554376657824933e-05,
|
24632 |
+
"loss": 0.0481,
|
24633 |
+
"step": 3502
|
24634 |
+
},
|
24635 |
+
{
|
24636 |
+
"epoch": 9.279470198675497,
|
24637 |
+
"grad_norm": 1.548011302947998,
|
24638 |
+
"learning_rate": 3.541114058355438e-05,
|
24639 |
+
"loss": 0.0391,
|
24640 |
+
"step": 3503
|
24641 |
+
},
|
24642 |
+
{
|
24643 |
+
"epoch": 9.282119205298013,
|
24644 |
+
"grad_norm": 4.6536173820495605,
|
24645 |
+
"learning_rate": 3.5278514588859415e-05,
|
24646 |
+
"loss": 0.06,
|
24647 |
+
"step": 3504
|
24648 |
+
},
|
24649 |
+
{
|
24650 |
+
"epoch": 9.28476821192053,
|
24651 |
+
"grad_norm": 0.6610225439071655,
|
24652 |
+
"learning_rate": 3.514588859416445e-05,
|
24653 |
+
"loss": 0.0223,
|
24654 |
+
"step": 3505
|
24655 |
+
},
|
24656 |
+
{
|
24657 |
+
"epoch": 9.287417218543046,
|
24658 |
+
"grad_norm": 0.871101975440979,
|
24659 |
+
"learning_rate": 3.50132625994695e-05,
|
24660 |
+
"loss": 0.0283,
|
24661 |
+
"step": 3506
|
24662 |
+
},
|
24663 |
+
{
|
24664 |
+
"epoch": 9.290066225165562,
|
24665 |
+
"grad_norm": 2.9219608306884766,
|
24666 |
+
"learning_rate": 3.4880636604774536e-05,
|
24667 |
+
"loss": 0.0675,
|
24668 |
+
"step": 3507
|
24669 |
+
},
|
24670 |
+
{
|
24671 |
+
"epoch": 9.29271523178808,
|
24672 |
+
"grad_norm": 1.4002561569213867,
|
24673 |
+
"learning_rate": 3.4748010610079574e-05,
|
24674 |
+
"loss": 0.0356,
|
24675 |
+
"step": 3508
|
24676 |
+
},
|
24677 |
+
{
|
24678 |
+
"epoch": 9.295364238410595,
|
24679 |
+
"grad_norm": 0.8712365627288818,
|
24680 |
+
"learning_rate": 3.461538461538462e-05,
|
24681 |
+
"loss": 0.0263,
|
24682 |
+
"step": 3509
|
24683 |
+
},
|
24684 |
+
{
|
24685 |
+
"epoch": 9.298013245033113,
|
24686 |
+
"grad_norm": 1.1267167329788208,
|
24687 |
+
"learning_rate": 3.4482758620689657e-05,
|
24688 |
+
"loss": 0.0352,
|
24689 |
+
"step": 3510
|
24690 |
+
},
|
24691 |
+
{
|
24692 |
+
"epoch": 9.300662251655629,
|
24693 |
+
"grad_norm": 9.341315269470215,
|
24694 |
+
"learning_rate": 3.4350132625994695e-05,
|
24695 |
+
"loss": 0.1983,
|
24696 |
+
"step": 3511
|
24697 |
+
},
|
24698 |
+
{
|
24699 |
+
"epoch": 9.303311258278146,
|
24700 |
+
"grad_norm": 1.0400142669677734,
|
24701 |
+
"learning_rate": 3.421750663129974e-05,
|
24702 |
+
"loss": 0.0278,
|
24703 |
+
"step": 3512
|
24704 |
+
},
|
24705 |
+
{
|
24706 |
+
"epoch": 9.305960264900662,
|
24707 |
+
"grad_norm": 1.68052077293396,
|
24708 |
+
"learning_rate": 3.408488063660478e-05,
|
24709 |
+
"loss": 0.032,
|
24710 |
+
"step": 3513
|
24711 |
+
},
|
24712 |
+
{
|
24713 |
+
"epoch": 9.30860927152318,
|
24714 |
+
"grad_norm": 2.5643889904022217,
|
24715 |
+
"learning_rate": 3.395225464190981e-05,
|
24716 |
+
"loss": 0.0341,
|
24717 |
+
"step": 3514
|
24718 |
+
},
|
24719 |
+
{
|
24720 |
+
"epoch": 9.311258278145695,
|
24721 |
+
"grad_norm": 1.000295639038086,
|
24722 |
+
"learning_rate": 3.381962864721485e-05,
|
24723 |
+
"loss": 0.0244,
|
24724 |
+
"step": 3515
|
24725 |
+
},
|
24726 |
+
{
|
24727 |
+
"epoch": 9.313907284768211,
|
24728 |
+
"grad_norm": 4.480589866638184,
|
24729 |
+
"learning_rate": 3.368700265251989e-05,
|
24730 |
+
"loss": 0.0353,
|
24731 |
+
"step": 3516
|
24732 |
+
},
|
24733 |
+
{
|
24734 |
+
"epoch": 9.316556291390729,
|
24735 |
+
"grad_norm": 12.758308410644531,
|
24736 |
+
"learning_rate": 3.355437665782493e-05,
|
24737 |
+
"loss": 0.0626,
|
24738 |
+
"step": 3517
|
24739 |
+
},
|
24740 |
+
{
|
24741 |
+
"epoch": 9.319205298013244,
|
24742 |
+
"grad_norm": 5.002047538757324,
|
24743 |
+
"learning_rate": 3.3421750663129974e-05,
|
24744 |
+
"loss": 0.0403,
|
24745 |
+
"step": 3518
|
24746 |
+
},
|
24747 |
+
{
|
24748 |
+
"epoch": 9.321854304635762,
|
24749 |
+
"grad_norm": 0.9235745668411255,
|
24750 |
+
"learning_rate": 3.328912466843501e-05,
|
24751 |
+
"loss": 0.0283,
|
24752 |
+
"step": 3519
|
24753 |
+
},
|
24754 |
+
{
|
24755 |
+
"epoch": 9.324503311258278,
|
24756 |
+
"grad_norm": 3.925029754638672,
|
24757 |
+
"learning_rate": 3.315649867374006e-05,
|
24758 |
+
"loss": 0.0654,
|
24759 |
+
"step": 3520
|
24760 |
+
},
|
24761 |
+
{
|
24762 |
+
"epoch": 9.327152317880795,
|
24763 |
+
"grad_norm": 2.145378351211548,
|
24764 |
+
"learning_rate": 3.3023872679045095e-05,
|
24765 |
+
"loss": 0.0395,
|
24766 |
+
"step": 3521
|
24767 |
+
},
|
24768 |
+
{
|
24769 |
+
"epoch": 9.329801324503311,
|
24770 |
+
"grad_norm": 0.693535566329956,
|
24771 |
+
"learning_rate": 3.289124668435013e-05,
|
24772 |
+
"loss": 0.019,
|
24773 |
+
"step": 3522
|
24774 |
+
},
|
24775 |
+
{
|
24776 |
+
"epoch": 9.332450331125829,
|
24777 |
+
"grad_norm": 3.519618034362793,
|
24778 |
+
"learning_rate": 3.275862068965518e-05,
|
24779 |
+
"loss": 0.0337,
|
24780 |
+
"step": 3523
|
24781 |
+
},
|
24782 |
+
{
|
24783 |
+
"epoch": 9.335099337748344,
|
24784 |
+
"grad_norm": 1.0909799337387085,
|
24785 |
+
"learning_rate": 3.2625994694960215e-05,
|
24786 |
+
"loss": 0.0288,
|
24787 |
+
"step": 3524
|
24788 |
+
},
|
24789 |
+
{
|
24790 |
+
"epoch": 9.33774834437086,
|
24791 |
+
"grad_norm": 1.9988322257995605,
|
24792 |
+
"learning_rate": 3.2493368700265253e-05,
|
24793 |
+
"loss": 0.034,
|
24794 |
+
"step": 3525
|
24795 |
+
},
|
24796 |
+
{
|
24797 |
+
"epoch": 9.340397350993378,
|
24798 |
+
"grad_norm": 0.8173007965087891,
|
24799 |
+
"learning_rate": 3.23607427055703e-05,
|
24800 |
+
"loss": 0.0268,
|
24801 |
+
"step": 3526
|
24802 |
+
},
|
24803 |
+
{
|
24804 |
+
"epoch": 9.343046357615894,
|
24805 |
+
"grad_norm": 0.7954380512237549,
|
24806 |
+
"learning_rate": 3.222811671087533e-05,
|
24807 |
+
"loss": 0.0221,
|
24808 |
+
"step": 3527
|
24809 |
+
},
|
24810 |
+
{
|
24811 |
+
"epoch": 9.345695364238411,
|
24812 |
+
"grad_norm": 0.9416889548301697,
|
24813 |
+
"learning_rate": 3.209549071618037e-05,
|
24814 |
+
"loss": 0.0287,
|
24815 |
+
"step": 3528
|
24816 |
+
},
|
24817 |
+
{
|
24818 |
+
"epoch": 9.348344370860927,
|
24819 |
+
"grad_norm": 5.81309175491333,
|
24820 |
+
"learning_rate": 3.196286472148541e-05,
|
24821 |
+
"loss": 0.0304,
|
24822 |
+
"step": 3529
|
24823 |
+
},
|
24824 |
+
{
|
24825 |
+
"epoch": 9.350993377483444,
|
24826 |
+
"grad_norm": 1.454561710357666,
|
24827 |
+
"learning_rate": 3.183023872679045e-05,
|
24828 |
+
"loss": 0.0329,
|
24829 |
+
"step": 3530
|
24830 |
+
},
|
24831 |
+
{
|
24832 |
+
"epoch": 9.35364238410596,
|
24833 |
+
"grad_norm": 1.44857919216156,
|
24834 |
+
"learning_rate": 3.169761273209549e-05,
|
24835 |
+
"loss": 0.0371,
|
24836 |
+
"step": 3531
|
24837 |
+
},
|
24838 |
+
{
|
24839 |
+
"epoch": 9.356291390728476,
|
24840 |
+
"grad_norm": 1.0946413278579712,
|
24841 |
+
"learning_rate": 3.156498673740053e-05,
|
24842 |
+
"loss": 0.0208,
|
24843 |
+
"step": 3532
|
24844 |
+
},
|
24845 |
+
{
|
24846 |
+
"epoch": 9.358940397350993,
|
24847 |
+
"grad_norm": 4.3788018226623535,
|
24848 |
+
"learning_rate": 3.143236074270557e-05,
|
24849 |
+
"loss": 0.0427,
|
24850 |
+
"step": 3533
|
24851 |
+
},
|
24852 |
+
{
|
24853 |
+
"epoch": 9.36158940397351,
|
24854 |
+
"grad_norm": 8.111957550048828,
|
24855 |
+
"learning_rate": 3.129973474801061e-05,
|
24856 |
+
"loss": 0.1752,
|
24857 |
+
"step": 3534
|
24858 |
+
},
|
24859 |
+
{
|
24860 |
+
"epoch": 9.364238410596027,
|
24861 |
+
"grad_norm": 0.8156153559684753,
|
24862 |
+
"learning_rate": 3.116710875331565e-05,
|
24863 |
+
"loss": 0.0289,
|
24864 |
+
"step": 3535
|
24865 |
+
},
|
24866 |
+
{
|
24867 |
+
"epoch": 9.366887417218543,
|
24868 |
+
"grad_norm": 1.32380211353302,
|
24869 |
+
"learning_rate": 3.103448275862069e-05,
|
24870 |
+
"loss": 0.0243,
|
24871 |
+
"step": 3536
|
24872 |
+
},
|
24873 |
+
{
|
24874 |
+
"epoch": 9.36953642384106,
|
24875 |
+
"grad_norm": 1.6183733940124512,
|
24876 |
+
"learning_rate": 3.090185676392573e-05,
|
24877 |
+
"loss": 0.028,
|
24878 |
+
"step": 3537
|
24879 |
+
},
|
24880 |
+
{
|
24881 |
+
"epoch": 9.372185430463576,
|
24882 |
+
"grad_norm": 0.8132844567298889,
|
24883 |
+
"learning_rate": 3.0769230769230774e-05,
|
24884 |
+
"loss": 0.0248,
|
24885 |
+
"step": 3538
|
24886 |
+
},
|
24887 |
+
{
|
24888 |
+
"epoch": 9.374834437086093,
|
24889 |
+
"grad_norm": 0.905636727809906,
|
24890 |
+
"learning_rate": 3.063660477453581e-05,
|
24891 |
+
"loss": 0.0304,
|
24892 |
+
"step": 3539
|
24893 |
+
},
|
24894 |
+
{
|
24895 |
+
"epoch": 9.37748344370861,
|
24896 |
+
"grad_norm": 0.8593434691429138,
|
24897 |
+
"learning_rate": 3.050397877984085e-05,
|
24898 |
+
"loss": 0.0252,
|
24899 |
+
"step": 3540
|
24900 |
+
},
|
24901 |
+
{
|
24902 |
+
"epoch": 9.380132450331125,
|
24903 |
+
"grad_norm": 0.6203551292419434,
|
24904 |
+
"learning_rate": 3.0371352785145892e-05,
|
24905 |
+
"loss": 0.0214,
|
24906 |
+
"step": 3541
|
24907 |
+
},
|
24908 |
+
{
|
24909 |
+
"epoch": 9.382781456953643,
|
24910 |
+
"grad_norm": 0.9663060307502747,
|
24911 |
+
"learning_rate": 3.023872679045093e-05,
|
24912 |
+
"loss": 0.0296,
|
24913 |
+
"step": 3542
|
24914 |
+
},
|
24915 |
+
{
|
24916 |
+
"epoch": 9.385430463576158,
|
24917 |
+
"grad_norm": 4.262774467468262,
|
24918 |
+
"learning_rate": 3.0106100795755968e-05,
|
24919 |
+
"loss": 0.1042,
|
24920 |
+
"step": 3543
|
24921 |
+
},
|
24922 |
+
{
|
24923 |
+
"epoch": 9.388079470198676,
|
24924 |
+
"grad_norm": 3.479534149169922,
|
24925 |
+
"learning_rate": 2.997347480106101e-05,
|
24926 |
+
"loss": 0.0619,
|
24927 |
+
"step": 3544
|
24928 |
+
},
|
24929 |
+
{
|
24930 |
+
"epoch": 9.390728476821192,
|
24931 |
+
"grad_norm": 0.8958720564842224,
|
24932 |
+
"learning_rate": 2.9840848806366047e-05,
|
24933 |
+
"loss": 0.028,
|
24934 |
+
"step": 3545
|
24935 |
+
},
|
24936 |
+
{
|
24937 |
+
"epoch": 9.39337748344371,
|
24938 |
+
"grad_norm": 1.322778344154358,
|
24939 |
+
"learning_rate": 2.970822281167109e-05,
|
24940 |
+
"loss": 0.0295,
|
24941 |
+
"step": 3546
|
24942 |
+
},
|
24943 |
+
{
|
24944 |
+
"epoch": 9.396026490066225,
|
24945 |
+
"grad_norm": 0.8238294720649719,
|
24946 |
+
"learning_rate": 2.957559681697613e-05,
|
24947 |
+
"loss": 0.0241,
|
24948 |
+
"step": 3547
|
24949 |
+
},
|
24950 |
+
{
|
24951 |
+
"epoch": 9.398675496688742,
|
24952 |
+
"grad_norm": 18.22843360900879,
|
24953 |
+
"learning_rate": 2.9442970822281168e-05,
|
24954 |
+
"loss": 0.3251,
|
24955 |
+
"step": 3548
|
24956 |
+
},
|
24957 |
+
{
|
24958 |
+
"epoch": 9.401324503311258,
|
24959 |
+
"grad_norm": 1.4560190439224243,
|
24960 |
+
"learning_rate": 2.9310344827586206e-05,
|
24961 |
+
"loss": 0.0257,
|
24962 |
+
"step": 3549
|
24963 |
+
},
|
24964 |
+
{
|
24965 |
+
"epoch": 9.403973509933774,
|
24966 |
+
"grad_norm": 0.6159964799880981,
|
24967 |
+
"learning_rate": 2.9177718832891247e-05,
|
24968 |
+
"loss": 0.0204,
|
24969 |
+
"step": 3550
|
24970 |
+
},
|
24971 |
+
{
|
24972 |
+
"epoch": 9.406622516556292,
|
24973 |
+
"grad_norm": 2.4865353107452393,
|
24974 |
+
"learning_rate": 2.904509283819629e-05,
|
24975 |
+
"loss": 0.0572,
|
24976 |
+
"step": 3551
|
24977 |
+
},
|
24978 |
+
{
|
24979 |
+
"epoch": 9.409271523178807,
|
24980 |
+
"grad_norm": 0.6925386786460876,
|
24981 |
+
"learning_rate": 2.8912466843501326e-05,
|
24982 |
+
"loss": 0.0206,
|
24983 |
+
"step": 3552
|
24984 |
+
},
|
24985 |
+
{
|
24986 |
+
"epoch": 9.411920529801325,
|
24987 |
+
"grad_norm": 0.6940968632698059,
|
24988 |
+
"learning_rate": 2.8779840848806368e-05,
|
24989 |
+
"loss": 0.0194,
|
24990 |
+
"step": 3553
|
24991 |
+
},
|
24992 |
+
{
|
24993 |
+
"epoch": 9.41456953642384,
|
24994 |
+
"grad_norm": 1.0806211233139038,
|
24995 |
+
"learning_rate": 2.864721485411141e-05,
|
24996 |
+
"loss": 0.0194,
|
24997 |
+
"step": 3554
|
24998 |
+
},
|
24999 |
+
{
|
25000 |
+
"epoch": 9.417218543046358,
|
25001 |
+
"grad_norm": 4.622027397155762,
|
25002 |
+
"learning_rate": 2.8514588859416444e-05,
|
25003 |
+
"loss": 0.0577,
|
25004 |
+
"step": 3555
|
25005 |
+
},
|
25006 |
+
{
|
25007 |
+
"epoch": 9.419867549668874,
|
25008 |
+
"grad_norm": 1.0674515962600708,
|
25009 |
+
"learning_rate": 2.8381962864721485e-05,
|
25010 |
+
"loss": 0.0265,
|
25011 |
+
"step": 3556
|
25012 |
+
},
|
25013 |
+
{
|
25014 |
+
"epoch": 9.422516556291392,
|
25015 |
+
"grad_norm": 0.7756242752075195,
|
25016 |
+
"learning_rate": 2.8249336870026527e-05,
|
25017 |
+
"loss": 0.0238,
|
25018 |
+
"step": 3557
|
25019 |
+
},
|
25020 |
+
{
|
25021 |
+
"epoch": 9.425165562913907,
|
25022 |
+
"grad_norm": 2.069844961166382,
|
25023 |
+
"learning_rate": 2.8116710875331565e-05,
|
25024 |
+
"loss": 0.0417,
|
25025 |
+
"step": 3558
|
25026 |
+
},
|
25027 |
+
{
|
25028 |
+
"epoch": 9.427814569536423,
|
25029 |
+
"grad_norm": 0.6700056791305542,
|
25030 |
+
"learning_rate": 2.7984084880636606e-05,
|
25031 |
+
"loss": 0.0233,
|
25032 |
+
"step": 3559
|
25033 |
+
},
|
25034 |
+
{
|
25035 |
+
"epoch": 9.43046357615894,
|
25036 |
+
"grad_norm": 0.9146801233291626,
|
25037 |
+
"learning_rate": 2.7851458885941647e-05,
|
25038 |
+
"loss": 0.0223,
|
25039 |
+
"step": 3560
|
25040 |
+
},
|
25041 |
+
{
|
25042 |
+
"epoch": 9.433112582781456,
|
25043 |
+
"grad_norm": 1.4189099073410034,
|
25044 |
+
"learning_rate": 2.7718832891246682e-05,
|
25045 |
+
"loss": 0.0311,
|
25046 |
+
"step": 3561
|
25047 |
+
},
|
25048 |
+
{
|
25049 |
+
"epoch": 9.435761589403974,
|
25050 |
+
"grad_norm": 1.0737541913986206,
|
25051 |
+
"learning_rate": 2.7586206896551723e-05,
|
25052 |
+
"loss": 0.0261,
|
25053 |
+
"step": 3562
|
25054 |
+
},
|
25055 |
+
{
|
25056 |
+
"epoch": 9.43841059602649,
|
25057 |
+
"grad_norm": 1.6826467514038086,
|
25058 |
+
"learning_rate": 2.7453580901856765e-05,
|
25059 |
+
"loss": 0.0366,
|
25060 |
+
"step": 3563
|
25061 |
+
},
|
25062 |
+
{
|
25063 |
+
"epoch": 9.441059602649007,
|
25064 |
+
"grad_norm": 1.8295217752456665,
|
25065 |
+
"learning_rate": 2.7320954907161803e-05,
|
25066 |
+
"loss": 0.0343,
|
25067 |
+
"step": 3564
|
25068 |
+
},
|
25069 |
+
{
|
25070 |
+
"epoch": 9.443708609271523,
|
25071 |
+
"grad_norm": 2.9101524353027344,
|
25072 |
+
"learning_rate": 2.7188328912466844e-05,
|
25073 |
+
"loss": 0.0366,
|
25074 |
+
"step": 3565
|
25075 |
+
},
|
25076 |
+
{
|
25077 |
+
"epoch": 9.44635761589404,
|
25078 |
+
"grad_norm": 1.3622254133224487,
|
25079 |
+
"learning_rate": 2.7055702917771885e-05,
|
25080 |
+
"loss": 0.034,
|
25081 |
+
"step": 3566
|
25082 |
+
},
|
25083 |
+
{
|
25084 |
+
"epoch": 9.449006622516556,
|
25085 |
+
"grad_norm": 1.019713044166565,
|
25086 |
+
"learning_rate": 2.6923076923076927e-05,
|
25087 |
+
"loss": 0.0282,
|
25088 |
+
"step": 3567
|
25089 |
+
},
|
25090 |
+
{
|
25091 |
+
"epoch": 9.451655629139072,
|
25092 |
+
"grad_norm": 5.5701704025268555,
|
25093 |
+
"learning_rate": 2.679045092838196e-05,
|
25094 |
+
"loss": 0.07,
|
25095 |
+
"step": 3568
|
25096 |
+
},
|
25097 |
+
{
|
25098 |
+
"epoch": 9.45430463576159,
|
25099 |
+
"grad_norm": 1.8294882774353027,
|
25100 |
+
"learning_rate": 2.6657824933687003e-05,
|
25101 |
+
"loss": 0.0407,
|
25102 |
+
"step": 3569
|
25103 |
+
},
|
25104 |
+
{
|
25105 |
+
"epoch": 9.456953642384105,
|
25106 |
+
"grad_norm": 3.7098734378814697,
|
25107 |
+
"learning_rate": 2.6525198938992044e-05,
|
25108 |
+
"loss": 0.059,
|
25109 |
+
"step": 3570
|
25110 |
+
},
|
25111 |
+
{
|
25112 |
+
"epoch": 9.459602649006623,
|
25113 |
+
"grad_norm": 1.1825202703475952,
|
25114 |
+
"learning_rate": 2.6392572944297082e-05,
|
25115 |
+
"loss": 0.0288,
|
25116 |
+
"step": 3571
|
25117 |
+
},
|
25118 |
+
{
|
25119 |
+
"epoch": 9.462251655629139,
|
25120 |
+
"grad_norm": 4.8834052085876465,
|
25121 |
+
"learning_rate": 2.6259946949602123e-05,
|
25122 |
+
"loss": 0.084,
|
25123 |
+
"step": 3572
|
25124 |
+
},
|
25125 |
+
{
|
25126 |
+
"epoch": 9.464900662251656,
|
25127 |
+
"grad_norm": 0.9815185070037842,
|
25128 |
+
"learning_rate": 2.6127320954907165e-05,
|
25129 |
+
"loss": 0.0268,
|
25130 |
+
"step": 3573
|
25131 |
+
},
|
25132 |
+
{
|
25133 |
+
"epoch": 9.467549668874172,
|
25134 |
+
"grad_norm": 0.6526684761047363,
|
25135 |
+
"learning_rate": 2.59946949602122e-05,
|
25136 |
+
"loss": 0.0243,
|
25137 |
+
"step": 3574
|
25138 |
+
},
|
25139 |
+
{
|
25140 |
+
"epoch": 9.47019867549669,
|
25141 |
+
"grad_norm": 5.323193550109863,
|
25142 |
+
"learning_rate": 2.586206896551724e-05,
|
25143 |
+
"loss": 0.0836,
|
25144 |
+
"step": 3575
|
25145 |
+
},
|
25146 |
+
{
|
25147 |
+
"epoch": 9.472847682119205,
|
25148 |
+
"grad_norm": 1.0784683227539062,
|
25149 |
+
"learning_rate": 2.5729442970822282e-05,
|
25150 |
+
"loss": 0.0304,
|
25151 |
+
"step": 3576
|
25152 |
+
},
|
25153 |
+
{
|
25154 |
+
"epoch": 9.475496688741721,
|
25155 |
+
"grad_norm": 2.6157948970794678,
|
25156 |
+
"learning_rate": 2.559681697612732e-05,
|
25157 |
+
"loss": 0.0531,
|
25158 |
+
"step": 3577
|
25159 |
+
},
|
25160 |
+
{
|
25161 |
+
"epoch": 9.478145695364239,
|
25162 |
+
"grad_norm": 0.8626123070716858,
|
25163 |
+
"learning_rate": 2.546419098143236e-05,
|
25164 |
+
"loss": 0.0295,
|
25165 |
+
"step": 3578
|
25166 |
+
},
|
25167 |
+
{
|
25168 |
+
"epoch": 9.480794701986754,
|
25169 |
+
"grad_norm": 1.0967538356781006,
|
25170 |
+
"learning_rate": 2.5331564986737403e-05,
|
25171 |
+
"loss": 0.0323,
|
25172 |
+
"step": 3579
|
25173 |
+
},
|
25174 |
+
{
|
25175 |
+
"epoch": 9.483443708609272,
|
25176 |
+
"grad_norm": 1.0259839296340942,
|
25177 |
+
"learning_rate": 2.519893899204244e-05,
|
25178 |
+
"loss": 0.03,
|
25179 |
+
"step": 3580
|
25180 |
+
},
|
25181 |
+
{
|
25182 |
+
"epoch": 9.486092715231788,
|
25183 |
+
"grad_norm": 8.880619049072266,
|
25184 |
+
"learning_rate": 2.506631299734748e-05,
|
25185 |
+
"loss": 0.0342,
|
25186 |
+
"step": 3581
|
25187 |
+
},
|
25188 |
+
{
|
25189 |
+
"epoch": 9.488741721854305,
|
25190 |
+
"grad_norm": 0.5894123315811157,
|
25191 |
+
"learning_rate": 2.493368700265252e-05,
|
25192 |
+
"loss": 0.0219,
|
25193 |
+
"step": 3582
|
25194 |
+
},
|
25195 |
+
{
|
25196 |
+
"epoch": 9.491390728476821,
|
25197 |
+
"grad_norm": 3.911766767501831,
|
25198 |
+
"learning_rate": 2.480106100795756e-05,
|
25199 |
+
"loss": 0.0517,
|
25200 |
+
"step": 3583
|
25201 |
+
},
|
25202 |
+
{
|
25203 |
+
"epoch": 9.494039735099339,
|
25204 |
+
"grad_norm": 0.6711585521697998,
|
25205 |
+
"learning_rate": 2.46684350132626e-05,
|
25206 |
+
"loss": 0.0201,
|
25207 |
+
"step": 3584
|
25208 |
+
},
|
25209 |
+
{
|
25210 |
+
"epoch": 9.496688741721854,
|
25211 |
+
"grad_norm": 0.7170477509498596,
|
25212 |
+
"learning_rate": 2.453580901856764e-05,
|
25213 |
+
"loss": 0.0222,
|
25214 |
+
"step": 3585
|
25215 |
+
},
|
25216 |
+
{
|
25217 |
+
"epoch": 9.49933774834437,
|
25218 |
+
"grad_norm": 0.703746497631073,
|
25219 |
+
"learning_rate": 2.440318302387268e-05,
|
25220 |
+
"loss": 0.024,
|
25221 |
+
"step": 3586
|
25222 |
+
},
|
25223 |
+
{
|
25224 |
+
"epoch": 9.501986754966888,
|
25225 |
+
"grad_norm": 0.8796095848083496,
|
25226 |
+
"learning_rate": 2.4270557029177717e-05,
|
25227 |
+
"loss": 0.0246,
|
25228 |
+
"step": 3587
|
25229 |
+
},
|
25230 |
+
{
|
25231 |
+
"epoch": 9.504635761589403,
|
25232 |
+
"grad_norm": 0.8228460550308228,
|
25233 |
+
"learning_rate": 2.4137931034482758e-05,
|
25234 |
+
"loss": 0.0257,
|
25235 |
+
"step": 3588
|
25236 |
+
},
|
25237 |
+
{
|
25238 |
+
"epoch": 9.507284768211921,
|
25239 |
+
"grad_norm": 5.519691467285156,
|
25240 |
+
"learning_rate": 2.40053050397878e-05,
|
25241 |
+
"loss": 0.0731,
|
25242 |
+
"step": 3589
|
25243 |
+
},
|
25244 |
+
{
|
25245 |
+
"epoch": 9.509933774834437,
|
25246 |
+
"grad_norm": 5.88948392868042,
|
25247 |
+
"learning_rate": 2.3872679045092838e-05,
|
25248 |
+
"loss": 0.2604,
|
25249 |
+
"step": 3590
|
25250 |
+
},
|
25251 |
+
{
|
25252 |
+
"epoch": 9.512582781456954,
|
25253 |
+
"grad_norm": 8.751077651977539,
|
25254 |
+
"learning_rate": 2.374005305039788e-05,
|
25255 |
+
"loss": 0.0999,
|
25256 |
+
"step": 3591
|
25257 |
+
},
|
25258 |
+
{
|
25259 |
+
"epoch": 9.51523178807947,
|
25260 |
+
"grad_norm": 6.72320556640625,
|
25261 |
+
"learning_rate": 2.360742705570292e-05,
|
25262 |
+
"loss": 0.1101,
|
25263 |
+
"step": 3592
|
25264 |
+
},
|
25265 |
+
{
|
25266 |
+
"epoch": 9.517880794701988,
|
25267 |
+
"grad_norm": 2.555183172225952,
|
25268 |
+
"learning_rate": 2.347480106100796e-05,
|
25269 |
+
"loss": 0.0294,
|
25270 |
+
"step": 3593
|
25271 |
+
},
|
25272 |
+
{
|
25273 |
+
"epoch": 9.520529801324503,
|
25274 |
+
"grad_norm": 8.663053512573242,
|
25275 |
+
"learning_rate": 2.3342175066312996e-05,
|
25276 |
+
"loss": 0.0628,
|
25277 |
+
"step": 3594
|
25278 |
+
},
|
25279 |
+
{
|
25280 |
+
"epoch": 9.52317880794702,
|
25281 |
+
"grad_norm": 0.7461556196212769,
|
25282 |
+
"learning_rate": 2.3209549071618038e-05,
|
25283 |
+
"loss": 0.0208,
|
25284 |
+
"step": 3595
|
25285 |
+
},
|
25286 |
+
{
|
25287 |
+
"epoch": 9.525827814569537,
|
25288 |
+
"grad_norm": 1.2280560731887817,
|
25289 |
+
"learning_rate": 2.307692307692308e-05,
|
25290 |
+
"loss": 0.0219,
|
25291 |
+
"step": 3596
|
25292 |
+
},
|
25293 |
+
{
|
25294 |
+
"epoch": 9.528476821192053,
|
25295 |
+
"grad_norm": 1.1770743131637573,
|
25296 |
+
"learning_rate": 2.2944297082228117e-05,
|
25297 |
+
"loss": 0.0254,
|
25298 |
+
"step": 3597
|
25299 |
+
},
|
25300 |
+
{
|
25301 |
+
"epoch": 9.53112582781457,
|
25302 |
+
"grad_norm": 2.0086593627929688,
|
25303 |
+
"learning_rate": 2.281167108753316e-05,
|
25304 |
+
"loss": 0.0469,
|
25305 |
+
"step": 3598
|
25306 |
+
},
|
25307 |
+
{
|
25308 |
+
"epoch": 9.533774834437086,
|
25309 |
+
"grad_norm": 0.44559231400489807,
|
25310 |
+
"learning_rate": 2.2679045092838196e-05,
|
25311 |
+
"loss": 0.0151,
|
25312 |
+
"step": 3599
|
25313 |
+
},
|
25314 |
+
{
|
25315 |
+
"epoch": 9.536423841059603,
|
25316 |
+
"grad_norm": 3.1169657707214355,
|
25317 |
+
"learning_rate": 2.2546419098143234e-05,
|
25318 |
+
"loss": 0.0544,
|
25319 |
+
"step": 3600
|
25320 |
+
},
|
25321 |
+
{
|
25322 |
+
"epoch": 9.53907284768212,
|
25323 |
+
"grad_norm": 0.7765498161315918,
|
25324 |
+
"learning_rate": 2.2413793103448276e-05,
|
25325 |
+
"loss": 0.0211,
|
25326 |
+
"step": 3601
|
25327 |
+
},
|
25328 |
+
{
|
25329 |
+
"epoch": 9.541721854304635,
|
25330 |
+
"grad_norm": 3.1204161643981934,
|
25331 |
+
"learning_rate": 2.2281167108753317e-05,
|
25332 |
+
"loss": 0.0747,
|
25333 |
+
"step": 3602
|
25334 |
+
},
|
25335 |
+
{
|
25336 |
+
"epoch": 9.544370860927152,
|
25337 |
+
"grad_norm": 0.6241468787193298,
|
25338 |
+
"learning_rate": 2.2148541114058355e-05,
|
25339 |
+
"loss": 0.0222,
|
25340 |
+
"step": 3603
|
25341 |
+
},
|
25342 |
+
{
|
25343 |
+
"epoch": 9.547019867549668,
|
25344 |
+
"grad_norm": 1.0893391370773315,
|
25345 |
+
"learning_rate": 2.2015915119363396e-05,
|
25346 |
+
"loss": 0.0268,
|
25347 |
+
"step": 3604
|
25348 |
+
},
|
25349 |
+
{
|
25350 |
+
"epoch": 9.549668874172186,
|
25351 |
+
"grad_norm": 0.7905430197715759,
|
25352 |
+
"learning_rate": 2.1883289124668434e-05,
|
25353 |
+
"loss": 0.0181,
|
25354 |
+
"step": 3605
|
25355 |
+
},
|
25356 |
+
{
|
25357 |
+
"epoch": 9.552317880794702,
|
25358 |
+
"grad_norm": 10.596261978149414,
|
25359 |
+
"learning_rate": 2.1750663129973476e-05,
|
25360 |
+
"loss": 0.0797,
|
25361 |
+
"step": 3606
|
25362 |
+
},
|
25363 |
+
{
|
25364 |
+
"epoch": 9.55496688741722,
|
25365 |
+
"grad_norm": 1.1489005088806152,
|
25366 |
+
"learning_rate": 2.1618037135278514e-05,
|
25367 |
+
"loss": 0.0285,
|
25368 |
+
"step": 3607
|
25369 |
+
},
|
25370 |
+
{
|
25371 |
+
"epoch": 9.557615894039735,
|
25372 |
+
"grad_norm": 1.7433918714523315,
|
25373 |
+
"learning_rate": 2.1485411140583555e-05,
|
25374 |
+
"loss": 0.0289,
|
25375 |
+
"step": 3608
|
25376 |
+
},
|
25377 |
+
{
|
25378 |
+
"epoch": 9.560264900662252,
|
25379 |
+
"grad_norm": 4.478367805480957,
|
25380 |
+
"learning_rate": 2.1352785145888597e-05,
|
25381 |
+
"loss": 0.029,
|
25382 |
+
"step": 3609
|
25383 |
+
},
|
25384 |
+
{
|
25385 |
+
"epoch": 9.562913907284768,
|
25386 |
+
"grad_norm": 2.047924757003784,
|
25387 |
+
"learning_rate": 2.1220159151193635e-05,
|
25388 |
+
"loss": 0.0304,
|
25389 |
+
"step": 3610
|
25390 |
+
},
|
25391 |
+
{
|
25392 |
+
"epoch": 9.565562913907284,
|
25393 |
+
"grad_norm": 2.0253522396087646,
|
25394 |
+
"learning_rate": 2.1087533156498673e-05,
|
25395 |
+
"loss": 0.0304,
|
25396 |
+
"step": 3611
|
25397 |
+
},
|
25398 |
+
{
|
25399 |
+
"epoch": 9.568211920529802,
|
25400 |
+
"grad_norm": 4.3127217292785645,
|
25401 |
+
"learning_rate": 2.0954907161803714e-05,
|
25402 |
+
"loss": 0.0572,
|
25403 |
+
"step": 3612
|
25404 |
+
},
|
25405 |
+
{
|
25406 |
+
"epoch": 9.570860927152317,
|
25407 |
+
"grad_norm": 1.7203673124313354,
|
25408 |
+
"learning_rate": 2.0822281167108752e-05,
|
25409 |
+
"loss": 0.0386,
|
25410 |
+
"step": 3613
|
25411 |
+
},
|
25412 |
+
{
|
25413 |
+
"epoch": 9.573509933774835,
|
25414 |
+
"grad_norm": 1.5430024862289429,
|
25415 |
+
"learning_rate": 2.0689655172413793e-05,
|
25416 |
+
"loss": 0.0235,
|
25417 |
+
"step": 3614
|
25418 |
+
},
|
25419 |
+
{
|
25420 |
+
"epoch": 9.57615894039735,
|
25421 |
+
"grad_norm": 0.7850141525268555,
|
25422 |
+
"learning_rate": 2.0557029177718835e-05,
|
25423 |
+
"loss": 0.0202,
|
25424 |
+
"step": 3615
|
25425 |
+
},
|
25426 |
+
{
|
25427 |
+
"epoch": 9.578807947019868,
|
25428 |
+
"grad_norm": 1.4420243501663208,
|
25429 |
+
"learning_rate": 2.0424403183023873e-05,
|
25430 |
+
"loss": 0.0301,
|
25431 |
+
"step": 3616
|
25432 |
+
},
|
25433 |
+
{
|
25434 |
+
"epoch": 9.581456953642384,
|
25435 |
+
"grad_norm": 3.6964313983917236,
|
25436 |
+
"learning_rate": 2.0291777188328914e-05,
|
25437 |
+
"loss": 0.0366,
|
25438 |
+
"step": 3617
|
25439 |
+
},
|
25440 |
+
{
|
25441 |
+
"epoch": 9.584105960264901,
|
25442 |
+
"grad_norm": 1.0883980989456177,
|
25443 |
+
"learning_rate": 2.0159151193633952e-05,
|
25444 |
+
"loss": 0.0294,
|
25445 |
+
"step": 3618
|
25446 |
+
},
|
25447 |
+
{
|
25448 |
+
"epoch": 9.586754966887417,
|
25449 |
+
"grad_norm": 9.587152481079102,
|
25450 |
+
"learning_rate": 2.0026525198938993e-05,
|
25451 |
+
"loss": 0.0841,
|
25452 |
+
"step": 3619
|
25453 |
+
},
|
25454 |
+
{
|
25455 |
+
"epoch": 9.589403973509933,
|
25456 |
+
"grad_norm": 0.9771425127983093,
|
25457 |
+
"learning_rate": 1.989389920424403e-05,
|
25458 |
+
"loss": 0.0286,
|
25459 |
+
"step": 3620
|
25460 |
+
},
|
25461 |
+
{
|
25462 |
+
"epoch": 9.59205298013245,
|
25463 |
+
"grad_norm": 0.551032304763794,
|
25464 |
+
"learning_rate": 1.9761273209549073e-05,
|
25465 |
+
"loss": 0.0176,
|
25466 |
+
"step": 3621
|
25467 |
+
},
|
25468 |
+
{
|
25469 |
+
"epoch": 9.594701986754966,
|
25470 |
+
"grad_norm": 1.319119930267334,
|
25471 |
+
"learning_rate": 1.9628647214854114e-05,
|
25472 |
+
"loss": 0.0309,
|
25473 |
+
"step": 3622
|
25474 |
+
},
|
25475 |
+
{
|
25476 |
+
"epoch": 9.597350993377484,
|
25477 |
+
"grad_norm": 0.9036394357681274,
|
25478 |
+
"learning_rate": 1.9496021220159152e-05,
|
25479 |
+
"loss": 0.0234,
|
25480 |
+
"step": 3623
|
25481 |
+
},
|
25482 |
+
{
|
25483 |
+
"epoch": 9.6,
|
25484 |
+
"grad_norm": 1.2414772510528564,
|
25485 |
+
"learning_rate": 1.936339522546419e-05,
|
25486 |
+
"loss": 0.0352,
|
25487 |
+
"step": 3624
|
25488 |
+
},
|
25489 |
+
{
|
25490 |
+
"epoch": 9.602649006622517,
|
25491 |
+
"grad_norm": 0.6562714576721191,
|
25492 |
+
"learning_rate": 1.923076923076923e-05,
|
25493 |
+
"loss": 0.0231,
|
25494 |
+
"step": 3625
|
25495 |
+
},
|
25496 |
+
{
|
25497 |
+
"epoch": 9.605298013245033,
|
25498 |
+
"grad_norm": 1.8938868045806885,
|
25499 |
+
"learning_rate": 1.909814323607427e-05,
|
25500 |
+
"loss": 0.0249,
|
25501 |
+
"step": 3626
|
25502 |
+
},
|
25503 |
+
{
|
25504 |
+
"epoch": 9.607947019867549,
|
25505 |
+
"grad_norm": 3.0130369663238525,
|
25506 |
+
"learning_rate": 1.896551724137931e-05,
|
25507 |
+
"loss": 0.058,
|
25508 |
+
"step": 3627
|
25509 |
+
},
|
25510 |
+
{
|
25511 |
+
"epoch": 9.610596026490066,
|
25512 |
+
"grad_norm": 0.6540580987930298,
|
25513 |
+
"learning_rate": 1.8832891246684352e-05,
|
25514 |
+
"loss": 0.0207,
|
25515 |
+
"step": 3628
|
25516 |
+
},
|
25517 |
+
{
|
25518 |
+
"epoch": 9.613245033112582,
|
25519 |
+
"grad_norm": 2.125011920928955,
|
25520 |
+
"learning_rate": 1.870026525198939e-05,
|
25521 |
+
"loss": 0.0389,
|
25522 |
+
"step": 3629
|
25523 |
+
},
|
25524 |
+
{
|
25525 |
+
"epoch": 9.6158940397351,
|
25526 |
+
"grad_norm": 0.7941085696220398,
|
25527 |
+
"learning_rate": 1.8567639257294428e-05,
|
25528 |
+
"loss": 0.0238,
|
25529 |
+
"step": 3630
|
25530 |
+
},
|
25531 |
+
{
|
25532 |
+
"epoch": 9.618543046357615,
|
25533 |
+
"grad_norm": 1.355063796043396,
|
25534 |
+
"learning_rate": 1.843501326259947e-05,
|
25535 |
+
"loss": 0.0305,
|
25536 |
+
"step": 3631
|
25537 |
+
},
|
25538 |
+
{
|
25539 |
+
"epoch": 9.621192052980133,
|
25540 |
+
"grad_norm": 0.9060747027397156,
|
25541 |
+
"learning_rate": 1.830238726790451e-05,
|
25542 |
+
"loss": 0.0249,
|
25543 |
+
"step": 3632
|
25544 |
+
},
|
25545 |
+
{
|
25546 |
+
"epoch": 9.623841059602649,
|
25547 |
+
"grad_norm": 0.5956597328186035,
|
25548 |
+
"learning_rate": 1.816976127320955e-05,
|
25549 |
+
"loss": 0.0189,
|
25550 |
+
"step": 3633
|
25551 |
+
},
|
25552 |
+
{
|
25553 |
+
"epoch": 9.626490066225166,
|
25554 |
+
"grad_norm": 3.1617133617401123,
|
25555 |
+
"learning_rate": 1.803713527851459e-05,
|
25556 |
+
"loss": 0.0366,
|
25557 |
+
"step": 3634
|
25558 |
+
},
|
25559 |
+
{
|
25560 |
+
"epoch": 9.629139072847682,
|
25561 |
+
"grad_norm": 1.114554762840271,
|
25562 |
+
"learning_rate": 1.790450928381963e-05,
|
25563 |
+
"loss": 0.0274,
|
25564 |
+
"step": 3635
|
25565 |
+
},
|
25566 |
+
{
|
25567 |
+
"epoch": 9.631788079470198,
|
25568 |
+
"grad_norm": 1.4146301746368408,
|
25569 |
+
"learning_rate": 1.7771883289124666e-05,
|
25570 |
+
"loss": 0.0326,
|
25571 |
+
"step": 3636
|
25572 |
+
},
|
25573 |
+
{
|
25574 |
+
"epoch": 9.634437086092715,
|
25575 |
+
"grad_norm": 1.173376202583313,
|
25576 |
+
"learning_rate": 1.7639257294429708e-05,
|
25577 |
+
"loss": 0.0247,
|
25578 |
+
"step": 3637
|
25579 |
+
},
|
25580 |
+
{
|
25581 |
+
"epoch": 9.637086092715231,
|
25582 |
+
"grad_norm": 1.3982216119766235,
|
25583 |
+
"learning_rate": 1.750663129973475e-05,
|
25584 |
+
"loss": 0.0342,
|
25585 |
+
"step": 3638
|
25586 |
+
},
|
25587 |
+
{
|
25588 |
+
"epoch": 9.639735099337749,
|
25589 |
+
"grad_norm": 0.48249009251594543,
|
25590 |
+
"learning_rate": 1.7374005305039787e-05,
|
25591 |
+
"loss": 0.0178,
|
25592 |
+
"step": 3639
|
25593 |
+
},
|
25594 |
+
{
|
25595 |
+
"epoch": 9.642384105960264,
|
25596 |
+
"grad_norm": 7.4361677169799805,
|
25597 |
+
"learning_rate": 1.7241379310344828e-05,
|
25598 |
+
"loss": 0.0806,
|
25599 |
+
"step": 3640
|
25600 |
+
},
|
25601 |
+
{
|
25602 |
+
"epoch": 9.645033112582782,
|
25603 |
+
"grad_norm": 1.1995569467544556,
|
25604 |
+
"learning_rate": 1.710875331564987e-05,
|
25605 |
+
"loss": 0.0254,
|
25606 |
+
"step": 3641
|
25607 |
+
},
|
25608 |
+
{
|
25609 |
+
"epoch": 9.647682119205298,
|
25610 |
+
"grad_norm": 2.271826982498169,
|
25611 |
+
"learning_rate": 1.6976127320954904e-05,
|
25612 |
+
"loss": 0.0258,
|
25613 |
+
"step": 3642
|
25614 |
+
},
|
25615 |
+
{
|
25616 |
+
"epoch": 9.650331125827815,
|
25617 |
+
"grad_norm": 2.0201494693756104,
|
25618 |
+
"learning_rate": 1.6843501326259946e-05,
|
25619 |
+
"loss": 0.0273,
|
25620 |
+
"step": 3643
|
25621 |
+
},
|
25622 |
+
{
|
25623 |
+
"epoch": 9.652980132450331,
|
25624 |
+
"grad_norm": 2.1260111331939697,
|
25625 |
+
"learning_rate": 1.6710875331564987e-05,
|
25626 |
+
"loss": 0.0266,
|
25627 |
+
"step": 3644
|
25628 |
+
},
|
25629 |
+
{
|
25630 |
+
"epoch": 9.655629139072847,
|
25631 |
+
"grad_norm": 1.2374637126922607,
|
25632 |
+
"learning_rate": 1.657824933687003e-05,
|
25633 |
+
"loss": 0.0256,
|
25634 |
+
"step": 3645
|
25635 |
+
},
|
25636 |
+
{
|
25637 |
+
"epoch": 9.658278145695364,
|
25638 |
+
"grad_norm": 0.9262616634368896,
|
25639 |
+
"learning_rate": 1.6445623342175066e-05,
|
25640 |
+
"loss": 0.0238,
|
25641 |
+
"step": 3646
|
25642 |
+
},
|
25643 |
+
{
|
25644 |
+
"epoch": 9.66092715231788,
|
25645 |
+
"grad_norm": 6.919768333435059,
|
25646 |
+
"learning_rate": 1.6312997347480108e-05,
|
25647 |
+
"loss": 0.0979,
|
25648 |
+
"step": 3647
|
25649 |
+
},
|
25650 |
+
{
|
25651 |
+
"epoch": 9.663576158940398,
|
25652 |
+
"grad_norm": 2.3485090732574463,
|
25653 |
+
"learning_rate": 1.618037135278515e-05,
|
25654 |
+
"loss": 0.0359,
|
25655 |
+
"step": 3648
|
25656 |
+
},
|
25657 |
+
{
|
25658 |
+
"epoch": 9.666225165562913,
|
25659 |
+
"grad_norm": 0.8933830261230469,
|
25660 |
+
"learning_rate": 1.6047745358090184e-05,
|
25661 |
+
"loss": 0.026,
|
25662 |
+
"step": 3649
|
25663 |
+
},
|
25664 |
+
{
|
25665 |
+
"epoch": 9.668874172185431,
|
25666 |
+
"grad_norm": 0.6932836174964905,
|
25667 |
+
"learning_rate": 1.5915119363395225e-05,
|
25668 |
+
"loss": 0.0216,
|
25669 |
+
"step": 3650
|
25670 |
+
},
|
25671 |
+
{
|
25672 |
+
"epoch": 9.671523178807947,
|
25673 |
+
"grad_norm": 0.9356863498687744,
|
25674 |
+
"learning_rate": 1.5782493368700266e-05,
|
25675 |
+
"loss": 0.0223,
|
25676 |
+
"step": 3651
|
25677 |
+
},
|
25678 |
+
{
|
25679 |
+
"epoch": 9.674172185430464,
|
25680 |
+
"grad_norm": 0.7350618243217468,
|
25681 |
+
"learning_rate": 1.5649867374005304e-05,
|
25682 |
+
"loss": 0.0257,
|
25683 |
+
"step": 3652
|
25684 |
+
},
|
25685 |
+
{
|
25686 |
+
"epoch": 9.67682119205298,
|
25687 |
+
"grad_norm": 1.4734960794448853,
|
25688 |
+
"learning_rate": 1.5517241379310346e-05,
|
25689 |
+
"loss": 0.0247,
|
25690 |
+
"step": 3653
|
25691 |
+
},
|
25692 |
+
{
|
25693 |
+
"epoch": 9.679470198675496,
|
25694 |
+
"grad_norm": 0.632612943649292,
|
25695 |
+
"learning_rate": 1.5384615384615387e-05,
|
25696 |
+
"loss": 0.0212,
|
25697 |
+
"step": 3654
|
25698 |
+
},
|
25699 |
+
{
|
25700 |
+
"epoch": 9.682119205298013,
|
25701 |
+
"grad_norm": 0.8005079627037048,
|
25702 |
+
"learning_rate": 1.5251989389920425e-05,
|
25703 |
+
"loss": 0.0238,
|
25704 |
+
"step": 3655
|
25705 |
+
},
|
25706 |
+
{
|
25707 |
+
"epoch": 9.68476821192053,
|
25708 |
+
"grad_norm": 1.6749340295791626,
|
25709 |
+
"learning_rate": 1.5119363395225465e-05,
|
25710 |
+
"loss": 0.0257,
|
25711 |
+
"step": 3656
|
25712 |
+
},
|
25713 |
+
{
|
25714 |
+
"epoch": 9.687417218543047,
|
25715 |
+
"grad_norm": 1.8387826681137085,
|
25716 |
+
"learning_rate": 1.4986737400530505e-05,
|
25717 |
+
"loss": 0.0355,
|
25718 |
+
"step": 3657
|
25719 |
+
},
|
25720 |
+
{
|
25721 |
+
"epoch": 9.690066225165562,
|
25722 |
+
"grad_norm": 0.8870521783828735,
|
25723 |
+
"learning_rate": 1.4854111405835544e-05,
|
25724 |
+
"loss": 0.0239,
|
25725 |
+
"step": 3658
|
25726 |
+
},
|
25727 |
+
{
|
25728 |
+
"epoch": 9.69271523178808,
|
25729 |
+
"grad_norm": 1.0992125272750854,
|
25730 |
+
"learning_rate": 1.4721485411140584e-05,
|
25731 |
+
"loss": 0.028,
|
25732 |
+
"step": 3659
|
25733 |
+
},
|
25734 |
+
{
|
25735 |
+
"epoch": 9.695364238410596,
|
25736 |
+
"grad_norm": 3.3386027812957764,
|
25737 |
+
"learning_rate": 1.4588859416445624e-05,
|
25738 |
+
"loss": 0.0514,
|
25739 |
+
"step": 3660
|
25740 |
+
},
|
25741 |
+
{
|
25742 |
+
"epoch": 9.698013245033113,
|
25743 |
+
"grad_norm": 0.8172714710235596,
|
25744 |
+
"learning_rate": 1.4456233421750663e-05,
|
25745 |
+
"loss": 0.0226,
|
25746 |
+
"step": 3661
|
25747 |
+
},
|
25748 |
+
{
|
25749 |
+
"epoch": 9.70066225165563,
|
25750 |
+
"grad_norm": 0.3870687782764435,
|
25751 |
+
"learning_rate": 1.4323607427055705e-05,
|
25752 |
+
"loss": 0.0129,
|
25753 |
+
"step": 3662
|
25754 |
+
},
|
25755 |
+
{
|
25756 |
+
"epoch": 9.703311258278145,
|
25757 |
+
"grad_norm": 1.2576406002044678,
|
25758 |
+
"learning_rate": 1.4190981432360743e-05,
|
25759 |
+
"loss": 0.029,
|
25760 |
+
"step": 3663
|
25761 |
+
},
|
25762 |
+
{
|
25763 |
+
"epoch": 9.705960264900662,
|
25764 |
+
"grad_norm": 1.0171074867248535,
|
25765 |
+
"learning_rate": 1.4058355437665782e-05,
|
25766 |
+
"loss": 0.0271,
|
25767 |
+
"step": 3664
|
25768 |
+
},
|
25769 |
+
{
|
25770 |
+
"epoch": 9.708609271523178,
|
25771 |
+
"grad_norm": 0.8873515129089355,
|
25772 |
+
"learning_rate": 1.3925729442970824e-05,
|
25773 |
+
"loss": 0.0221,
|
25774 |
+
"step": 3665
|
25775 |
+
},
|
25776 |
+
{
|
25777 |
+
"epoch": 9.711258278145696,
|
25778 |
+
"grad_norm": 1.2012215852737427,
|
25779 |
+
"learning_rate": 1.3793103448275862e-05,
|
25780 |
+
"loss": 0.0256,
|
25781 |
+
"step": 3666
|
25782 |
+
},
|
25783 |
+
{
|
25784 |
+
"epoch": 9.713907284768212,
|
25785 |
+
"grad_norm": 0.5067713856697083,
|
25786 |
+
"learning_rate": 1.3660477453580901e-05,
|
25787 |
+
"loss": 0.0139,
|
25788 |
+
"step": 3667
|
25789 |
+
},
|
25790 |
+
{
|
25791 |
+
"epoch": 9.716556291390729,
|
25792 |
+
"grad_norm": 1.0020195245742798,
|
25793 |
+
"learning_rate": 1.3527851458885943e-05,
|
25794 |
+
"loss": 0.0256,
|
25795 |
+
"step": 3668
|
25796 |
+
},
|
25797 |
+
{
|
25798 |
+
"epoch": 9.719205298013245,
|
25799 |
+
"grad_norm": 0.7098538875579834,
|
25800 |
+
"learning_rate": 1.339522546419098e-05,
|
25801 |
+
"loss": 0.0206,
|
25802 |
+
"step": 3669
|
25803 |
+
},
|
25804 |
+
{
|
25805 |
+
"epoch": 9.721854304635762,
|
25806 |
+
"grad_norm": 1.374456763267517,
|
25807 |
+
"learning_rate": 1.3262599469496022e-05,
|
25808 |
+
"loss": 0.0333,
|
25809 |
+
"step": 3670
|
25810 |
+
},
|
25811 |
+
{
|
25812 |
+
"epoch": 9.724503311258278,
|
25813 |
+
"grad_norm": 12.061737060546875,
|
25814 |
+
"learning_rate": 1.3129973474801062e-05,
|
25815 |
+
"loss": 0.0998,
|
25816 |
+
"step": 3671
|
25817 |
+
},
|
25818 |
+
{
|
25819 |
+
"epoch": 9.727152317880794,
|
25820 |
+
"grad_norm": 0.7301341891288757,
|
25821 |
+
"learning_rate": 1.29973474801061e-05,
|
25822 |
+
"loss": 0.0217,
|
25823 |
+
"step": 3672
|
25824 |
+
},
|
25825 |
+
{
|
25826 |
+
"epoch": 9.729801324503311,
|
25827 |
+
"grad_norm": 0.5392076373100281,
|
25828 |
+
"learning_rate": 1.2864721485411141e-05,
|
25829 |
+
"loss": 0.0152,
|
25830 |
+
"step": 3673
|
25831 |
+
},
|
25832 |
+
{
|
25833 |
+
"epoch": 9.732450331125827,
|
25834 |
+
"grad_norm": 0.5606643557548523,
|
25835 |
+
"learning_rate": 1.273209549071618e-05,
|
25836 |
+
"loss": 0.0195,
|
25837 |
+
"step": 3674
|
25838 |
+
},
|
25839 |
+
{
|
25840 |
+
"epoch": 9.735099337748345,
|
25841 |
+
"grad_norm": 1.8796740770339966,
|
25842 |
+
"learning_rate": 1.259946949602122e-05,
|
25843 |
+
"loss": 0.0326,
|
25844 |
+
"step": 3675
|
25845 |
+
},
|
25846 |
+
{
|
25847 |
+
"epoch": 9.73774834437086,
|
25848 |
+
"grad_norm": 2.6059370040893555,
|
25849 |
+
"learning_rate": 1.246684350132626e-05,
|
25850 |
+
"loss": 0.039,
|
25851 |
+
"step": 3676
|
25852 |
+
},
|
25853 |
+
{
|
25854 |
+
"epoch": 9.740397350993378,
|
25855 |
+
"grad_norm": 1.8863075971603394,
|
25856 |
+
"learning_rate": 1.23342175066313e-05,
|
25857 |
+
"loss": 0.0411,
|
25858 |
+
"step": 3677
|
25859 |
+
},
|
25860 |
+
{
|
25861 |
+
"epoch": 9.743046357615894,
|
25862 |
+
"grad_norm": 1.7432737350463867,
|
25863 |
+
"learning_rate": 1.220159151193634e-05,
|
25864 |
+
"loss": 0.0317,
|
25865 |
+
"step": 3678
|
25866 |
+
},
|
25867 |
+
{
|
25868 |
+
"epoch": 9.745695364238411,
|
25869 |
+
"grad_norm": 0.7454562783241272,
|
25870 |
+
"learning_rate": 1.2068965517241379e-05,
|
25871 |
+
"loss": 0.0238,
|
25872 |
+
"step": 3679
|
25873 |
+
},
|
25874 |
+
{
|
25875 |
+
"epoch": 9.748344370860927,
|
25876 |
+
"grad_norm": 0.6465069651603699,
|
25877 |
+
"learning_rate": 1.1936339522546419e-05,
|
25878 |
+
"loss": 0.021,
|
25879 |
+
"step": 3680
|
25880 |
+
},
|
25881 |
+
{
|
25882 |
+
"epoch": 9.750993377483443,
|
25883 |
+
"grad_norm": 0.7272911071777344,
|
25884 |
+
"learning_rate": 1.180371352785146e-05,
|
25885 |
+
"loss": 0.0214,
|
25886 |
+
"step": 3681
|
25887 |
+
},
|
25888 |
+
{
|
25889 |
+
"epoch": 9.75364238410596,
|
25890 |
+
"grad_norm": 3.296924352645874,
|
25891 |
+
"learning_rate": 1.1671087533156498e-05,
|
25892 |
+
"loss": 0.068,
|
25893 |
+
"step": 3682
|
25894 |
+
},
|
25895 |
+
{
|
25896 |
+
"epoch": 9.756291390728476,
|
25897 |
+
"grad_norm": 1.1401089429855347,
|
25898 |
+
"learning_rate": 1.153846153846154e-05,
|
25899 |
+
"loss": 0.0225,
|
25900 |
+
"step": 3683
|
25901 |
+
},
|
25902 |
+
{
|
25903 |
+
"epoch": 9.758940397350994,
|
25904 |
+
"grad_norm": 8.999724388122559,
|
25905 |
+
"learning_rate": 1.140583554376658e-05,
|
25906 |
+
"loss": 0.0891,
|
25907 |
+
"step": 3684
|
25908 |
+
},
|
25909 |
+
{
|
25910 |
+
"epoch": 9.76158940397351,
|
25911 |
+
"grad_norm": 0.9648635387420654,
|
25912 |
+
"learning_rate": 1.1273209549071617e-05,
|
25913 |
+
"loss": 0.0185,
|
25914 |
+
"step": 3685
|
25915 |
+
},
|
25916 |
+
{
|
25917 |
+
"epoch": 9.764238410596027,
|
25918 |
+
"grad_norm": 2.0460093021392822,
|
25919 |
+
"learning_rate": 1.1140583554376659e-05,
|
25920 |
+
"loss": 0.0325,
|
25921 |
+
"step": 3686
|
25922 |
+
},
|
25923 |
+
{
|
25924 |
+
"epoch": 9.766887417218543,
|
25925 |
+
"grad_norm": 0.9144695997238159,
|
25926 |
+
"learning_rate": 1.1007957559681698e-05,
|
25927 |
+
"loss": 0.0251,
|
25928 |
+
"step": 3687
|
25929 |
+
},
|
25930 |
+
{
|
25931 |
+
"epoch": 9.76953642384106,
|
25932 |
+
"grad_norm": 4.348220348358154,
|
25933 |
+
"learning_rate": 1.0875331564986738e-05,
|
25934 |
+
"loss": 0.0495,
|
25935 |
+
"step": 3688
|
25936 |
+
},
|
25937 |
+
{
|
25938 |
+
"epoch": 9.772185430463576,
|
25939 |
+
"grad_norm": 8.544610023498535,
|
25940 |
+
"learning_rate": 1.0742705570291778e-05,
|
25941 |
+
"loss": 0.0511,
|
25942 |
+
"step": 3689
|
25943 |
+
},
|
25944 |
+
{
|
25945 |
+
"epoch": 9.774834437086092,
|
25946 |
+
"grad_norm": 1.0517222881317139,
|
25947 |
+
"learning_rate": 1.0610079575596817e-05,
|
25948 |
+
"loss": 0.0265,
|
25949 |
+
"step": 3690
|
25950 |
+
},
|
25951 |
+
{
|
25952 |
+
"epoch": 9.77748344370861,
|
25953 |
+
"grad_norm": 0.9953611493110657,
|
25954 |
+
"learning_rate": 1.0477453580901857e-05,
|
25955 |
+
"loss": 0.0264,
|
25956 |
+
"step": 3691
|
25957 |
+
},
|
25958 |
+
{
|
25959 |
+
"epoch": 9.780132450331125,
|
25960 |
+
"grad_norm": 2.783054828643799,
|
25961 |
+
"learning_rate": 1.0344827586206897e-05,
|
25962 |
+
"loss": 0.0385,
|
25963 |
+
"step": 3692
|
25964 |
+
},
|
25965 |
+
{
|
25966 |
+
"epoch": 9.782781456953643,
|
25967 |
+
"grad_norm": 1.5152692794799805,
|
25968 |
+
"learning_rate": 1.0212201591511936e-05,
|
25969 |
+
"loss": 0.0274,
|
25970 |
+
"step": 3693
|
25971 |
+
},
|
25972 |
+
{
|
25973 |
+
"epoch": 9.785430463576159,
|
25974 |
+
"grad_norm": 0.613064169883728,
|
25975 |
+
"learning_rate": 1.0079575596816976e-05,
|
25976 |
+
"loss": 0.0202,
|
25977 |
+
"step": 3694
|
25978 |
+
},
|
25979 |
+
{
|
25980 |
+
"epoch": 9.788079470198676,
|
25981 |
+
"grad_norm": 0.6896834969520569,
|
25982 |
+
"learning_rate": 9.946949602122016e-06,
|
25983 |
+
"loss": 0.022,
|
25984 |
+
"step": 3695
|
25985 |
+
},
|
25986 |
+
{
|
25987 |
+
"epoch": 9.790728476821192,
|
25988 |
+
"grad_norm": 1.0851006507873535,
|
25989 |
+
"learning_rate": 9.814323607427057e-06,
|
25990 |
+
"loss": 0.0275,
|
25991 |
+
"step": 3696
|
25992 |
+
},
|
25993 |
+
{
|
25994 |
+
"epoch": 9.79337748344371,
|
25995 |
+
"grad_norm": 1.2184022665023804,
|
25996 |
+
"learning_rate": 9.681697612732095e-06,
|
25997 |
+
"loss": 0.028,
|
25998 |
+
"step": 3697
|
25999 |
+
},
|
26000 |
+
{
|
26001 |
+
"epoch": 9.796026490066225,
|
26002 |
+
"grad_norm": 0.705527663230896,
|
26003 |
+
"learning_rate": 9.549071618037135e-06,
|
26004 |
+
"loss": 0.0163,
|
26005 |
+
"step": 3698
|
26006 |
+
},
|
26007 |
+
{
|
26008 |
+
"epoch": 9.798675496688741,
|
26009 |
+
"grad_norm": 2.508746862411499,
|
26010 |
+
"learning_rate": 9.416445623342176e-06,
|
26011 |
+
"loss": 0.0684,
|
26012 |
+
"step": 3699
|
26013 |
+
},
|
26014 |
+
{
|
26015 |
+
"epoch": 9.801324503311259,
|
26016 |
+
"grad_norm": 0.8603680729866028,
|
26017 |
+
"learning_rate": 9.283819628647214e-06,
|
26018 |
+
"loss": 0.0201,
|
26019 |
+
"step": 3700
|
26020 |
+
},
|
26021 |
+
{
|
26022 |
+
"epoch": 9.803973509933774,
|
26023 |
+
"grad_norm": 0.9905232787132263,
|
26024 |
+
"learning_rate": 9.151193633952255e-06,
|
26025 |
+
"loss": 0.0212,
|
26026 |
+
"step": 3701
|
26027 |
+
},
|
26028 |
+
{
|
26029 |
+
"epoch": 9.806622516556292,
|
26030 |
+
"grad_norm": 1.6922831535339355,
|
26031 |
+
"learning_rate": 9.018567639257295e-06,
|
26032 |
+
"loss": 0.019,
|
26033 |
+
"step": 3702
|
26034 |
+
},
|
26035 |
+
{
|
26036 |
+
"epoch": 9.809271523178808,
|
26037 |
+
"grad_norm": 1.232926368713379,
|
26038 |
+
"learning_rate": 8.885941644562333e-06,
|
26039 |
+
"loss": 0.0209,
|
26040 |
+
"step": 3703
|
26041 |
+
},
|
26042 |
+
{
|
26043 |
+
"epoch": 9.811920529801325,
|
26044 |
+
"grad_norm": 1.4666463136672974,
|
26045 |
+
"learning_rate": 8.753315649867374e-06,
|
26046 |
+
"loss": 0.0238,
|
26047 |
+
"step": 3704
|
26048 |
+
},
|
26049 |
+
{
|
26050 |
+
"epoch": 9.814569536423841,
|
26051 |
+
"grad_norm": 1.0803937911987305,
|
26052 |
+
"learning_rate": 8.620689655172414e-06,
|
26053 |
+
"loss": 0.0272,
|
26054 |
+
"step": 3705
|
26055 |
+
},
|
26056 |
+
{
|
26057 |
+
"epoch": 9.817218543046359,
|
26058 |
+
"grad_norm": 0.8219428658485413,
|
26059 |
+
"learning_rate": 8.488063660477452e-06,
|
26060 |
+
"loss": 0.0249,
|
26061 |
+
"step": 3706
|
26062 |
+
},
|
26063 |
+
{
|
26064 |
+
"epoch": 9.819867549668874,
|
26065 |
+
"grad_norm": 4.791027545928955,
|
26066 |
+
"learning_rate": 8.355437665782494e-06,
|
26067 |
+
"loss": 0.1262,
|
26068 |
+
"step": 3707
|
26069 |
+
},
|
26070 |
+
{
|
26071 |
+
"epoch": 9.82251655629139,
|
26072 |
+
"grad_norm": 1.0349640846252441,
|
26073 |
+
"learning_rate": 8.222811671087533e-06,
|
26074 |
+
"loss": 0.0216,
|
26075 |
+
"step": 3708
|
26076 |
+
},
|
26077 |
+
{
|
26078 |
+
"epoch": 9.825165562913908,
|
26079 |
+
"grad_norm": 0.47590669989585876,
|
26080 |
+
"learning_rate": 8.090185676392575e-06,
|
26081 |
+
"loss": 0.015,
|
26082 |
+
"step": 3709
|
26083 |
+
},
|
26084 |
+
{
|
26085 |
+
"epoch": 9.827814569536423,
|
26086 |
+
"grad_norm": 0.9150102734565735,
|
26087 |
+
"learning_rate": 7.957559681697613e-06,
|
26088 |
+
"loss": 0.0239,
|
26089 |
+
"step": 3710
|
26090 |
+
},
|
26091 |
+
{
|
26092 |
+
"epoch": 9.830463576158941,
|
26093 |
+
"grad_norm": 0.9096540808677673,
|
26094 |
+
"learning_rate": 7.824933687002652e-06,
|
26095 |
+
"loss": 0.0211,
|
26096 |
+
"step": 3711
|
26097 |
+
},
|
26098 |
+
{
|
26099 |
+
"epoch": 9.833112582781457,
|
26100 |
+
"grad_norm": 0.7392699122428894,
|
26101 |
+
"learning_rate": 7.692307692307694e-06,
|
26102 |
+
"loss": 0.0239,
|
26103 |
+
"step": 3712
|
26104 |
+
},
|
26105 |
+
{
|
26106 |
+
"epoch": 9.835761589403974,
|
26107 |
+
"grad_norm": 0.8193152546882629,
|
26108 |
+
"learning_rate": 7.559681697612732e-06,
|
26109 |
+
"loss": 0.0227,
|
26110 |
+
"step": 3713
|
26111 |
+
},
|
26112 |
+
{
|
26113 |
+
"epoch": 9.83841059602649,
|
26114 |
+
"grad_norm": 0.7564085125923157,
|
26115 |
+
"learning_rate": 7.427055702917772e-06,
|
26116 |
+
"loss": 0.0255,
|
26117 |
+
"step": 3714
|
26118 |
+
},
|
26119 |
+
{
|
26120 |
+
"epoch": 9.841059602649006,
|
26121 |
+
"grad_norm": 2.683502674102783,
|
26122 |
+
"learning_rate": 7.294429708222812e-06,
|
26123 |
+
"loss": 0.0342,
|
26124 |
+
"step": 3715
|
26125 |
+
},
|
26126 |
+
{
|
26127 |
+
"epoch": 9.843708609271523,
|
26128 |
+
"grad_norm": 9.274709701538086,
|
26129 |
+
"learning_rate": 7.161803713527852e-06,
|
26130 |
+
"loss": 0.4134,
|
26131 |
+
"step": 3716
|
26132 |
+
},
|
26133 |
+
{
|
26134 |
+
"epoch": 9.84635761589404,
|
26135 |
+
"grad_norm": 1.7635681629180908,
|
26136 |
+
"learning_rate": 7.029177718832891e-06,
|
26137 |
+
"loss": 0.0241,
|
26138 |
+
"step": 3717
|
26139 |
+
},
|
26140 |
+
{
|
26141 |
+
"epoch": 9.849006622516557,
|
26142 |
+
"grad_norm": 0.9706487655639648,
|
26143 |
+
"learning_rate": 6.896551724137931e-06,
|
26144 |
+
"loss": 0.0317,
|
26145 |
+
"step": 3718
|
26146 |
+
},
|
26147 |
+
{
|
26148 |
+
"epoch": 9.851655629139072,
|
26149 |
+
"grad_norm": 0.8491171598434448,
|
26150 |
+
"learning_rate": 6.763925729442971e-06,
|
26151 |
+
"loss": 0.0269,
|
26152 |
+
"step": 3719
|
26153 |
+
},
|
26154 |
+
{
|
26155 |
+
"epoch": 9.85430463576159,
|
26156 |
+
"grad_norm": 0.8159373998641968,
|
26157 |
+
"learning_rate": 6.631299734748011e-06,
|
26158 |
+
"loss": 0.0275,
|
26159 |
+
"step": 3720
|
26160 |
+
},
|
26161 |
+
{
|
26162 |
+
"epoch": 9.856953642384106,
|
26163 |
+
"grad_norm": 0.9206439852714539,
|
26164 |
+
"learning_rate": 6.49867374005305e-06,
|
26165 |
+
"loss": 0.0332,
|
26166 |
+
"step": 3721
|
26167 |
+
},
|
26168 |
+
{
|
26169 |
+
"epoch": 9.859602649006623,
|
26170 |
+
"grad_norm": 0.5902804732322693,
|
26171 |
+
"learning_rate": 6.36604774535809e-06,
|
26172 |
+
"loss": 0.0192,
|
26173 |
+
"step": 3722
|
26174 |
+
},
|
26175 |
+
{
|
26176 |
+
"epoch": 9.862251655629139,
|
26177 |
+
"grad_norm": 1.243345856666565,
|
26178 |
+
"learning_rate": 6.23342175066313e-06,
|
26179 |
+
"loss": 0.0258,
|
26180 |
+
"step": 3723
|
26181 |
+
},
|
26182 |
+
{
|
26183 |
+
"epoch": 9.864900662251655,
|
26184 |
+
"grad_norm": 2.155186414718628,
|
26185 |
+
"learning_rate": 6.10079575596817e-06,
|
26186 |
+
"loss": 0.0382,
|
26187 |
+
"step": 3724
|
26188 |
+
},
|
26189 |
+
{
|
26190 |
+
"epoch": 9.867549668874172,
|
26191 |
+
"grad_norm": 1.069143533706665,
|
26192 |
+
"learning_rate": 5.968169761273209e-06,
|
26193 |
+
"loss": 0.027,
|
26194 |
+
"step": 3725
|
26195 |
+
},
|
26196 |
+
{
|
26197 |
+
"epoch": 9.870198675496688,
|
26198 |
+
"grad_norm": 4.058372497558594,
|
26199 |
+
"learning_rate": 5.835543766578249e-06,
|
26200 |
+
"loss": 0.048,
|
26201 |
+
"step": 3726
|
26202 |
+
},
|
26203 |
+
{
|
26204 |
+
"epoch": 9.872847682119206,
|
26205 |
+
"grad_norm": 5.000580787658691,
|
26206 |
+
"learning_rate": 5.70291777188329e-06,
|
26207 |
+
"loss": 0.0637,
|
26208 |
+
"step": 3727
|
26209 |
+
},
|
26210 |
+
{
|
26211 |
+
"epoch": 9.875496688741721,
|
26212 |
+
"grad_norm": 9.932696342468262,
|
26213 |
+
"learning_rate": 5.570291777188329e-06,
|
26214 |
+
"loss": 0.0758,
|
26215 |
+
"step": 3728
|
26216 |
+
},
|
26217 |
+
{
|
26218 |
+
"epoch": 9.878145695364239,
|
26219 |
+
"grad_norm": 1.106728434562683,
|
26220 |
+
"learning_rate": 5.437665782493369e-06,
|
26221 |
+
"loss": 0.0265,
|
26222 |
+
"step": 3729
|
26223 |
+
},
|
26224 |
+
{
|
26225 |
+
"epoch": 9.880794701986755,
|
26226 |
+
"grad_norm": 3.356229543685913,
|
26227 |
+
"learning_rate": 5.305039787798409e-06,
|
26228 |
+
"loss": 0.0773,
|
26229 |
+
"step": 3730
|
26230 |
+
},
|
26231 |
+
{
|
26232 |
+
"epoch": 9.883443708609272,
|
26233 |
+
"grad_norm": 11.442574501037598,
|
26234 |
+
"learning_rate": 5.172413793103448e-06,
|
26235 |
+
"loss": 0.1475,
|
26236 |
+
"step": 3731
|
26237 |
+
},
|
26238 |
+
{
|
26239 |
+
"epoch": 9.886092715231788,
|
26240 |
+
"grad_norm": 0.6308454871177673,
|
26241 |
+
"learning_rate": 5.039787798408488e-06,
|
26242 |
+
"loss": 0.0202,
|
26243 |
+
"step": 3732
|
26244 |
+
},
|
26245 |
+
{
|
26246 |
+
"epoch": 9.888741721854304,
|
26247 |
+
"grad_norm": 3.8990111351013184,
|
26248 |
+
"learning_rate": 4.9071618037135285e-06,
|
26249 |
+
"loss": 0.0776,
|
26250 |
+
"step": 3733
|
26251 |
+
},
|
26252 |
+
{
|
26253 |
+
"epoch": 9.891390728476821,
|
26254 |
+
"grad_norm": 1.2768089771270752,
|
26255 |
+
"learning_rate": 4.774535809018567e-06,
|
26256 |
+
"loss": 0.028,
|
26257 |
+
"step": 3734
|
26258 |
+
},
|
26259 |
+
{
|
26260 |
+
"epoch": 9.894039735099337,
|
26261 |
+
"grad_norm": 0.7308850884437561,
|
26262 |
+
"learning_rate": 4.641909814323607e-06,
|
26263 |
+
"loss": 0.0201,
|
26264 |
+
"step": 3735
|
26265 |
+
},
|
26266 |
+
{
|
26267 |
+
"epoch": 9.896688741721855,
|
26268 |
+
"grad_norm": 2.2357382774353027,
|
26269 |
+
"learning_rate": 4.5092838196286476e-06,
|
26270 |
+
"loss": 0.0515,
|
26271 |
+
"step": 3736
|
26272 |
+
},
|
26273 |
+
{
|
26274 |
+
"epoch": 9.89933774834437,
|
26275 |
+
"grad_norm": 0.7119673490524292,
|
26276 |
+
"learning_rate": 4.376657824933687e-06,
|
26277 |
+
"loss": 0.0209,
|
26278 |
+
"step": 3737
|
26279 |
+
},
|
26280 |
+
{
|
26281 |
+
"epoch": 9.901986754966888,
|
26282 |
+
"grad_norm": 0.9527900815010071,
|
26283 |
+
"learning_rate": 4.244031830238726e-06,
|
26284 |
+
"loss": 0.0246,
|
26285 |
+
"step": 3738
|
26286 |
+
},
|
26287 |
+
{
|
26288 |
+
"epoch": 9.904635761589404,
|
26289 |
+
"grad_norm": 0.7238677740097046,
|
26290 |
+
"learning_rate": 4.111405835543767e-06,
|
26291 |
+
"loss": 0.0244,
|
26292 |
+
"step": 3739
|
26293 |
+
},
|
26294 |
+
{
|
26295 |
+
"epoch": 9.90728476821192,
|
26296 |
+
"grad_norm": 0.7796650528907776,
|
26297 |
+
"learning_rate": 3.978779840848806e-06,
|
26298 |
+
"loss": 0.0198,
|
26299 |
+
"step": 3740
|
26300 |
+
},
|
26301 |
+
{
|
26302 |
+
"epoch": 9.909933774834437,
|
26303 |
+
"grad_norm": 0.9221249222755432,
|
26304 |
+
"learning_rate": 3.846153846153847e-06,
|
26305 |
+
"loss": 0.0225,
|
26306 |
+
"step": 3741
|
26307 |
+
},
|
26308 |
+
{
|
26309 |
+
"epoch": 9.912582781456953,
|
26310 |
+
"grad_norm": 1.4351009130477905,
|
26311 |
+
"learning_rate": 3.713527851458886e-06,
|
26312 |
+
"loss": 0.0317,
|
26313 |
+
"step": 3742
|
26314 |
+
},
|
26315 |
+
{
|
26316 |
+
"epoch": 9.91523178807947,
|
26317 |
+
"grad_norm": 2.418729543685913,
|
26318 |
+
"learning_rate": 3.580901856763926e-06,
|
26319 |
+
"loss": 0.043,
|
26320 |
+
"step": 3743
|
26321 |
+
},
|
26322 |
+
{
|
26323 |
+
"epoch": 9.917880794701986,
|
26324 |
+
"grad_norm": 1.8398008346557617,
|
26325 |
+
"learning_rate": 3.4482758620689654e-06,
|
26326 |
+
"loss": 0.0378,
|
26327 |
+
"step": 3744
|
26328 |
+
},
|
26329 |
+
{
|
26330 |
+
"epoch": 9.920529801324504,
|
26331 |
+
"grad_norm": 0.809978187084198,
|
26332 |
+
"learning_rate": 3.3156498673740055e-06,
|
26333 |
+
"loss": 0.0217,
|
26334 |
+
"step": 3745
|
26335 |
+
},
|
26336 |
+
{
|
26337 |
+
"epoch": 9.92317880794702,
|
26338 |
+
"grad_norm": 4.460946083068848,
|
26339 |
+
"learning_rate": 3.183023872679045e-06,
|
26340 |
+
"loss": 0.0753,
|
26341 |
+
"step": 3746
|
26342 |
+
},
|
26343 |
+
{
|
26344 |
+
"epoch": 9.925827814569537,
|
26345 |
+
"grad_norm": 1.242308497428894,
|
26346 |
+
"learning_rate": 3.050397877984085e-06,
|
26347 |
+
"loss": 0.0222,
|
26348 |
+
"step": 3747
|
26349 |
+
},
|
26350 |
+
{
|
26351 |
+
"epoch": 9.928476821192053,
|
26352 |
+
"grad_norm": 0.6029403805732727,
|
26353 |
+
"learning_rate": 2.9177718832891245e-06,
|
26354 |
+
"loss": 0.0191,
|
26355 |
+
"step": 3748
|
26356 |
+
},
|
26357 |
+
{
|
26358 |
+
"epoch": 9.931125827814569,
|
26359 |
+
"grad_norm": 2.6921770572662354,
|
26360 |
+
"learning_rate": 2.7851458885941646e-06,
|
26361 |
+
"loss": 0.0325,
|
26362 |
+
"step": 3749
|
26363 |
+
},
|
26364 |
+
{
|
26365 |
+
"epoch": 9.933774834437086,
|
26366 |
+
"grad_norm": 0.6772416830062866,
|
26367 |
+
"learning_rate": 2.6525198938992043e-06,
|
26368 |
+
"loss": 0.0196,
|
26369 |
+
"step": 3750
|
26370 |
+
},
|
26371 |
+
{
|
26372 |
+
"epoch": 9.936423841059602,
|
26373 |
+
"grad_norm": 0.4584940969944,
|
26374 |
+
"learning_rate": 2.519893899204244e-06,
|
26375 |
+
"loss": 0.0174,
|
26376 |
+
"step": 3751
|
26377 |
+
},
|
26378 |
+
{
|
26379 |
+
"epoch": 9.93907284768212,
|
26380 |
+
"grad_norm": 0.4869374632835388,
|
26381 |
+
"learning_rate": 2.3872679045092837e-06,
|
26382 |
+
"loss": 0.0156,
|
26383 |
+
"step": 3752
|
26384 |
+
},
|
26385 |
+
{
|
26386 |
+
"epoch": 9.941721854304635,
|
26387 |
+
"grad_norm": 6.000686168670654,
|
26388 |
+
"learning_rate": 2.2546419098143238e-06,
|
26389 |
+
"loss": 0.0857,
|
26390 |
+
"step": 3753
|
26391 |
+
},
|
26392 |
+
{
|
26393 |
+
"epoch": 9.944370860927153,
|
26394 |
+
"grad_norm": 3.851257085800171,
|
26395 |
+
"learning_rate": 2.122015915119363e-06,
|
26396 |
+
"loss": 0.0529,
|
26397 |
+
"step": 3754
|
26398 |
+
},
|
26399 |
+
{
|
26400 |
+
"epoch": 9.947019867549669,
|
26401 |
+
"grad_norm": 0.6516218185424805,
|
26402 |
+
"learning_rate": 1.989389920424403e-06,
|
26403 |
+
"loss": 0.0205,
|
26404 |
+
"step": 3755
|
26405 |
+
},
|
26406 |
+
{
|
26407 |
+
"epoch": 9.949668874172186,
|
26408 |
+
"grad_norm": 1.1415655612945557,
|
26409 |
+
"learning_rate": 1.856763925729443e-06,
|
26410 |
+
"loss": 0.0283,
|
26411 |
+
"step": 3756
|
26412 |
+
},
|
26413 |
+
{
|
26414 |
+
"epoch": 9.952317880794702,
|
26415 |
+
"grad_norm": 2.023425817489624,
|
26416 |
+
"learning_rate": 1.7241379310344827e-06,
|
26417 |
+
"loss": 0.0357,
|
26418 |
+
"step": 3757
|
26419 |
+
},
|
26420 |
+
{
|
26421 |
+
"epoch": 9.954966887417218,
|
26422 |
+
"grad_norm": 1.8523999452590942,
|
26423 |
+
"learning_rate": 1.5915119363395226e-06,
|
26424 |
+
"loss": 0.0352,
|
26425 |
+
"step": 3758
|
26426 |
+
},
|
26427 |
+
{
|
26428 |
+
"epoch": 9.957615894039735,
|
26429 |
+
"grad_norm": 1.5999332666397095,
|
26430 |
+
"learning_rate": 1.4588859416445623e-06,
|
26431 |
+
"loss": 0.0345,
|
26432 |
+
"step": 3759
|
26433 |
+
},
|
26434 |
+
{
|
26435 |
+
"epoch": 9.960264900662251,
|
26436 |
+
"grad_norm": 2.143679618835449,
|
26437 |
+
"learning_rate": 1.3262599469496022e-06,
|
26438 |
+
"loss": 0.0338,
|
26439 |
+
"step": 3760
|
26440 |
+
},
|
26441 |
+
{
|
26442 |
+
"epoch": 9.962913907284769,
|
26443 |
+
"grad_norm": 0.6662954092025757,
|
26444 |
+
"learning_rate": 1.1936339522546418e-06,
|
26445 |
+
"loss": 0.0227,
|
26446 |
+
"step": 3761
|
26447 |
+
},
|
26448 |
+
{
|
26449 |
+
"epoch": 9.965562913907284,
|
26450 |
+
"grad_norm": 2.4478042125701904,
|
26451 |
+
"learning_rate": 1.0610079575596815e-06,
|
26452 |
+
"loss": 0.0329,
|
26453 |
+
"step": 3762
|
26454 |
+
},
|
26455 |
+
{
|
26456 |
+
"epoch": 9.968211920529802,
|
26457 |
+
"grad_norm": 1.0733085870742798,
|
26458 |
+
"learning_rate": 9.283819628647215e-07,
|
26459 |
+
"loss": 0.0254,
|
26460 |
+
"step": 3763
|
26461 |
+
},
|
26462 |
+
{
|
26463 |
+
"epoch": 9.970860927152318,
|
26464 |
+
"grad_norm": 1.583178162574768,
|
26465 |
+
"learning_rate": 7.957559681697613e-07,
|
26466 |
+
"loss": 0.027,
|
26467 |
+
"step": 3764
|
26468 |
+
},
|
26469 |
+
{
|
26470 |
+
"epoch": 9.973509933774835,
|
26471 |
+
"grad_norm": 2.4378557205200195,
|
26472 |
+
"learning_rate": 6.631299734748011e-07,
|
26473 |
+
"loss": 0.0308,
|
26474 |
+
"step": 3765
|
26475 |
+
},
|
26476 |
+
{
|
26477 |
+
"epoch": 9.976158940397351,
|
26478 |
+
"grad_norm": 1.4309372901916504,
|
26479 |
+
"learning_rate": 5.305039787798408e-07,
|
26480 |
+
"loss": 0.0257,
|
26481 |
+
"step": 3766
|
26482 |
+
},
|
26483 |
+
{
|
26484 |
+
"epoch": 9.978807947019867,
|
26485 |
+
"grad_norm": 1.0006424188613892,
|
26486 |
+
"learning_rate": 3.9787798408488065e-07,
|
26487 |
+
"loss": 0.0271,
|
26488 |
+
"step": 3767
|
26489 |
+
},
|
26490 |
+
{
|
26491 |
+
"epoch": 9.981456953642384,
|
26492 |
+
"grad_norm": 0.8016912937164307,
|
26493 |
+
"learning_rate": 2.652519893899204e-07,
|
26494 |
+
"loss": 0.0198,
|
26495 |
+
"step": 3768
|
26496 |
+
},
|
26497 |
+
{
|
26498 |
+
"epoch": 9.9841059602649,
|
26499 |
+
"grad_norm": 1.2243459224700928,
|
26500 |
+
"learning_rate": 1.326259946949602e-07,
|
26501 |
+
"loss": 0.029,
|
26502 |
+
"step": 3769
|
26503 |
+
},
|
26504 |
+
{
|
26505 |
+
"epoch": 9.986754966887418,
|
26506 |
+
"grad_norm": 1.4527411460876465,
|
26507 |
+
"learning_rate": 0.0,
|
26508 |
+
"loss": 0.0273,
|
26509 |
+
"step": 3770
|
26510 |
}
|
26511 |
],
|
26512 |
"logging_steps": 1,
|
|
|
26521 |
"should_evaluate": false,
|
26522 |
"should_log": false,
|
26523 |
"should_save": true,
|
26524 |
+
"should_training_stop": true
|
26525 |
},
|
26526 |
"attributes": {}
|
26527 |
}
|
26528 |
},
|
26529 |
+
"total_flos": 2.1337935347515392e+20,
|
26530 |
"train_batch_size": 4,
|
26531 |
"trial_name": null,
|
26532 |
"trial_params": null
|