Upload folder using huggingface_hub
Browse files- global_step5000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- global_step5000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- global_step5000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- global_step5000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- global_step5000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- global_step5000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- global_step5000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- global_step5000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- global_step5000/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- global_step5000/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- global_step5000/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- global_step5000/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- global_step5000/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- global_step5000/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- global_step5000/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- global_step5000/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- latest +1 -1
- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- scheduler.pt +1 -1
- trainer_state.json +3503 -3
global_step5000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06466086dee701cbe722ae107fb8ec1a80970d141bea0d18508fbfe1ad670a05
|
3 |
+
size 5939820828
|
global_step5000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea84d69e24e3c321c331e60029e3541824eec5931ea5012d3bac14515d817391
|
3 |
+
size 5939820828
|
global_step5000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a076cbb8b3b6c0c8f4817cc1c07fc3c9baa867470480278f4bfeb0bc369541d
|
3 |
+
size 5939820828
|
global_step5000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b6f2ce026059fb91e597626bb7c57101df78b9f1293442290c1d19e28461b6f
|
3 |
+
size 5939820828
|
global_step5000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72544dffe76ae23965289b4cc067f4cde4285a83f467893b40633b93d1d0d278
|
3 |
+
size 5939820828
|
global_step5000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:447808c131d563e37d25a824342c1b66d012fcd484cbb838a5f64d8a2ff41910
|
3 |
+
size 5939820828
|
global_step5000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62b232a5831c1233293017065e950b48a46baf6e4c8857ace6868ef49c17f032
|
3 |
+
size 5939820828
|
global_step5000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b72b5665ae4d2185c4c6416ce28f5a0ef7e553c2f0045d2258fd95eb0158d6e
|
3 |
+
size 5939820828
|
global_step5000/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:452e4b05e3c56dc57b8e75c9c4f97810420fa5a599cfd0f71f474e088723677b
|
3 |
+
size 107706008
|
global_step5000/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f97d5fe0b381c3da18941c4a226c9199f6da1a78190a5166271a97dcb5b5516
|
3 |
+
size 107706008
|
global_step5000/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8946799e3c78f06898c0cee757f52d4a3d67e3098ccd04ebe46592c36259b684
|
3 |
+
size 107706008
|
global_step5000/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74e01dd094883b1b5ac19a670aeb18171b9c211b46315c415b0b9c14fa827908
|
3 |
+
size 107706008
|
global_step5000/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:614331d549444f501f441e7673f7f00ec63b946fe6c5727beeb43d8168519ff6
|
3 |
+
size 107706008
|
global_step5000/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c45f9b8ba995f620647bb93b8cdf9e54cf762aaf03b8f4bbe120ea7d0b768475
|
3 |
+
size 107706008
|
global_step5000/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:320e3f915838a1188b740ff78746b8e669856dd11ef0056722e42dc7b526b057
|
3 |
+
size 107706008
|
global_step5000/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a42e9f9aacd9ce8f5674cee935e6443fe71253eb88c057c3274e524570b5772
|
3 |
+
size 107706008
|
latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step5000
|
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4989973456
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2407ec1ec8fa85107e8ebc1b659b5435c3a5bc8ca6bb1741dd92c7139fec22e8
|
3 |
size 4989973456
|
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3786358064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ff1c1883d58d4062b6ff60c91c98c018617933e570cbcbcdc665b56143a0ece
|
3 |
size 3786358064
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38371fe746a8513231631b5bd32374702b655c44b4e2865b7a45576402f7015f
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -31507,6 +31507,3506 @@
|
|
31507 |
"learning_rate": 9.19645679769382e-07,
|
31508 |
"loss": 0.8809,
|
31509 |
"step": 4500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31510 |
}
|
31511 |
],
|
31512 |
"logging_steps": 1.0,
|
@@ -31514,7 +35014,7 @@
|
|
31514 |
"num_input_tokens_seen": 0,
|
31515 |
"num_train_epochs": 1,
|
31516 |
"save_steps": 500,
|
31517 |
-
"total_flos":
|
31518 |
"train_batch_size": 4,
|
31519 |
"trial_name": null,
|
31520 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9628345850182939,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 5000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
31507 |
"learning_rate": 9.19645679769382e-07,
|
31508 |
"loss": 0.8809,
|
31509 |
"step": 4500
|
31510 |
+
},
|
31511 |
+
{
|
31512 |
+
"epoch": 0.87,
|
31513 |
+
"grad_norm": 1.594906527805702,
|
31514 |
+
"learning_rate": 9.170347950523506e-07,
|
31515 |
+
"loss": 0.8415,
|
31516 |
+
"step": 4501
|
31517 |
+
},
|
31518 |
+
{
|
31519 |
+
"epoch": 0.87,
|
31520 |
+
"grad_norm": 1.6839026327500048,
|
31521 |
+
"learning_rate": 9.144274436594558e-07,
|
31522 |
+
"loss": 0.8076,
|
31523 |
+
"step": 4502
|
31524 |
+
},
|
31525 |
+
{
|
31526 |
+
"epoch": 0.87,
|
31527 |
+
"grad_norm": 1.5852937015977455,
|
31528 |
+
"learning_rate": 9.118236266049707e-07,
|
31529 |
+
"loss": 0.8393,
|
31530 |
+
"step": 4503
|
31531 |
+
},
|
31532 |
+
{
|
31533 |
+
"epoch": 0.87,
|
31534 |
+
"grad_norm": 1.482828251223841,
|
31535 |
+
"learning_rate": 9.092233449017962e-07,
|
31536 |
+
"loss": 0.8594,
|
31537 |
+
"step": 4504
|
31538 |
+
},
|
31539 |
+
{
|
31540 |
+
"epoch": 0.87,
|
31541 |
+
"grad_norm": 1.454387680780983,
|
31542 |
+
"learning_rate": 9.066265995614554e-07,
|
31543 |
+
"loss": 0.7132,
|
31544 |
+
"step": 4505
|
31545 |
+
},
|
31546 |
+
{
|
31547 |
+
"epoch": 0.87,
|
31548 |
+
"grad_norm": 1.6223084567765689,
|
31549 |
+
"learning_rate": 9.040333915940991e-07,
|
31550 |
+
"loss": 0.7961,
|
31551 |
+
"step": 4506
|
31552 |
+
},
|
31553 |
+
{
|
31554 |
+
"epoch": 0.87,
|
31555 |
+
"grad_norm": 1.6762400840034626,
|
31556 |
+
"learning_rate": 9.014437220084948e-07,
|
31557 |
+
"loss": 0.8379,
|
31558 |
+
"step": 4507
|
31559 |
+
},
|
31560 |
+
{
|
31561 |
+
"epoch": 0.87,
|
31562 |
+
"grad_norm": 1.5407728498704398,
|
31563 |
+
"learning_rate": 8.988575918120413e-07,
|
31564 |
+
"loss": 0.7942,
|
31565 |
+
"step": 4508
|
31566 |
+
},
|
31567 |
+
{
|
31568 |
+
"epoch": 0.87,
|
31569 |
+
"grad_norm": 1.8192484009488412,
|
31570 |
+
"learning_rate": 8.96275002010758e-07,
|
31571 |
+
"loss": 0.8772,
|
31572 |
+
"step": 4509
|
31573 |
+
},
|
31574 |
+
{
|
31575 |
+
"epoch": 0.87,
|
31576 |
+
"grad_norm": 1.7400925959795057,
|
31577 |
+
"learning_rate": 8.93695953609286e-07,
|
31578 |
+
"loss": 0.8795,
|
31579 |
+
"step": 4510
|
31580 |
+
},
|
31581 |
+
{
|
31582 |
+
"epoch": 0.87,
|
31583 |
+
"grad_norm": 1.6922514370220625,
|
31584 |
+
"learning_rate": 8.911204476108892e-07,
|
31585 |
+
"loss": 0.8421,
|
31586 |
+
"step": 4511
|
31587 |
+
},
|
31588 |
+
{
|
31589 |
+
"epoch": 0.87,
|
31590 |
+
"grad_norm": 1.5665789156211634,
|
31591 |
+
"learning_rate": 8.885484850174541e-07,
|
31592 |
+
"loss": 0.7943,
|
31593 |
+
"step": 4512
|
31594 |
+
},
|
31595 |
+
{
|
31596 |
+
"epoch": 0.87,
|
31597 |
+
"grad_norm": 1.4469490149019075,
|
31598 |
+
"learning_rate": 8.859800668294916e-07,
|
31599 |
+
"loss": 0.8074,
|
31600 |
+
"step": 4513
|
31601 |
+
},
|
31602 |
+
{
|
31603 |
+
"epoch": 0.87,
|
31604 |
+
"grad_norm": 1.753138597801452,
|
31605 |
+
"learning_rate": 8.834151940461255e-07,
|
31606 |
+
"loss": 0.8427,
|
31607 |
+
"step": 4514
|
31608 |
+
},
|
31609 |
+
{
|
31610 |
+
"epoch": 0.87,
|
31611 |
+
"grad_norm": 1.76427896737675,
|
31612 |
+
"learning_rate": 8.808538676651079e-07,
|
31613 |
+
"loss": 0.8607,
|
31614 |
+
"step": 4515
|
31615 |
+
},
|
31616 |
+
{
|
31617 |
+
"epoch": 0.87,
|
31618 |
+
"grad_norm": 1.6686741522110797,
|
31619 |
+
"learning_rate": 8.782960886828084e-07,
|
31620 |
+
"loss": 0.8144,
|
31621 |
+
"step": 4516
|
31622 |
+
},
|
31623 |
+
{
|
31624 |
+
"epoch": 0.87,
|
31625 |
+
"grad_norm": 1.5827402549651326,
|
31626 |
+
"learning_rate": 8.75741858094219e-07,
|
31627 |
+
"loss": 0.8336,
|
31628 |
+
"step": 4517
|
31629 |
+
},
|
31630 |
+
{
|
31631 |
+
"epoch": 0.87,
|
31632 |
+
"grad_norm": 1.6691552735982413,
|
31633 |
+
"learning_rate": 8.73191176892948e-07,
|
31634 |
+
"loss": 0.7777,
|
31635 |
+
"step": 4518
|
31636 |
+
},
|
31637 |
+
{
|
31638 |
+
"epoch": 0.87,
|
31639 |
+
"grad_norm": 1.569135781232643,
|
31640 |
+
"learning_rate": 8.706440460712251e-07,
|
31641 |
+
"loss": 0.8517,
|
31642 |
+
"step": 4519
|
31643 |
+
},
|
31644 |
+
{
|
31645 |
+
"epoch": 0.87,
|
31646 |
+
"grad_norm": 1.7146933616106483,
|
31647 |
+
"learning_rate": 8.681004666199011e-07,
|
31648 |
+
"loss": 0.8775,
|
31649 |
+
"step": 4520
|
31650 |
+
},
|
31651 |
+
{
|
31652 |
+
"epoch": 0.87,
|
31653 |
+
"grad_norm": 1.6239984857466807,
|
31654 |
+
"learning_rate": 8.655604395284378e-07,
|
31655 |
+
"loss": 0.8086,
|
31656 |
+
"step": 4521
|
31657 |
+
},
|
31658 |
+
{
|
31659 |
+
"epoch": 0.87,
|
31660 |
+
"grad_norm": 1.656663129562715,
|
31661 |
+
"learning_rate": 8.630239657849215e-07,
|
31662 |
+
"loss": 0.8586,
|
31663 |
+
"step": 4522
|
31664 |
+
},
|
31665 |
+
{
|
31666 |
+
"epoch": 0.87,
|
31667 |
+
"grad_norm": 1.6751159708848744,
|
31668 |
+
"learning_rate": 8.604910463760585e-07,
|
31669 |
+
"loss": 0.8371,
|
31670 |
+
"step": 4523
|
31671 |
+
},
|
31672 |
+
{
|
31673 |
+
"epoch": 0.87,
|
31674 |
+
"grad_norm": 1.5923042899442412,
|
31675 |
+
"learning_rate": 8.579616822871628e-07,
|
31676 |
+
"loss": 0.8247,
|
31677 |
+
"step": 4524
|
31678 |
+
},
|
31679 |
+
{
|
31680 |
+
"epoch": 0.87,
|
31681 |
+
"grad_norm": 1.6631348630525378,
|
31682 |
+
"learning_rate": 8.554358745021741e-07,
|
31683 |
+
"loss": 0.8895,
|
31684 |
+
"step": 4525
|
31685 |
+
},
|
31686 |
+
{
|
31687 |
+
"epoch": 0.87,
|
31688 |
+
"grad_norm": 1.7292854021155024,
|
31689 |
+
"learning_rate": 8.529136240036439e-07,
|
31690 |
+
"loss": 0.8041,
|
31691 |
+
"step": 4526
|
31692 |
+
},
|
31693 |
+
{
|
31694 |
+
"epoch": 0.87,
|
31695 |
+
"grad_norm": 1.6301954531826386,
|
31696 |
+
"learning_rate": 8.503949317727444e-07,
|
31697 |
+
"loss": 0.8314,
|
31698 |
+
"step": 4527
|
31699 |
+
},
|
31700 |
+
{
|
31701 |
+
"epoch": 0.87,
|
31702 |
+
"grad_norm": 1.8784379544296637,
|
31703 |
+
"learning_rate": 8.478797987892595e-07,
|
31704 |
+
"loss": 0.7901,
|
31705 |
+
"step": 4528
|
31706 |
+
},
|
31707 |
+
{
|
31708 |
+
"epoch": 0.87,
|
31709 |
+
"grad_norm": 1.5767654640020843,
|
31710 |
+
"learning_rate": 8.45368226031592e-07,
|
31711 |
+
"loss": 0.8215,
|
31712 |
+
"step": 4529
|
31713 |
+
},
|
31714 |
+
{
|
31715 |
+
"epoch": 0.87,
|
31716 |
+
"grad_norm": 1.5956433850813396,
|
31717 |
+
"learning_rate": 8.42860214476754e-07,
|
31718 |
+
"loss": 0.7906,
|
31719 |
+
"step": 4530
|
31720 |
+
},
|
31721 |
+
{
|
31722 |
+
"epoch": 0.87,
|
31723 |
+
"grad_norm": 1.9037041961425434,
|
31724 |
+
"learning_rate": 8.403557651003779e-07,
|
31725 |
+
"loss": 0.8136,
|
31726 |
+
"step": 4531
|
31727 |
+
},
|
31728 |
+
{
|
31729 |
+
"epoch": 0.87,
|
31730 |
+
"grad_norm": 1.7133031905826972,
|
31731 |
+
"learning_rate": 8.378548788767083e-07,
|
31732 |
+
"loss": 0.7756,
|
31733 |
+
"step": 4532
|
31734 |
+
},
|
31735 |
+
{
|
31736 |
+
"epoch": 0.87,
|
31737 |
+
"grad_norm": 1.6653051307639297,
|
31738 |
+
"learning_rate": 8.353575567786032e-07,
|
31739 |
+
"loss": 0.8682,
|
31740 |
+
"step": 4533
|
31741 |
+
},
|
31742 |
+
{
|
31743 |
+
"epoch": 0.87,
|
31744 |
+
"grad_norm": 1.6041819040966523,
|
31745 |
+
"learning_rate": 8.328637997775368e-07,
|
31746 |
+
"loss": 0.8277,
|
31747 |
+
"step": 4534
|
31748 |
+
},
|
31749 |
+
{
|
31750 |
+
"epoch": 0.87,
|
31751 |
+
"grad_norm": 1.788934035577091,
|
31752 |
+
"learning_rate": 8.303736088435921e-07,
|
31753 |
+
"loss": 0.9114,
|
31754 |
+
"step": 4535
|
31755 |
+
},
|
31756 |
+
{
|
31757 |
+
"epoch": 0.87,
|
31758 |
+
"grad_norm": 1.5686609721094282,
|
31759 |
+
"learning_rate": 8.278869849454718e-07,
|
31760 |
+
"loss": 0.809,
|
31761 |
+
"step": 4536
|
31762 |
+
},
|
31763 |
+
{
|
31764 |
+
"epoch": 0.87,
|
31765 |
+
"grad_norm": 1.4676365965258953,
|
31766 |
+
"learning_rate": 8.2540392905048e-07,
|
31767 |
+
"loss": 0.8122,
|
31768 |
+
"step": 4537
|
31769 |
+
},
|
31770 |
+
{
|
31771 |
+
"epoch": 0.87,
|
31772 |
+
"grad_norm": 1.6464117299937717,
|
31773 |
+
"learning_rate": 8.22924442124543e-07,
|
31774 |
+
"loss": 0.8284,
|
31775 |
+
"step": 4538
|
31776 |
+
},
|
31777 |
+
{
|
31778 |
+
"epoch": 0.87,
|
31779 |
+
"grad_norm": 1.5900447488545841,
|
31780 |
+
"learning_rate": 8.204485251321947e-07,
|
31781 |
+
"loss": 0.8182,
|
31782 |
+
"step": 4539
|
31783 |
+
},
|
31784 |
+
{
|
31785 |
+
"epoch": 0.87,
|
31786 |
+
"grad_norm": 1.5356577844841082,
|
31787 |
+
"learning_rate": 8.179761790365803e-07,
|
31788 |
+
"loss": 0.7832,
|
31789 |
+
"step": 4540
|
31790 |
+
},
|
31791 |
+
{
|
31792 |
+
"epoch": 0.87,
|
31793 |
+
"grad_norm": 1.692243945185495,
|
31794 |
+
"learning_rate": 8.15507404799456e-07,
|
31795 |
+
"loss": 0.9026,
|
31796 |
+
"step": 4541
|
31797 |
+
},
|
31798 |
+
{
|
31799 |
+
"epoch": 0.87,
|
31800 |
+
"grad_norm": 1.6824591329207468,
|
31801 |
+
"learning_rate": 8.130422033811892e-07,
|
31802 |
+
"loss": 0.8577,
|
31803 |
+
"step": 4542
|
31804 |
+
},
|
31805 |
+
{
|
31806 |
+
"epoch": 0.87,
|
31807 |
+
"grad_norm": 1.6332709059097492,
|
31808 |
+
"learning_rate": 8.105805757407592e-07,
|
31809 |
+
"loss": 0.8176,
|
31810 |
+
"step": 4543
|
31811 |
+
},
|
31812 |
+
{
|
31813 |
+
"epoch": 0.88,
|
31814 |
+
"grad_norm": 1.0034749651827926,
|
31815 |
+
"learning_rate": 8.081225228357481e-07,
|
31816 |
+
"loss": 0.7566,
|
31817 |
+
"step": 4544
|
31818 |
+
},
|
31819 |
+
{
|
31820 |
+
"epoch": 0.88,
|
31821 |
+
"grad_norm": 1.5996492708651493,
|
31822 |
+
"learning_rate": 8.056680456223553e-07,
|
31823 |
+
"loss": 0.8032,
|
31824 |
+
"step": 4545
|
31825 |
+
},
|
31826 |
+
{
|
31827 |
+
"epoch": 0.88,
|
31828 |
+
"grad_norm": 0.9290414679374163,
|
31829 |
+
"learning_rate": 8.03217145055385e-07,
|
31830 |
+
"loss": 0.7635,
|
31831 |
+
"step": 4546
|
31832 |
+
},
|
31833 |
+
{
|
31834 |
+
"epoch": 0.88,
|
31835 |
+
"grad_norm": 1.637370705811582,
|
31836 |
+
"learning_rate": 8.007698220882521e-07,
|
31837 |
+
"loss": 0.8748,
|
31838 |
+
"step": 4547
|
31839 |
+
},
|
31840 |
+
{
|
31841 |
+
"epoch": 0.88,
|
31842 |
+
"grad_norm": 1.6635441096211938,
|
31843 |
+
"learning_rate": 7.983260776729773e-07,
|
31844 |
+
"loss": 0.8472,
|
31845 |
+
"step": 4548
|
31846 |
+
},
|
31847 |
+
{
|
31848 |
+
"epoch": 0.88,
|
31849 |
+
"grad_norm": 1.5998342354299384,
|
31850 |
+
"learning_rate": 7.958859127601937e-07,
|
31851 |
+
"loss": 0.8708,
|
31852 |
+
"step": 4549
|
31853 |
+
},
|
31854 |
+
{
|
31855 |
+
"epoch": 0.88,
|
31856 |
+
"grad_norm": 0.9729592102274293,
|
31857 |
+
"learning_rate": 7.934493282991373e-07,
|
31858 |
+
"loss": 0.7931,
|
31859 |
+
"step": 4550
|
31860 |
+
},
|
31861 |
+
{
|
31862 |
+
"epoch": 0.88,
|
31863 |
+
"grad_norm": 1.6456794482605581,
|
31864 |
+
"learning_rate": 7.910163252376524e-07,
|
31865 |
+
"loss": 0.7578,
|
31866 |
+
"step": 4551
|
31867 |
+
},
|
31868 |
+
{
|
31869 |
+
"epoch": 0.88,
|
31870 |
+
"grad_norm": 1.5501096019336218,
|
31871 |
+
"learning_rate": 7.885869045221917e-07,
|
31872 |
+
"loss": 0.8705,
|
31873 |
+
"step": 4552
|
31874 |
+
},
|
31875 |
+
{
|
31876 |
+
"epoch": 0.88,
|
31877 |
+
"grad_norm": 1.5009277862429256,
|
31878 |
+
"learning_rate": 7.861610670978126e-07,
|
31879 |
+
"loss": 0.8719,
|
31880 |
+
"step": 4553
|
31881 |
+
},
|
31882 |
+
{
|
31883 |
+
"epoch": 0.88,
|
31884 |
+
"grad_norm": 1.6530036454212895,
|
31885 |
+
"learning_rate": 7.837388139081803e-07,
|
31886 |
+
"loss": 0.839,
|
31887 |
+
"step": 4554
|
31888 |
+
},
|
31889 |
+
{
|
31890 |
+
"epoch": 0.88,
|
31891 |
+
"grad_norm": 1.728410895192731,
|
31892 |
+
"learning_rate": 7.813201458955644e-07,
|
31893 |
+
"loss": 0.7931,
|
31894 |
+
"step": 4555
|
31895 |
+
},
|
31896 |
+
{
|
31897 |
+
"epoch": 0.88,
|
31898 |
+
"grad_norm": 1.645971164332409,
|
31899 |
+
"learning_rate": 7.789050640008411e-07,
|
31900 |
+
"loss": 0.8329,
|
31901 |
+
"step": 4556
|
31902 |
+
},
|
31903 |
+
{
|
31904 |
+
"epoch": 0.88,
|
31905 |
+
"grad_norm": 1.8456320281211538,
|
31906 |
+
"learning_rate": 7.7649356916349e-07,
|
31907 |
+
"loss": 0.8624,
|
31908 |
+
"step": 4557
|
31909 |
+
},
|
31910 |
+
{
|
31911 |
+
"epoch": 0.88,
|
31912 |
+
"grad_norm": 1.5518984995792304,
|
31913 |
+
"learning_rate": 7.740856623215953e-07,
|
31914 |
+
"loss": 0.7869,
|
31915 |
+
"step": 4558
|
31916 |
+
},
|
31917 |
+
{
|
31918 |
+
"epoch": 0.88,
|
31919 |
+
"grad_norm": 1.701746462823959,
|
31920 |
+
"learning_rate": 7.716813444118476e-07,
|
31921 |
+
"loss": 0.8131,
|
31922 |
+
"step": 4559
|
31923 |
+
},
|
31924 |
+
{
|
31925 |
+
"epoch": 0.88,
|
31926 |
+
"grad_norm": 1.5597681704026225,
|
31927 |
+
"learning_rate": 7.692806163695377e-07,
|
31928 |
+
"loss": 0.8239,
|
31929 |
+
"step": 4560
|
31930 |
+
},
|
31931 |
+
{
|
31932 |
+
"epoch": 0.88,
|
31933 |
+
"grad_norm": 1.6461590533877206,
|
31934 |
+
"learning_rate": 7.668834791285651e-07,
|
31935 |
+
"loss": 0.9086,
|
31936 |
+
"step": 4561
|
31937 |
+
},
|
31938 |
+
{
|
31939 |
+
"epoch": 0.88,
|
31940 |
+
"grad_norm": 1.7135544196441648,
|
31941 |
+
"learning_rate": 7.644899336214273e-07,
|
31942 |
+
"loss": 0.899,
|
31943 |
+
"step": 4562
|
31944 |
+
},
|
31945 |
+
{
|
31946 |
+
"epoch": 0.88,
|
31947 |
+
"grad_norm": 1.6928781909018462,
|
31948 |
+
"learning_rate": 7.620999807792284e-07,
|
31949 |
+
"loss": 0.892,
|
31950 |
+
"step": 4563
|
31951 |
+
},
|
31952 |
+
{
|
31953 |
+
"epoch": 0.88,
|
31954 |
+
"grad_norm": 1.7778128461398606,
|
31955 |
+
"learning_rate": 7.597136215316737e-07,
|
31956 |
+
"loss": 0.8981,
|
31957 |
+
"step": 4564
|
31958 |
+
},
|
31959 |
+
{
|
31960 |
+
"epoch": 0.88,
|
31961 |
+
"grad_norm": 1.5417553658687158,
|
31962 |
+
"learning_rate": 7.573308568070681e-07,
|
31963 |
+
"loss": 0.8443,
|
31964 |
+
"step": 4565
|
31965 |
+
},
|
31966 |
+
{
|
31967 |
+
"epoch": 0.88,
|
31968 |
+
"grad_norm": 1.645822549835426,
|
31969 |
+
"learning_rate": 7.549516875323215e-07,
|
31970 |
+
"loss": 0.7982,
|
31971 |
+
"step": 4566
|
31972 |
+
},
|
31973 |
+
{
|
31974 |
+
"epoch": 0.88,
|
31975 |
+
"grad_norm": 1.5611574387528526,
|
31976 |
+
"learning_rate": 7.525761146329447e-07,
|
31977 |
+
"loss": 0.9295,
|
31978 |
+
"step": 4567
|
31979 |
+
},
|
31980 |
+
{
|
31981 |
+
"epoch": 0.88,
|
31982 |
+
"grad_norm": 1.611365281760663,
|
31983 |
+
"learning_rate": 7.502041390330472e-07,
|
31984 |
+
"loss": 0.7989,
|
31985 |
+
"step": 4568
|
31986 |
+
},
|
31987 |
+
{
|
31988 |
+
"epoch": 0.88,
|
31989 |
+
"grad_norm": 1.4307881929378798,
|
31990 |
+
"learning_rate": 7.478357616553433e-07,
|
31991 |
+
"loss": 0.8531,
|
31992 |
+
"step": 4569
|
31993 |
+
},
|
31994 |
+
{
|
31995 |
+
"epoch": 0.88,
|
31996 |
+
"grad_norm": 1.6613458669647516,
|
31997 |
+
"learning_rate": 7.454709834211438e-07,
|
31998 |
+
"loss": 0.8417,
|
31999 |
+
"step": 4570
|
32000 |
+
},
|
32001 |
+
{
|
32002 |
+
"epoch": 0.88,
|
32003 |
+
"grad_norm": 1.5988643310648785,
|
32004 |
+
"learning_rate": 7.431098052503594e-07,
|
32005 |
+
"loss": 0.8731,
|
32006 |
+
"step": 4571
|
32007 |
+
},
|
32008 |
+
{
|
32009 |
+
"epoch": 0.88,
|
32010 |
+
"grad_norm": 1.520700797069022,
|
32011 |
+
"learning_rate": 7.40752228061502e-07,
|
32012 |
+
"loss": 0.8015,
|
32013 |
+
"step": 4572
|
32014 |
+
},
|
32015 |
+
{
|
32016 |
+
"epoch": 0.88,
|
32017 |
+
"grad_norm": 1.6569071538578897,
|
32018 |
+
"learning_rate": 7.383982527716848e-07,
|
32019 |
+
"loss": 0.9232,
|
32020 |
+
"step": 4573
|
32021 |
+
},
|
32022 |
+
{
|
32023 |
+
"epoch": 0.88,
|
32024 |
+
"grad_norm": 1.6385053303931487,
|
32025 |
+
"learning_rate": 7.360478802966131e-07,
|
32026 |
+
"loss": 0.7837,
|
32027 |
+
"step": 4574
|
32028 |
+
},
|
32029 |
+
{
|
32030 |
+
"epoch": 0.88,
|
32031 |
+
"grad_norm": 1.58320715655672,
|
32032 |
+
"learning_rate": 7.337011115505976e-07,
|
32033 |
+
"loss": 0.7945,
|
32034 |
+
"step": 4575
|
32035 |
+
},
|
32036 |
+
{
|
32037 |
+
"epoch": 0.88,
|
32038 |
+
"grad_norm": 1.6038227379699823,
|
32039 |
+
"learning_rate": 7.313579474465437e-07,
|
32040 |
+
"loss": 0.8834,
|
32041 |
+
"step": 4576
|
32042 |
+
},
|
32043 |
+
{
|
32044 |
+
"epoch": 0.88,
|
32045 |
+
"grad_norm": 1.701399427841146,
|
32046 |
+
"learning_rate": 7.290183888959557e-07,
|
32047 |
+
"loss": 0.8813,
|
32048 |
+
"step": 4577
|
32049 |
+
},
|
32050 |
+
{
|
32051 |
+
"epoch": 0.88,
|
32052 |
+
"grad_norm": 1.671771680207799,
|
32053 |
+
"learning_rate": 7.266824368089342e-07,
|
32054 |
+
"loss": 0.7681,
|
32055 |
+
"step": 4578
|
32056 |
+
},
|
32057 |
+
{
|
32058 |
+
"epoch": 0.88,
|
32059 |
+
"grad_norm": 0.9405244410682378,
|
32060 |
+
"learning_rate": 7.243500920941793e-07,
|
32061 |
+
"loss": 0.8179,
|
32062 |
+
"step": 4579
|
32063 |
+
},
|
32064 |
+
{
|
32065 |
+
"epoch": 0.88,
|
32066 |
+
"grad_norm": 1.6971553092278484,
|
32067 |
+
"learning_rate": 7.220213556589851e-07,
|
32068 |
+
"loss": 0.8041,
|
32069 |
+
"step": 4580
|
32070 |
+
},
|
32071 |
+
{
|
32072 |
+
"epoch": 0.88,
|
32073 |
+
"grad_norm": 1.6092722366159369,
|
32074 |
+
"learning_rate": 7.196962284092423e-07,
|
32075 |
+
"loss": 0.906,
|
32076 |
+
"step": 4581
|
32077 |
+
},
|
32078 |
+
{
|
32079 |
+
"epoch": 0.88,
|
32080 |
+
"grad_norm": 1.5790028990958835,
|
32081 |
+
"learning_rate": 7.173747112494389e-07,
|
32082 |
+
"loss": 0.9017,
|
32083 |
+
"step": 4582
|
32084 |
+
},
|
32085 |
+
{
|
32086 |
+
"epoch": 0.88,
|
32087 |
+
"grad_norm": 1.6613994467242639,
|
32088 |
+
"learning_rate": 7.150568050826579e-07,
|
32089 |
+
"loss": 0.7846,
|
32090 |
+
"step": 4583
|
32091 |
+
},
|
32092 |
+
{
|
32093 |
+
"epoch": 0.88,
|
32094 |
+
"grad_norm": 1.7813916794522948,
|
32095 |
+
"learning_rate": 7.12742510810579e-07,
|
32096 |
+
"loss": 0.8844,
|
32097 |
+
"step": 4584
|
32098 |
+
},
|
32099 |
+
{
|
32100 |
+
"epoch": 0.88,
|
32101 |
+
"grad_norm": 1.6133888168185018,
|
32102 |
+
"learning_rate": 7.104318293334733e-07,
|
32103 |
+
"loss": 0.8234,
|
32104 |
+
"step": 4585
|
32105 |
+
},
|
32106 |
+
{
|
32107 |
+
"epoch": 0.88,
|
32108 |
+
"grad_norm": 1.6638283699302128,
|
32109 |
+
"learning_rate": 7.081247615502107e-07,
|
32110 |
+
"loss": 0.8494,
|
32111 |
+
"step": 4586
|
32112 |
+
},
|
32113 |
+
{
|
32114 |
+
"epoch": 0.88,
|
32115 |
+
"grad_norm": 1.713196915699892,
|
32116 |
+
"learning_rate": 7.058213083582532e-07,
|
32117 |
+
"loss": 0.7312,
|
32118 |
+
"step": 4587
|
32119 |
+
},
|
32120 |
+
{
|
32121 |
+
"epoch": 0.88,
|
32122 |
+
"grad_norm": 1.5004791256135879,
|
32123 |
+
"learning_rate": 7.035214706536564e-07,
|
32124 |
+
"loss": 0.7935,
|
32125 |
+
"step": 4588
|
32126 |
+
},
|
32127 |
+
{
|
32128 |
+
"epoch": 0.88,
|
32129 |
+
"grad_norm": 1.5682227642510727,
|
32130 |
+
"learning_rate": 7.012252493310689e-07,
|
32131 |
+
"loss": 0.762,
|
32132 |
+
"step": 4589
|
32133 |
+
},
|
32134 |
+
{
|
32135 |
+
"epoch": 0.88,
|
32136 |
+
"grad_norm": 1.6640041441800169,
|
32137 |
+
"learning_rate": 6.989326452837364e-07,
|
32138 |
+
"loss": 0.7774,
|
32139 |
+
"step": 4590
|
32140 |
+
},
|
32141 |
+
{
|
32142 |
+
"epoch": 0.88,
|
32143 |
+
"grad_norm": 1.7428000494600797,
|
32144 |
+
"learning_rate": 6.96643659403492e-07,
|
32145 |
+
"loss": 0.863,
|
32146 |
+
"step": 4591
|
32147 |
+
},
|
32148 |
+
{
|
32149 |
+
"epoch": 0.88,
|
32150 |
+
"grad_norm": 1.7553837389304079,
|
32151 |
+
"learning_rate": 6.943582925807646e-07,
|
32152 |
+
"loss": 0.8168,
|
32153 |
+
"step": 4592
|
32154 |
+
},
|
32155 |
+
{
|
32156 |
+
"epoch": 0.88,
|
32157 |
+
"grad_norm": 1.5911234884094496,
|
32158 |
+
"learning_rate": 6.920765457045753e-07,
|
32159 |
+
"loss": 0.7925,
|
32160 |
+
"step": 4593
|
32161 |
+
},
|
32162 |
+
{
|
32163 |
+
"epoch": 0.88,
|
32164 |
+
"grad_norm": 1.6509026867265733,
|
32165 |
+
"learning_rate": 6.897984196625385e-07,
|
32166 |
+
"loss": 0.871,
|
32167 |
+
"step": 4594
|
32168 |
+
},
|
32169 |
+
{
|
32170 |
+
"epoch": 0.88,
|
32171 |
+
"grad_norm": 0.9510394161604407,
|
32172 |
+
"learning_rate": 6.875239153408541e-07,
|
32173 |
+
"loss": 0.8161,
|
32174 |
+
"step": 4595
|
32175 |
+
},
|
32176 |
+
{
|
32177 |
+
"epoch": 0.89,
|
32178 |
+
"grad_norm": 1.536910091669346,
|
32179 |
+
"learning_rate": 6.852530336243179e-07,
|
32180 |
+
"loss": 0.8231,
|
32181 |
+
"step": 4596
|
32182 |
+
},
|
32183 |
+
{
|
32184 |
+
"epoch": 0.89,
|
32185 |
+
"grad_norm": 1.6302778327684713,
|
32186 |
+
"learning_rate": 6.829857753963154e-07,
|
32187 |
+
"loss": 0.8091,
|
32188 |
+
"step": 4597
|
32189 |
+
},
|
32190 |
+
{
|
32191 |
+
"epoch": 0.89,
|
32192 |
+
"grad_norm": 1.6899195310445059,
|
32193 |
+
"learning_rate": 6.807221415388243e-07,
|
32194 |
+
"loss": 0.8516,
|
32195 |
+
"step": 4598
|
32196 |
+
},
|
32197 |
+
{
|
32198 |
+
"epoch": 0.89,
|
32199 |
+
"grad_norm": 1.6688393652366873,
|
32200 |
+
"learning_rate": 6.784621329324104e-07,
|
32201 |
+
"loss": 0.8116,
|
32202 |
+
"step": 4599
|
32203 |
+
},
|
32204 |
+
{
|
32205 |
+
"epoch": 0.89,
|
32206 |
+
"grad_norm": 1.6968648933180437,
|
32207 |
+
"learning_rate": 6.76205750456228e-07,
|
32208 |
+
"loss": 0.8566,
|
32209 |
+
"step": 4600
|
32210 |
+
},
|
32211 |
+
{
|
32212 |
+
"epoch": 0.89,
|
32213 |
+
"grad_norm": 1.699073788028988,
|
32214 |
+
"learning_rate": 6.739529949880263e-07,
|
32215 |
+
"loss": 0.9198,
|
32216 |
+
"step": 4601
|
32217 |
+
},
|
32218 |
+
{
|
32219 |
+
"epoch": 0.89,
|
32220 |
+
"grad_norm": 1.0303363034885953,
|
32221 |
+
"learning_rate": 6.717038674041354e-07,
|
32222 |
+
"loss": 0.8691,
|
32223 |
+
"step": 4602
|
32224 |
+
},
|
32225 |
+
{
|
32226 |
+
"epoch": 0.89,
|
32227 |
+
"grad_norm": 1.7700745086786087,
|
32228 |
+
"learning_rate": 6.694583685794798e-07,
|
32229 |
+
"loss": 0.8631,
|
32230 |
+
"step": 4603
|
32231 |
+
},
|
32232 |
+
{
|
32233 |
+
"epoch": 0.89,
|
32234 |
+
"grad_norm": 1.7156230999545747,
|
32235 |
+
"learning_rate": 6.672164993875707e-07,
|
32236 |
+
"loss": 0.8802,
|
32237 |
+
"step": 4604
|
32238 |
+
},
|
32239 |
+
{
|
32240 |
+
"epoch": 0.89,
|
32241 |
+
"grad_norm": 1.7587124281022528,
|
32242 |
+
"learning_rate": 6.649782607005095e-07,
|
32243 |
+
"loss": 0.8197,
|
32244 |
+
"step": 4605
|
32245 |
+
},
|
32246 |
+
{
|
32247 |
+
"epoch": 0.89,
|
32248 |
+
"grad_norm": 1.5890448851947154,
|
32249 |
+
"learning_rate": 6.62743653388982e-07,
|
32250 |
+
"loss": 0.7421,
|
32251 |
+
"step": 4606
|
32252 |
+
},
|
32253 |
+
{
|
32254 |
+
"epoch": 0.89,
|
32255 |
+
"grad_norm": 1.6941583093864598,
|
32256 |
+
"learning_rate": 6.605126783222637e-07,
|
32257 |
+
"loss": 0.9055,
|
32258 |
+
"step": 4607
|
32259 |
+
},
|
32260 |
+
{
|
32261 |
+
"epoch": 0.89,
|
32262 |
+
"grad_norm": 1.9345631981398188,
|
32263 |
+
"learning_rate": 6.582853363682184e-07,
|
32264 |
+
"loss": 0.8264,
|
32265 |
+
"step": 4608
|
32266 |
+
},
|
32267 |
+
{
|
32268 |
+
"epoch": 0.89,
|
32269 |
+
"grad_norm": 1.5727895120331519,
|
32270 |
+
"learning_rate": 6.560616283932897e-07,
|
32271 |
+
"loss": 0.8729,
|
32272 |
+
"step": 4609
|
32273 |
+
},
|
32274 |
+
{
|
32275 |
+
"epoch": 0.89,
|
32276 |
+
"grad_norm": 1.7270552928379281,
|
32277 |
+
"learning_rate": 6.538415552625143e-07,
|
32278 |
+
"loss": 0.839,
|
32279 |
+
"step": 4610
|
32280 |
+
},
|
32281 |
+
{
|
32282 |
+
"epoch": 0.89,
|
32283 |
+
"grad_norm": 1.6423156022324654,
|
32284 |
+
"learning_rate": 6.51625117839515e-07,
|
32285 |
+
"loss": 0.9414,
|
32286 |
+
"step": 4611
|
32287 |
+
},
|
32288 |
+
{
|
32289 |
+
"epoch": 0.89,
|
32290 |
+
"grad_norm": 1.588099616516975,
|
32291 |
+
"learning_rate": 6.494123169864964e-07,
|
32292 |
+
"loss": 0.9061,
|
32293 |
+
"step": 4612
|
32294 |
+
},
|
32295 |
+
{
|
32296 |
+
"epoch": 0.89,
|
32297 |
+
"grad_norm": 1.6908422858313596,
|
32298 |
+
"learning_rate": 6.472031535642509e-07,
|
32299 |
+
"loss": 0.8322,
|
32300 |
+
"step": 4613
|
32301 |
+
},
|
32302 |
+
{
|
32303 |
+
"epoch": 0.89,
|
32304 |
+
"grad_norm": 1.6507473606262408,
|
32305 |
+
"learning_rate": 6.449976284321547e-07,
|
32306 |
+
"loss": 0.8885,
|
32307 |
+
"step": 4614
|
32308 |
+
},
|
32309 |
+
{
|
32310 |
+
"epoch": 0.89,
|
32311 |
+
"grad_norm": 1.6452922413130318,
|
32312 |
+
"learning_rate": 6.427957424481724e-07,
|
32313 |
+
"loss": 0.7852,
|
32314 |
+
"step": 4615
|
32315 |
+
},
|
32316 |
+
{
|
32317 |
+
"epoch": 0.89,
|
32318 |
+
"grad_norm": 1.5906868583803813,
|
32319 |
+
"learning_rate": 6.405974964688477e-07,
|
32320 |
+
"loss": 0.8616,
|
32321 |
+
"step": 4616
|
32322 |
+
},
|
32323 |
+
{
|
32324 |
+
"epoch": 0.89,
|
32325 |
+
"grad_norm": 1.8525527893589988,
|
32326 |
+
"learning_rate": 6.38402891349309e-07,
|
32327 |
+
"loss": 0.8991,
|
32328 |
+
"step": 4617
|
32329 |
+
},
|
32330 |
+
{
|
32331 |
+
"epoch": 0.89,
|
32332 |
+
"grad_norm": 1.77877164245849,
|
32333 |
+
"learning_rate": 6.36211927943271e-07,
|
32334 |
+
"loss": 0.92,
|
32335 |
+
"step": 4618
|
32336 |
+
},
|
32337 |
+
{
|
32338 |
+
"epoch": 0.89,
|
32339 |
+
"grad_norm": 1.679959528587235,
|
32340 |
+
"learning_rate": 6.34024607103032e-07,
|
32341 |
+
"loss": 0.8461,
|
32342 |
+
"step": 4619
|
32343 |
+
},
|
32344 |
+
{
|
32345 |
+
"epoch": 0.89,
|
32346 |
+
"grad_norm": 1.650275475813174,
|
32347 |
+
"learning_rate": 6.318409296794703e-07,
|
32348 |
+
"loss": 0.7834,
|
32349 |
+
"step": 4620
|
32350 |
+
},
|
32351 |
+
{
|
32352 |
+
"epoch": 0.89,
|
32353 |
+
"grad_norm": 1.7103844161253232,
|
32354 |
+
"learning_rate": 6.296608965220496e-07,
|
32355 |
+
"loss": 0.8533,
|
32356 |
+
"step": 4621
|
32357 |
+
},
|
32358 |
+
{
|
32359 |
+
"epoch": 0.89,
|
32360 |
+
"grad_norm": 1.5560480879884606,
|
32361 |
+
"learning_rate": 6.274845084788151e-07,
|
32362 |
+
"loss": 0.8134,
|
32363 |
+
"step": 4622
|
32364 |
+
},
|
32365 |
+
{
|
32366 |
+
"epoch": 0.89,
|
32367 |
+
"grad_norm": 1.6758532716694152,
|
32368 |
+
"learning_rate": 6.253117663963948e-07,
|
32369 |
+
"loss": 0.8064,
|
32370 |
+
"step": 4623
|
32371 |
+
},
|
32372 |
+
{
|
32373 |
+
"epoch": 0.89,
|
32374 |
+
"grad_norm": 1.703648351870057,
|
32375 |
+
"learning_rate": 6.23142671119995e-07,
|
32376 |
+
"loss": 0.8758,
|
32377 |
+
"step": 4624
|
32378 |
+
},
|
32379 |
+
{
|
32380 |
+
"epoch": 0.89,
|
32381 |
+
"grad_norm": 1.7443635783563851,
|
32382 |
+
"learning_rate": 6.209772234934075e-07,
|
32383 |
+
"loss": 0.9093,
|
32384 |
+
"step": 4625
|
32385 |
+
},
|
32386 |
+
{
|
32387 |
+
"epoch": 0.89,
|
32388 |
+
"grad_norm": 1.6349407103004254,
|
32389 |
+
"learning_rate": 6.188154243590028e-07,
|
32390 |
+
"loss": 0.8695,
|
32391 |
+
"step": 4626
|
32392 |
+
},
|
32393 |
+
{
|
32394 |
+
"epoch": 0.89,
|
32395 |
+
"grad_norm": 1.7170681228786828,
|
32396 |
+
"learning_rate": 6.166572745577326e-07,
|
32397 |
+
"loss": 0.8604,
|
32398 |
+
"step": 4627
|
32399 |
+
},
|
32400 |
+
{
|
32401 |
+
"epoch": 0.89,
|
32402 |
+
"grad_norm": 1.6149701805074526,
|
32403 |
+
"learning_rate": 6.145027749291299e-07,
|
32404 |
+
"loss": 0.7834,
|
32405 |
+
"step": 4628
|
32406 |
+
},
|
32407 |
+
{
|
32408 |
+
"epoch": 0.89,
|
32409 |
+
"grad_norm": 0.9629179919446664,
|
32410 |
+
"learning_rate": 6.123519263113075e-07,
|
32411 |
+
"loss": 0.7807,
|
32412 |
+
"step": 4629
|
32413 |
+
},
|
32414 |
+
{
|
32415 |
+
"epoch": 0.89,
|
32416 |
+
"grad_norm": 1.4310449112443988,
|
32417 |
+
"learning_rate": 6.102047295409585e-07,
|
32418 |
+
"loss": 0.8408,
|
32419 |
+
"step": 4630
|
32420 |
+
},
|
32421 |
+
{
|
32422 |
+
"epoch": 0.89,
|
32423 |
+
"grad_norm": 1.431591253406188,
|
32424 |
+
"learning_rate": 6.080611854533514e-07,
|
32425 |
+
"loss": 0.7416,
|
32426 |
+
"step": 4631
|
32427 |
+
},
|
32428 |
+
{
|
32429 |
+
"epoch": 0.89,
|
32430 |
+
"grad_norm": 1.452024940046649,
|
32431 |
+
"learning_rate": 6.059212948823379e-07,
|
32432 |
+
"loss": 0.7764,
|
32433 |
+
"step": 4632
|
32434 |
+
},
|
32435 |
+
{
|
32436 |
+
"epoch": 0.89,
|
32437 |
+
"grad_norm": 1.6641309102131858,
|
32438 |
+
"learning_rate": 6.037850586603478e-07,
|
32439 |
+
"loss": 0.9695,
|
32440 |
+
"step": 4633
|
32441 |
+
},
|
32442 |
+
{
|
32443 |
+
"epoch": 0.89,
|
32444 |
+
"grad_norm": 1.5963484121260585,
|
32445 |
+
"learning_rate": 6.016524776183886e-07,
|
32446 |
+
"loss": 0.77,
|
32447 |
+
"step": 4634
|
32448 |
+
},
|
32449 |
+
{
|
32450 |
+
"epoch": 0.89,
|
32451 |
+
"grad_norm": 1.7710370822654729,
|
32452 |
+
"learning_rate": 5.995235525860466e-07,
|
32453 |
+
"loss": 0.835,
|
32454 |
+
"step": 4635
|
32455 |
+
},
|
32456 |
+
{
|
32457 |
+
"epoch": 0.89,
|
32458 |
+
"grad_norm": 0.9238054758388379,
|
32459 |
+
"learning_rate": 5.97398284391486e-07,
|
32460 |
+
"loss": 0.8299,
|
32461 |
+
"step": 4636
|
32462 |
+
},
|
32463 |
+
{
|
32464 |
+
"epoch": 0.89,
|
32465 |
+
"grad_norm": 1.732216941168927,
|
32466 |
+
"learning_rate": 5.952766738614468e-07,
|
32467 |
+
"loss": 0.8778,
|
32468 |
+
"step": 4637
|
32469 |
+
},
|
32470 |
+
{
|
32471 |
+
"epoch": 0.89,
|
32472 |
+
"grad_norm": 1.618882221464819,
|
32473 |
+
"learning_rate": 5.931587218212498e-07,
|
32474 |
+
"loss": 0.7453,
|
32475 |
+
"step": 4638
|
32476 |
+
},
|
32477 |
+
{
|
32478 |
+
"epoch": 0.89,
|
32479 |
+
"grad_norm": 1.7275497499921202,
|
32480 |
+
"learning_rate": 5.91044429094787e-07,
|
32481 |
+
"loss": 0.8808,
|
32482 |
+
"step": 4639
|
32483 |
+
},
|
32484 |
+
{
|
32485 |
+
"epoch": 0.89,
|
32486 |
+
"grad_norm": 1.6947725598628667,
|
32487 |
+
"learning_rate": 5.889337965045305e-07,
|
32488 |
+
"loss": 0.817,
|
32489 |
+
"step": 4640
|
32490 |
+
},
|
32491 |
+
{
|
32492 |
+
"epoch": 0.89,
|
32493 |
+
"grad_norm": 1.645115300544802,
|
32494 |
+
"learning_rate": 5.868268248715292e-07,
|
32495 |
+
"loss": 0.8123,
|
32496 |
+
"step": 4641
|
32497 |
+
},
|
32498 |
+
{
|
32499 |
+
"epoch": 0.89,
|
32500 |
+
"grad_norm": 1.552338918938507,
|
32501 |
+
"learning_rate": 5.847235150154074e-07,
|
32502 |
+
"loss": 0.8338,
|
32503 |
+
"step": 4642
|
32504 |
+
},
|
32505 |
+
{
|
32506 |
+
"epoch": 0.89,
|
32507 |
+
"grad_norm": 1.6695952907682257,
|
32508 |
+
"learning_rate": 5.826238677543628e-07,
|
32509 |
+
"loss": 0.8792,
|
32510 |
+
"step": 4643
|
32511 |
+
},
|
32512 |
+
{
|
32513 |
+
"epoch": 0.89,
|
32514 |
+
"grad_norm": 1.768852515536146,
|
32515 |
+
"learning_rate": 5.805278839051709e-07,
|
32516 |
+
"loss": 0.8514,
|
32517 |
+
"step": 4644
|
32518 |
+
},
|
32519 |
+
{
|
32520 |
+
"epoch": 0.89,
|
32521 |
+
"grad_norm": 1.4671139405698723,
|
32522 |
+
"learning_rate": 5.784355642831829e-07,
|
32523 |
+
"loss": 0.771,
|
32524 |
+
"step": 4645
|
32525 |
+
},
|
32526 |
+
{
|
32527 |
+
"epoch": 0.89,
|
32528 |
+
"grad_norm": 1.7568780187384645,
|
32529 |
+
"learning_rate": 5.763469097023189e-07,
|
32530 |
+
"loss": 0.8898,
|
32531 |
+
"step": 4646
|
32532 |
+
},
|
32533 |
+
{
|
32534 |
+
"epoch": 0.89,
|
32535 |
+
"grad_norm": 0.9703185609130268,
|
32536 |
+
"learning_rate": 5.742619209750788e-07,
|
32537 |
+
"loss": 0.8261,
|
32538 |
+
"step": 4647
|
32539 |
+
},
|
32540 |
+
{
|
32541 |
+
"epoch": 0.9,
|
32542 |
+
"grad_norm": 1.552615942105755,
|
32543 |
+
"learning_rate": 5.721805989125362e-07,
|
32544 |
+
"loss": 0.84,
|
32545 |
+
"step": 4648
|
32546 |
+
},
|
32547 |
+
{
|
32548 |
+
"epoch": 0.9,
|
32549 |
+
"grad_norm": 1.566663525662729,
|
32550 |
+
"learning_rate": 5.701029443243345e-07,
|
32551 |
+
"loss": 0.7267,
|
32552 |
+
"step": 4649
|
32553 |
+
},
|
32554 |
+
{
|
32555 |
+
"epoch": 0.9,
|
32556 |
+
"grad_norm": 1.8205065610738194,
|
32557 |
+
"learning_rate": 5.680289580186959e-07,
|
32558 |
+
"loss": 0.8879,
|
32559 |
+
"step": 4650
|
32560 |
+
},
|
32561 |
+
{
|
32562 |
+
"epoch": 0.9,
|
32563 |
+
"grad_norm": 1.654652335755769,
|
32564 |
+
"learning_rate": 5.659586408024098e-07,
|
32565 |
+
"loss": 0.8391,
|
32566 |
+
"step": 4651
|
32567 |
+
},
|
32568 |
+
{
|
32569 |
+
"epoch": 0.9,
|
32570 |
+
"grad_norm": 1.6905002178206892,
|
32571 |
+
"learning_rate": 5.63891993480844e-07,
|
32572 |
+
"loss": 0.8697,
|
32573 |
+
"step": 4652
|
32574 |
+
},
|
32575 |
+
{
|
32576 |
+
"epoch": 0.9,
|
32577 |
+
"grad_norm": 0.9546336563075242,
|
32578 |
+
"learning_rate": 5.618290168579333e-07,
|
32579 |
+
"loss": 0.7947,
|
32580 |
+
"step": 4653
|
32581 |
+
},
|
32582 |
+
{
|
32583 |
+
"epoch": 0.9,
|
32584 |
+
"grad_norm": 1.6926962772920793,
|
32585 |
+
"learning_rate": 5.597697117361877e-07,
|
32586 |
+
"loss": 0.8929,
|
32587 |
+
"step": 4654
|
32588 |
+
},
|
32589 |
+
{
|
32590 |
+
"epoch": 0.9,
|
32591 |
+
"grad_norm": 1.7332662858890597,
|
32592 |
+
"learning_rate": 5.57714078916689e-07,
|
32593 |
+
"loss": 0.9356,
|
32594 |
+
"step": 4655
|
32595 |
+
},
|
32596 |
+
{
|
32597 |
+
"epoch": 0.9,
|
32598 |
+
"grad_norm": 1.8157951920257163,
|
32599 |
+
"learning_rate": 5.556621191990907e-07,
|
32600 |
+
"loss": 0.9756,
|
32601 |
+
"step": 4656
|
32602 |
+
},
|
32603 |
+
{
|
32604 |
+
"epoch": 0.9,
|
32605 |
+
"grad_norm": 1.7425133604802003,
|
32606 |
+
"learning_rate": 5.536138333816144e-07,
|
32607 |
+
"loss": 0.7792,
|
32608 |
+
"step": 4657
|
32609 |
+
},
|
32610 |
+
{
|
32611 |
+
"epoch": 0.9,
|
32612 |
+
"grad_norm": 1.5621644307282403,
|
32613 |
+
"learning_rate": 5.515692222610558e-07,
|
32614 |
+
"loss": 0.7705,
|
32615 |
+
"step": 4658
|
32616 |
+
},
|
32617 |
+
{
|
32618 |
+
"epoch": 0.9,
|
32619 |
+
"grad_norm": 1.6356106516646511,
|
32620 |
+
"learning_rate": 5.495282866327812e-07,
|
32621 |
+
"loss": 0.8203,
|
32622 |
+
"step": 4659
|
32623 |
+
},
|
32624 |
+
{
|
32625 |
+
"epoch": 0.9,
|
32626 |
+
"grad_norm": 1.7357420054931105,
|
32627 |
+
"learning_rate": 5.474910272907242e-07,
|
32628 |
+
"loss": 0.8684,
|
32629 |
+
"step": 4660
|
32630 |
+
},
|
32631 |
+
{
|
32632 |
+
"epoch": 0.9,
|
32633 |
+
"grad_norm": 1.644574615849858,
|
32634 |
+
"learning_rate": 5.454574450273898e-07,
|
32635 |
+
"loss": 0.8575,
|
32636 |
+
"step": 4661
|
32637 |
+
},
|
32638 |
+
{
|
32639 |
+
"epoch": 0.9,
|
32640 |
+
"grad_norm": 1.5828708989675613,
|
32641 |
+
"learning_rate": 5.434275406338552e-07,
|
32642 |
+
"loss": 0.8204,
|
32643 |
+
"step": 4662
|
32644 |
+
},
|
32645 |
+
{
|
32646 |
+
"epoch": 0.9,
|
32647 |
+
"grad_norm": 1.4186623546018264,
|
32648 |
+
"learning_rate": 5.414013148997621e-07,
|
32649 |
+
"loss": 0.7704,
|
32650 |
+
"step": 4663
|
32651 |
+
},
|
32652 |
+
{
|
32653 |
+
"epoch": 0.9,
|
32654 |
+
"grad_norm": 1.6833154380110336,
|
32655 |
+
"learning_rate": 5.393787686133234e-07,
|
32656 |
+
"loss": 0.8795,
|
32657 |
+
"step": 4664
|
32658 |
+
},
|
32659 |
+
{
|
32660 |
+
"epoch": 0.9,
|
32661 |
+
"grad_norm": 1.0095255383815676,
|
32662 |
+
"learning_rate": 5.37359902561323e-07,
|
32663 |
+
"loss": 0.86,
|
32664 |
+
"step": 4665
|
32665 |
+
},
|
32666 |
+
{
|
32667 |
+
"epoch": 0.9,
|
32668 |
+
"grad_norm": 1.5918336391342192,
|
32669 |
+
"learning_rate": 5.353447175291104e-07,
|
32670 |
+
"loss": 0.8537,
|
32671 |
+
"step": 4666
|
32672 |
+
},
|
32673 |
+
{
|
32674 |
+
"epoch": 0.9,
|
32675 |
+
"grad_norm": 1.6224700248960138,
|
32676 |
+
"learning_rate": 5.333332143006043e-07,
|
32677 |
+
"loss": 0.8482,
|
32678 |
+
"step": 4667
|
32679 |
+
},
|
32680 |
+
{
|
32681 |
+
"epoch": 0.9,
|
32682 |
+
"grad_norm": 1.6704298241763658,
|
32683 |
+
"learning_rate": 5.313253936582885e-07,
|
32684 |
+
"loss": 0.7602,
|
32685 |
+
"step": 4668
|
32686 |
+
},
|
32687 |
+
{
|
32688 |
+
"epoch": 0.9,
|
32689 |
+
"grad_norm": 0.9501917910467306,
|
32690 |
+
"learning_rate": 5.293212563832173e-07,
|
32691 |
+
"loss": 0.7765,
|
32692 |
+
"step": 4669
|
32693 |
+
},
|
32694 |
+
{
|
32695 |
+
"epoch": 0.9,
|
32696 |
+
"grad_norm": 1.527283488559378,
|
32697 |
+
"learning_rate": 5.273208032550126e-07,
|
32698 |
+
"loss": 0.7678,
|
32699 |
+
"step": 4670
|
32700 |
+
},
|
32701 |
+
{
|
32702 |
+
"epoch": 0.9,
|
32703 |
+
"grad_norm": 1.6792519676900624,
|
32704 |
+
"learning_rate": 5.253240350518607e-07,
|
32705 |
+
"loss": 0.7365,
|
32706 |
+
"step": 4671
|
32707 |
+
},
|
32708 |
+
{
|
32709 |
+
"epoch": 0.9,
|
32710 |
+
"grad_norm": 1.64356865145821,
|
32711 |
+
"learning_rate": 5.233309525505159e-07,
|
32712 |
+
"loss": 0.8504,
|
32713 |
+
"step": 4672
|
32714 |
+
},
|
32715 |
+
{
|
32716 |
+
"epoch": 0.9,
|
32717 |
+
"grad_norm": 1.4855516463960838,
|
32718 |
+
"learning_rate": 5.213415565262981e-07,
|
32719 |
+
"loss": 0.7894,
|
32720 |
+
"step": 4673
|
32721 |
+
},
|
32722 |
+
{
|
32723 |
+
"epoch": 0.9,
|
32724 |
+
"grad_norm": 1.623938335284566,
|
32725 |
+
"learning_rate": 5.193558477530958e-07,
|
32726 |
+
"loss": 0.8903,
|
32727 |
+
"step": 4674
|
32728 |
+
},
|
32729 |
+
{
|
32730 |
+
"epoch": 0.9,
|
32731 |
+
"grad_norm": 1.6525083687168214,
|
32732 |
+
"learning_rate": 5.173738270033568e-07,
|
32733 |
+
"loss": 0.7674,
|
32734 |
+
"step": 4675
|
32735 |
+
},
|
32736 |
+
{
|
32737 |
+
"epoch": 0.9,
|
32738 |
+
"grad_norm": 1.471112511849222,
|
32739 |
+
"learning_rate": 5.153954950481e-07,
|
32740 |
+
"loss": 0.7344,
|
32741 |
+
"step": 4676
|
32742 |
+
},
|
32743 |
+
{
|
32744 |
+
"epoch": 0.9,
|
32745 |
+
"grad_norm": 1.6775328255178097,
|
32746 |
+
"learning_rate": 5.134208526569084e-07,
|
32747 |
+
"loss": 0.9133,
|
32748 |
+
"step": 4677
|
32749 |
+
},
|
32750 |
+
{
|
32751 |
+
"epoch": 0.9,
|
32752 |
+
"grad_norm": 1.687127162343473,
|
32753 |
+
"learning_rate": 5.114499005979279e-07,
|
32754 |
+
"loss": 0.8031,
|
32755 |
+
"step": 4678
|
32756 |
+
},
|
32757 |
+
{
|
32758 |
+
"epoch": 0.9,
|
32759 |
+
"grad_norm": 1.749454069288342,
|
32760 |
+
"learning_rate": 5.094826396378694e-07,
|
32761 |
+
"loss": 0.7052,
|
32762 |
+
"step": 4679
|
32763 |
+
},
|
32764 |
+
{
|
32765 |
+
"epoch": 0.9,
|
32766 |
+
"grad_norm": 1.7090967580697272,
|
32767 |
+
"learning_rate": 5.075190705420097e-07,
|
32768 |
+
"loss": 0.8282,
|
32769 |
+
"step": 4680
|
32770 |
+
},
|
32771 |
+
{
|
32772 |
+
"epoch": 0.9,
|
32773 |
+
"grad_norm": 1.6219991440125505,
|
32774 |
+
"learning_rate": 5.0555919407419e-07,
|
32775 |
+
"loss": 0.7687,
|
32776 |
+
"step": 4681
|
32777 |
+
},
|
32778 |
+
{
|
32779 |
+
"epoch": 0.9,
|
32780 |
+
"grad_norm": 1.712762287461227,
|
32781 |
+
"learning_rate": 5.036030109968082e-07,
|
32782 |
+
"loss": 0.9188,
|
32783 |
+
"step": 4682
|
32784 |
+
},
|
32785 |
+
{
|
32786 |
+
"epoch": 0.9,
|
32787 |
+
"grad_norm": 1.6465939602965196,
|
32788 |
+
"learning_rate": 5.016505220708334e-07,
|
32789 |
+
"loss": 0.91,
|
32790 |
+
"step": 4683
|
32791 |
+
},
|
32792 |
+
{
|
32793 |
+
"epoch": 0.9,
|
32794 |
+
"grad_norm": 1.6876805121272327,
|
32795 |
+
"learning_rate": 4.997017280557936e-07,
|
32796 |
+
"loss": 0.8541,
|
32797 |
+
"step": 4684
|
32798 |
+
},
|
32799 |
+
{
|
32800 |
+
"epoch": 0.9,
|
32801 |
+
"grad_norm": 1.5718236529911196,
|
32802 |
+
"learning_rate": 4.977566297097824e-07,
|
32803 |
+
"loss": 0.8048,
|
32804 |
+
"step": 4685
|
32805 |
+
},
|
32806 |
+
{
|
32807 |
+
"epoch": 0.9,
|
32808 |
+
"grad_norm": 1.7317068845916241,
|
32809 |
+
"learning_rate": 4.958152277894523e-07,
|
32810 |
+
"loss": 0.8608,
|
32811 |
+
"step": 4686
|
32812 |
+
},
|
32813 |
+
{
|
32814 |
+
"epoch": 0.9,
|
32815 |
+
"grad_norm": 1.5823403939243572,
|
32816 |
+
"learning_rate": 4.938775230500192e-07,
|
32817 |
+
"loss": 0.8097,
|
32818 |
+
"step": 4687
|
32819 |
+
},
|
32820 |
+
{
|
32821 |
+
"epoch": 0.9,
|
32822 |
+
"grad_norm": 1.7201556233252429,
|
32823 |
+
"learning_rate": 4.919435162452635e-07,
|
32824 |
+
"loss": 0.8875,
|
32825 |
+
"step": 4688
|
32826 |
+
},
|
32827 |
+
{
|
32828 |
+
"epoch": 0.9,
|
32829 |
+
"grad_norm": 1.5195980744396738,
|
32830 |
+
"learning_rate": 4.900132081275222e-07,
|
32831 |
+
"loss": 0.7755,
|
32832 |
+
"step": 4689
|
32833 |
+
},
|
32834 |
+
{
|
32835 |
+
"epoch": 0.9,
|
32836 |
+
"grad_norm": 1.6020899057466513,
|
32837 |
+
"learning_rate": 4.880865994476958e-07,
|
32838 |
+
"loss": 0.8562,
|
32839 |
+
"step": 4690
|
32840 |
+
},
|
32841 |
+
{
|
32842 |
+
"epoch": 0.9,
|
32843 |
+
"grad_norm": 1.806839673319644,
|
32844 |
+
"learning_rate": 4.861636909552469e-07,
|
32845 |
+
"loss": 0.7896,
|
32846 |
+
"step": 4691
|
32847 |
+
},
|
32848 |
+
{
|
32849 |
+
"epoch": 0.9,
|
32850 |
+
"grad_norm": 1.7078013031874621,
|
32851 |
+
"learning_rate": 4.842444833981985e-07,
|
32852 |
+
"loss": 0.8869,
|
32853 |
+
"step": 4692
|
32854 |
+
},
|
32855 |
+
{
|
32856 |
+
"epoch": 0.9,
|
32857 |
+
"grad_norm": 1.7614360589223628,
|
32858 |
+
"learning_rate": 4.82328977523131e-07,
|
32859 |
+
"loss": 0.8127,
|
32860 |
+
"step": 4693
|
32861 |
+
},
|
32862 |
+
{
|
32863 |
+
"epoch": 0.9,
|
32864 |
+
"grad_norm": 1.7649355206668866,
|
32865 |
+
"learning_rate": 4.804171740751873e-07,
|
32866 |
+
"loss": 0.9398,
|
32867 |
+
"step": 4694
|
32868 |
+
},
|
32869 |
+
{
|
32870 |
+
"epoch": 0.9,
|
32871 |
+
"grad_norm": 1.7582056140597686,
|
32872 |
+
"learning_rate": 4.785090737980725e-07,
|
32873 |
+
"loss": 0.9227,
|
32874 |
+
"step": 4695
|
32875 |
+
},
|
32876 |
+
{
|
32877 |
+
"epoch": 0.9,
|
32878 |
+
"grad_norm": 1.5040542605572478,
|
32879 |
+
"learning_rate": 4.766046774340438e-07,
|
32880 |
+
"loss": 0.8656,
|
32881 |
+
"step": 4696
|
32882 |
+
},
|
32883 |
+
{
|
32884 |
+
"epoch": 0.9,
|
32885 |
+
"grad_norm": 1.6405321539227562,
|
32886 |
+
"learning_rate": 4.747039857239244e-07,
|
32887 |
+
"loss": 0.797,
|
32888 |
+
"step": 4697
|
32889 |
+
},
|
32890 |
+
{
|
32891 |
+
"epoch": 0.9,
|
32892 |
+
"grad_norm": 0.9701252958307068,
|
32893 |
+
"learning_rate": 4.7280699940709275e-07,
|
32894 |
+
"loss": 0.8364,
|
32895 |
+
"step": 4698
|
32896 |
+
},
|
32897 |
+
{
|
32898 |
+
"epoch": 0.9,
|
32899 |
+
"grad_norm": 1.632269414247232,
|
32900 |
+
"learning_rate": 4.7091371922148966e-07,
|
32901 |
+
"loss": 0.8506,
|
32902 |
+
"step": 4699
|
32903 |
+
},
|
32904 |
+
{
|
32905 |
+
"epoch": 0.91,
|
32906 |
+
"grad_norm": 1.8161203221894053,
|
32907 |
+
"learning_rate": 4.6902414590360935e-07,
|
32908 |
+
"loss": 0.9253,
|
32909 |
+
"step": 4700
|
32910 |
+
},
|
32911 |
+
{
|
32912 |
+
"epoch": 0.91,
|
32913 |
+
"grad_norm": 1.591192668567659,
|
32914 |
+
"learning_rate": 4.6713828018850606e-07,
|
32915 |
+
"loss": 0.8823,
|
32916 |
+
"step": 4701
|
32917 |
+
},
|
32918 |
+
{
|
32919 |
+
"epoch": 0.91,
|
32920 |
+
"grad_norm": 1.753644938899693,
|
32921 |
+
"learning_rate": 4.6525612280979514e-07,
|
32922 |
+
"loss": 0.8554,
|
32923 |
+
"step": 4702
|
32924 |
+
},
|
32925 |
+
{
|
32926 |
+
"epoch": 0.91,
|
32927 |
+
"grad_norm": 1.7057079269067104,
|
32928 |
+
"learning_rate": 4.6337767449964323e-07,
|
32929 |
+
"loss": 0.8982,
|
32930 |
+
"step": 4703
|
32931 |
+
},
|
32932 |
+
{
|
32933 |
+
"epoch": 0.91,
|
32934 |
+
"grad_norm": 1.8081618193792555,
|
32935 |
+
"learning_rate": 4.6150293598877793e-07,
|
32936 |
+
"loss": 0.8193,
|
32937 |
+
"step": 4704
|
32938 |
+
},
|
32939 |
+
{
|
32940 |
+
"epoch": 0.91,
|
32941 |
+
"grad_norm": 1.7086633815503671,
|
32942 |
+
"learning_rate": 4.596319080064837e-07,
|
32943 |
+
"loss": 0.8331,
|
32944 |
+
"step": 4705
|
32945 |
+
},
|
32946 |
+
{
|
32947 |
+
"epoch": 0.91,
|
32948 |
+
"grad_norm": 1.6974864863612644,
|
32949 |
+
"learning_rate": 4.577645912806006e-07,
|
32950 |
+
"loss": 0.7984,
|
32951 |
+
"step": 4706
|
32952 |
+
},
|
32953 |
+
{
|
32954 |
+
"epoch": 0.91,
|
32955 |
+
"grad_norm": 1.643569617189755,
|
32956 |
+
"learning_rate": 4.559009865375241e-07,
|
32957 |
+
"loss": 0.8059,
|
32958 |
+
"step": 4707
|
32959 |
+
},
|
32960 |
+
{
|
32961 |
+
"epoch": 0.91,
|
32962 |
+
"grad_norm": 1.5728461903585766,
|
32963 |
+
"learning_rate": 4.5404109450221e-07,
|
32964 |
+
"loss": 0.7617,
|
32965 |
+
"step": 4708
|
32966 |
+
},
|
32967 |
+
{
|
32968 |
+
"epoch": 0.91,
|
32969 |
+
"grad_norm": 1.5168007933423657,
|
32970 |
+
"learning_rate": 4.521849158981628e-07,
|
32971 |
+
"loss": 0.7905,
|
32972 |
+
"step": 4709
|
32973 |
+
},
|
32974 |
+
{
|
32975 |
+
"epoch": 0.91,
|
32976 |
+
"grad_norm": 1.6654829931616189,
|
32977 |
+
"learning_rate": 4.503324514474483e-07,
|
32978 |
+
"loss": 0.8554,
|
32979 |
+
"step": 4710
|
32980 |
+
},
|
32981 |
+
{
|
32982 |
+
"epoch": 0.91,
|
32983 |
+
"grad_norm": 1.601183658916333,
|
32984 |
+
"learning_rate": 4.484837018706867e-07,
|
32985 |
+
"loss": 0.8237,
|
32986 |
+
"step": 4711
|
32987 |
+
},
|
32988 |
+
{
|
32989 |
+
"epoch": 0.91,
|
32990 |
+
"grad_norm": 1.7492383350646525,
|
32991 |
+
"learning_rate": 4.4663866788704823e-07,
|
32992 |
+
"loss": 0.9066,
|
32993 |
+
"step": 4712
|
32994 |
+
},
|
32995 |
+
{
|
32996 |
+
"epoch": 0.91,
|
32997 |
+
"grad_norm": 1.6652859699269564,
|
32998 |
+
"learning_rate": 4.447973502142644e-07,
|
32999 |
+
"loss": 0.8496,
|
33000 |
+
"step": 4713
|
33001 |
+
},
|
33002 |
+
{
|
33003 |
+
"epoch": 0.91,
|
33004 |
+
"grad_norm": 1.5176071936844036,
|
33005 |
+
"learning_rate": 4.4295974956861664e-07,
|
33006 |
+
"loss": 0.9279,
|
33007 |
+
"step": 4714
|
33008 |
+
},
|
33009 |
+
{
|
33010 |
+
"epoch": 0.91,
|
33011 |
+
"grad_norm": 1.7104988370209713,
|
33012 |
+
"learning_rate": 4.4112586666494317e-07,
|
33013 |
+
"loss": 0.8779,
|
33014 |
+
"step": 4715
|
33015 |
+
},
|
33016 |
+
{
|
33017 |
+
"epoch": 0.91,
|
33018 |
+
"grad_norm": 1.466684809772613,
|
33019 |
+
"learning_rate": 4.3929570221663444e-07,
|
33020 |
+
"loss": 0.8849,
|
33021 |
+
"step": 4716
|
33022 |
+
},
|
33023 |
+
{
|
33024 |
+
"epoch": 0.91,
|
33025 |
+
"grad_norm": 1.5064332377635345,
|
33026 |
+
"learning_rate": 4.3746925693563314e-07,
|
33027 |
+
"loss": 0.7313,
|
33028 |
+
"step": 4717
|
33029 |
+
},
|
33030 |
+
{
|
33031 |
+
"epoch": 0.91,
|
33032 |
+
"grad_norm": 1.6662312130875077,
|
33033 |
+
"learning_rate": 4.3564653153243875e-07,
|
33034 |
+
"loss": 0.8869,
|
33035 |
+
"step": 4718
|
33036 |
+
},
|
33037 |
+
{
|
33038 |
+
"epoch": 0.91,
|
33039 |
+
"grad_norm": 1.669135117581719,
|
33040 |
+
"learning_rate": 4.3382752671610075e-07,
|
33041 |
+
"loss": 0.8753,
|
33042 |
+
"step": 4719
|
33043 |
+
},
|
33044 |
+
{
|
33045 |
+
"epoch": 0.91,
|
33046 |
+
"grad_norm": 1.5739945493482388,
|
33047 |
+
"learning_rate": 4.3201224319422084e-07,
|
33048 |
+
"loss": 0.8615,
|
33049 |
+
"step": 4720
|
33050 |
+
},
|
33051 |
+
{
|
33052 |
+
"epoch": 0.91,
|
33053 |
+
"grad_norm": 1.6975091587403395,
|
33054 |
+
"learning_rate": 4.3020068167295646e-07,
|
33055 |
+
"loss": 0.8725,
|
33056 |
+
"step": 4721
|
33057 |
+
},
|
33058 |
+
{
|
33059 |
+
"epoch": 0.91,
|
33060 |
+
"grad_norm": 1.559228645302732,
|
33061 |
+
"learning_rate": 4.283928428570139e-07,
|
33062 |
+
"loss": 0.8113,
|
33063 |
+
"step": 4722
|
33064 |
+
},
|
33065 |
+
{
|
33066 |
+
"epoch": 0.91,
|
33067 |
+
"grad_norm": 1.4789161304081258,
|
33068 |
+
"learning_rate": 4.2658872744965273e-07,
|
33069 |
+
"loss": 0.8488,
|
33070 |
+
"step": 4723
|
33071 |
+
},
|
33072 |
+
{
|
33073 |
+
"epoch": 0.91,
|
33074 |
+
"grad_norm": 1.762455128063593,
|
33075 |
+
"learning_rate": 4.2478833615268386e-07,
|
33076 |
+
"loss": 0.8784,
|
33077 |
+
"step": 4724
|
33078 |
+
},
|
33079 |
+
{
|
33080 |
+
"epoch": 0.91,
|
33081 |
+
"grad_norm": 1.6819388276178813,
|
33082 |
+
"learning_rate": 4.2299166966647154e-07,
|
33083 |
+
"loss": 0.9057,
|
33084 |
+
"step": 4725
|
33085 |
+
},
|
33086 |
+
{
|
33087 |
+
"epoch": 0.91,
|
33088 |
+
"grad_norm": 1.5074004293968402,
|
33089 |
+
"learning_rate": 4.211987286899255e-07,
|
33090 |
+
"loss": 0.7763,
|
33091 |
+
"step": 4726
|
33092 |
+
},
|
33093 |
+
{
|
33094 |
+
"epoch": 0.91,
|
33095 |
+
"grad_norm": 1.6874112613914176,
|
33096 |
+
"learning_rate": 4.194095139205123e-07,
|
33097 |
+
"loss": 0.8848,
|
33098 |
+
"step": 4727
|
33099 |
+
},
|
33100 |
+
{
|
33101 |
+
"epoch": 0.91,
|
33102 |
+
"grad_norm": 1.4389099026076047,
|
33103 |
+
"learning_rate": 4.1762402605424526e-07,
|
33104 |
+
"loss": 0.8292,
|
33105 |
+
"step": 4728
|
33106 |
+
},
|
33107 |
+
{
|
33108 |
+
"epoch": 0.91,
|
33109 |
+
"grad_norm": 1.7523147070895562,
|
33110 |
+
"learning_rate": 4.1584226578568977e-07,
|
33111 |
+
"loss": 0.8041,
|
33112 |
+
"step": 4729
|
33113 |
+
},
|
33114 |
+
{
|
33115 |
+
"epoch": 0.91,
|
33116 |
+
"grad_norm": 1.5440277838586294,
|
33117 |
+
"learning_rate": 4.1406423380796037e-07,
|
33118 |
+
"loss": 0.842,
|
33119 |
+
"step": 4730
|
33120 |
+
},
|
33121 |
+
{
|
33122 |
+
"epoch": 0.91,
|
33123 |
+
"grad_norm": 1.683555768036824,
|
33124 |
+
"learning_rate": 4.122899308127215e-07,
|
33125 |
+
"loss": 0.7741,
|
33126 |
+
"step": 4731
|
33127 |
+
},
|
33128 |
+
{
|
33129 |
+
"epoch": 0.91,
|
33130 |
+
"grad_norm": 1.5537148224409472,
|
33131 |
+
"learning_rate": 4.105193574901878e-07,
|
33132 |
+
"loss": 0.8457,
|
33133 |
+
"step": 4732
|
33134 |
+
},
|
33135 |
+
{
|
33136 |
+
"epoch": 0.91,
|
33137 |
+
"grad_norm": 1.5393656799619777,
|
33138 |
+
"learning_rate": 4.087525145291205e-07,
|
33139 |
+
"loss": 0.8413,
|
33140 |
+
"step": 4733
|
33141 |
+
},
|
33142 |
+
{
|
33143 |
+
"epoch": 0.91,
|
33144 |
+
"grad_norm": 1.5918678095739118,
|
33145 |
+
"learning_rate": 4.0698940261683197e-07,
|
33146 |
+
"loss": 0.807,
|
33147 |
+
"step": 4734
|
33148 |
+
},
|
33149 |
+
{
|
33150 |
+
"epoch": 0.91,
|
33151 |
+
"grad_norm": 1.7728363838958756,
|
33152 |
+
"learning_rate": 4.052300224391825e-07,
|
33153 |
+
"loss": 0.7864,
|
33154 |
+
"step": 4735
|
33155 |
+
},
|
33156 |
+
{
|
33157 |
+
"epoch": 0.91,
|
33158 |
+
"grad_norm": 1.7482045193198998,
|
33159 |
+
"learning_rate": 4.0347437468058026e-07,
|
33160 |
+
"loss": 0.877,
|
33161 |
+
"step": 4736
|
33162 |
+
},
|
33163 |
+
{
|
33164 |
+
"epoch": 0.91,
|
33165 |
+
"grad_norm": 1.7525840635202183,
|
33166 |
+
"learning_rate": 4.017224600239833e-07,
|
33167 |
+
"loss": 0.8769,
|
33168 |
+
"step": 4737
|
33169 |
+
},
|
33170 |
+
{
|
33171 |
+
"epoch": 0.91,
|
33172 |
+
"grad_norm": 1.5071276409577965,
|
33173 |
+
"learning_rate": 3.999742791508965e-07,
|
33174 |
+
"loss": 0.8583,
|
33175 |
+
"step": 4738
|
33176 |
+
},
|
33177 |
+
{
|
33178 |
+
"epoch": 0.91,
|
33179 |
+
"grad_norm": 1.5494750382317546,
|
33180 |
+
"learning_rate": 3.9822983274137137e-07,
|
33181 |
+
"loss": 0.8042,
|
33182 |
+
"step": 4739
|
33183 |
+
},
|
33184 |
+
{
|
33185 |
+
"epoch": 0.91,
|
33186 |
+
"grad_norm": 1.7760013173719795,
|
33187 |
+
"learning_rate": 3.964891214740063e-07,
|
33188 |
+
"loss": 0.7957,
|
33189 |
+
"step": 4740
|
33190 |
+
},
|
33191 |
+
{
|
33192 |
+
"epoch": 0.91,
|
33193 |
+
"grad_norm": 1.616107440677217,
|
33194 |
+
"learning_rate": 3.9475214602594844e-07,
|
33195 |
+
"loss": 0.9369,
|
33196 |
+
"step": 4741
|
33197 |
+
},
|
33198 |
+
{
|
33199 |
+
"epoch": 0.91,
|
33200 |
+
"grad_norm": 1.6297856162850126,
|
33201 |
+
"learning_rate": 3.930189070728907e-07,
|
33202 |
+
"loss": 0.8166,
|
33203 |
+
"step": 4742
|
33204 |
+
},
|
33205 |
+
{
|
33206 |
+
"epoch": 0.91,
|
33207 |
+
"grad_norm": 1.5261404817116855,
|
33208 |
+
"learning_rate": 3.9128940528907256e-07,
|
33209 |
+
"loss": 0.882,
|
33210 |
+
"step": 4743
|
33211 |
+
},
|
33212 |
+
{
|
33213 |
+
"epoch": 0.91,
|
33214 |
+
"grad_norm": 1.6171829450573758,
|
33215 |
+
"learning_rate": 3.895636413472803e-07,
|
33216 |
+
"loss": 0.8086,
|
33217 |
+
"step": 4744
|
33218 |
+
},
|
33219 |
+
{
|
33220 |
+
"epoch": 0.91,
|
33221 |
+
"grad_norm": 1.65460452013808,
|
33222 |
+
"learning_rate": 3.878416159188458e-07,
|
33223 |
+
"loss": 0.9052,
|
33224 |
+
"step": 4745
|
33225 |
+
},
|
33226 |
+
{
|
33227 |
+
"epoch": 0.91,
|
33228 |
+
"grad_norm": 1.634016027582634,
|
33229 |
+
"learning_rate": 3.8612332967364776e-07,
|
33230 |
+
"loss": 0.8796,
|
33231 |
+
"step": 4746
|
33232 |
+
},
|
33233 |
+
{
|
33234 |
+
"epoch": 0.91,
|
33235 |
+
"grad_norm": 1.4705543286456317,
|
33236 |
+
"learning_rate": 3.844087832801069e-07,
|
33237 |
+
"loss": 0.8437,
|
33238 |
+
"step": 4747
|
33239 |
+
},
|
33240 |
+
{
|
33241 |
+
"epoch": 0.91,
|
33242 |
+
"grad_norm": 1.7151010171281107,
|
33243 |
+
"learning_rate": 3.826979774051909e-07,
|
33244 |
+
"loss": 0.8337,
|
33245 |
+
"step": 4748
|
33246 |
+
},
|
33247 |
+
{
|
33248 |
+
"epoch": 0.91,
|
33249 |
+
"grad_norm": 1.5843930520104088,
|
33250 |
+
"learning_rate": 3.809909127144151e-07,
|
33251 |
+
"loss": 0.8299,
|
33252 |
+
"step": 4749
|
33253 |
+
},
|
33254 |
+
{
|
33255 |
+
"epoch": 0.91,
|
33256 |
+
"grad_norm": 1.526416336262696,
|
33257 |
+
"learning_rate": 3.7928758987183624e-07,
|
33258 |
+
"loss": 0.7759,
|
33259 |
+
"step": 4750
|
33260 |
+
},
|
33261 |
+
{
|
33262 |
+
"epoch": 0.91,
|
33263 |
+
"grad_norm": 1.6428339947689987,
|
33264 |
+
"learning_rate": 3.7758800954005636e-07,
|
33265 |
+
"loss": 0.8362,
|
33266 |
+
"step": 4751
|
33267 |
+
},
|
33268 |
+
{
|
33269 |
+
"epoch": 0.92,
|
33270 |
+
"grad_norm": 1.6391771092396883,
|
33271 |
+
"learning_rate": 3.7589217238022113e-07,
|
33272 |
+
"loss": 0.8826,
|
33273 |
+
"step": 4752
|
33274 |
+
},
|
33275 |
+
{
|
33276 |
+
"epoch": 0.92,
|
33277 |
+
"grad_norm": 1.7667209649913396,
|
33278 |
+
"learning_rate": 3.7420007905202283e-07,
|
33279 |
+
"loss": 0.8631,
|
33280 |
+
"step": 4753
|
33281 |
+
},
|
33282 |
+
{
|
33283 |
+
"epoch": 0.92,
|
33284 |
+
"grad_norm": 1.5839302857640825,
|
33285 |
+
"learning_rate": 3.7251173021369156e-07,
|
33286 |
+
"loss": 0.8494,
|
33287 |
+
"step": 4754
|
33288 |
+
},
|
33289 |
+
{
|
33290 |
+
"epoch": 0.92,
|
33291 |
+
"grad_norm": 1.5713058747975237,
|
33292 |
+
"learning_rate": 3.708271265220087e-07,
|
33293 |
+
"loss": 0.7535,
|
33294 |
+
"step": 4755
|
33295 |
+
},
|
33296 |
+
{
|
33297 |
+
"epoch": 0.92,
|
33298 |
+
"grad_norm": 1.4581919015342857,
|
33299 |
+
"learning_rate": 3.6914626863229e-07,
|
33300 |
+
"loss": 0.7839,
|
33301 |
+
"step": 4756
|
33302 |
+
},
|
33303 |
+
{
|
33304 |
+
"epoch": 0.92,
|
33305 |
+
"grad_norm": 1.7593764793503315,
|
33306 |
+
"learning_rate": 3.674691571984013e-07,
|
33307 |
+
"loss": 0.8261,
|
33308 |
+
"step": 4757
|
33309 |
+
},
|
33310 |
+
{
|
33311 |
+
"epoch": 0.92,
|
33312 |
+
"grad_norm": 1.0229678649372016,
|
33313 |
+
"learning_rate": 3.657957928727474e-07,
|
33314 |
+
"loss": 0.8256,
|
33315 |
+
"step": 4758
|
33316 |
+
},
|
33317 |
+
{
|
33318 |
+
"epoch": 0.92,
|
33319 |
+
"grad_norm": 0.9535118258403272,
|
33320 |
+
"learning_rate": 3.6412617630627755e-07,
|
33321 |
+
"loss": 0.7838,
|
33322 |
+
"step": 4759
|
33323 |
+
},
|
33324 |
+
{
|
33325 |
+
"epoch": 0.92,
|
33326 |
+
"grad_norm": 1.7347847279444861,
|
33327 |
+
"learning_rate": 3.624603081484812e-07,
|
33328 |
+
"loss": 0.8139,
|
33329 |
+
"step": 4760
|
33330 |
+
},
|
33331 |
+
{
|
33332 |
+
"epoch": 0.92,
|
33333 |
+
"grad_norm": 1.622160056422886,
|
33334 |
+
"learning_rate": 3.6079818904738884e-07,
|
33335 |
+
"loss": 0.7831,
|
33336 |
+
"step": 4761
|
33337 |
+
},
|
33338 |
+
{
|
33339 |
+
"epoch": 0.92,
|
33340 |
+
"grad_norm": 0.9716276915514496,
|
33341 |
+
"learning_rate": 3.5913981964957766e-07,
|
33342 |
+
"loss": 0.8886,
|
33343 |
+
"step": 4762
|
33344 |
+
},
|
33345 |
+
{
|
33346 |
+
"epoch": 0.92,
|
33347 |
+
"grad_norm": 1.6441177659837407,
|
33348 |
+
"learning_rate": 3.5748520060015944e-07,
|
33349 |
+
"loss": 0.8599,
|
33350 |
+
"step": 4763
|
33351 |
+
},
|
33352 |
+
{
|
33353 |
+
"epoch": 0.92,
|
33354 |
+
"grad_norm": 1.6551197231737023,
|
33355 |
+
"learning_rate": 3.5583433254279155e-07,
|
33356 |
+
"loss": 0.8248,
|
33357 |
+
"step": 4764
|
33358 |
+
},
|
33359 |
+
{
|
33360 |
+
"epoch": 0.92,
|
33361 |
+
"grad_norm": 0.9760164605599994,
|
33362 |
+
"learning_rate": 3.541872161196691e-07,
|
33363 |
+
"loss": 0.7797,
|
33364 |
+
"step": 4765
|
33365 |
+
},
|
33366 |
+
{
|
33367 |
+
"epoch": 0.92,
|
33368 |
+
"grad_norm": 1.4504786189252332,
|
33369 |
+
"learning_rate": 3.5254385197153297e-07,
|
33370 |
+
"loss": 0.8549,
|
33371 |
+
"step": 4766
|
33372 |
+
},
|
33373 |
+
{
|
33374 |
+
"epoch": 0.92,
|
33375 |
+
"grad_norm": 1.6089778666465842,
|
33376 |
+
"learning_rate": 3.5090424073765725e-07,
|
33377 |
+
"loss": 0.9239,
|
33378 |
+
"step": 4767
|
33379 |
+
},
|
33380 |
+
{
|
33381 |
+
"epoch": 0.92,
|
33382 |
+
"grad_norm": 1.6161196157246704,
|
33383 |
+
"learning_rate": 3.4926838305586295e-07,
|
33384 |
+
"loss": 0.9207,
|
33385 |
+
"step": 4768
|
33386 |
+
},
|
33387 |
+
{
|
33388 |
+
"epoch": 0.92,
|
33389 |
+
"grad_norm": 1.6606225882806407,
|
33390 |
+
"learning_rate": 3.4763627956250654e-07,
|
33391 |
+
"loss": 0.7786,
|
33392 |
+
"step": 4769
|
33393 |
+
},
|
33394 |
+
{
|
33395 |
+
"epoch": 0.92,
|
33396 |
+
"grad_norm": 0.9505629813721634,
|
33397 |
+
"learning_rate": 3.4600793089248465e-07,
|
33398 |
+
"loss": 0.8017,
|
33399 |
+
"step": 4770
|
33400 |
+
},
|
33401 |
+
{
|
33402 |
+
"epoch": 0.92,
|
33403 |
+
"grad_norm": 1.8036875245945276,
|
33404 |
+
"learning_rate": 3.443833376792349e-07,
|
33405 |
+
"loss": 0.8881,
|
33406 |
+
"step": 4771
|
33407 |
+
},
|
33408 |
+
{
|
33409 |
+
"epoch": 0.92,
|
33410 |
+
"grad_norm": 1.694822329392725,
|
33411 |
+
"learning_rate": 3.4276250055473304e-07,
|
33412 |
+
"loss": 0.8805,
|
33413 |
+
"step": 4772
|
33414 |
+
},
|
33415 |
+
{
|
33416 |
+
"epoch": 0.92,
|
33417 |
+
"grad_norm": 1.5982949061577643,
|
33418 |
+
"learning_rate": 3.411454201494935e-07,
|
33419 |
+
"loss": 0.8112,
|
33420 |
+
"step": 4773
|
33421 |
+
},
|
33422 |
+
{
|
33423 |
+
"epoch": 0.92,
|
33424 |
+
"grad_norm": 1.575896533752538,
|
33425 |
+
"learning_rate": 3.3953209709256975e-07,
|
33426 |
+
"loss": 0.7669,
|
33427 |
+
"step": 4774
|
33428 |
+
},
|
33429 |
+
{
|
33430 |
+
"epoch": 0.92,
|
33431 |
+
"grad_norm": 1.7289475032328157,
|
33432 |
+
"learning_rate": 3.3792253201155313e-07,
|
33433 |
+
"loss": 0.8515,
|
33434 |
+
"step": 4775
|
33435 |
+
},
|
33436 |
+
{
|
33437 |
+
"epoch": 0.92,
|
33438 |
+
"grad_norm": 1.6772017195281679,
|
33439 |
+
"learning_rate": 3.36316725532575e-07,
|
33440 |
+
"loss": 0.9792,
|
33441 |
+
"step": 4776
|
33442 |
+
},
|
33443 |
+
{
|
33444 |
+
"epoch": 0.92,
|
33445 |
+
"grad_norm": 1.6483114346919296,
|
33446 |
+
"learning_rate": 3.3471467828030013e-07,
|
33447 |
+
"loss": 0.8247,
|
33448 |
+
"step": 4777
|
33449 |
+
},
|
33450 |
+
{
|
33451 |
+
"epoch": 0.92,
|
33452 |
+
"grad_norm": 1.7981315587309108,
|
33453 |
+
"learning_rate": 3.3311639087793556e-07,
|
33454 |
+
"loss": 0.9334,
|
33455 |
+
"step": 4778
|
33456 |
+
},
|
33457 |
+
{
|
33458 |
+
"epoch": 0.92,
|
33459 |
+
"grad_norm": 1.798395276634243,
|
33460 |
+
"learning_rate": 3.3152186394722506e-07,
|
33461 |
+
"loss": 0.8194,
|
33462 |
+
"step": 4779
|
33463 |
+
},
|
33464 |
+
{
|
33465 |
+
"epoch": 0.92,
|
33466 |
+
"grad_norm": 1.6884601725851665,
|
33467 |
+
"learning_rate": 3.299310981084469e-07,
|
33468 |
+
"loss": 0.8869,
|
33469 |
+
"step": 4780
|
33470 |
+
},
|
33471 |
+
{
|
33472 |
+
"epoch": 0.92,
|
33473 |
+
"grad_norm": 1.6532311253737053,
|
33474 |
+
"learning_rate": 3.283440939804172e-07,
|
33475 |
+
"loss": 0.7801,
|
33476 |
+
"step": 4781
|
33477 |
+
},
|
33478 |
+
{
|
33479 |
+
"epoch": 0.92,
|
33480 |
+
"grad_norm": 1.4639294831852823,
|
33481 |
+
"learning_rate": 3.2676085218049215e-07,
|
33482 |
+
"loss": 0.8718,
|
33483 |
+
"step": 4782
|
33484 |
+
},
|
33485 |
+
{
|
33486 |
+
"epoch": 0.92,
|
33487 |
+
"grad_norm": 1.688433245042595,
|
33488 |
+
"learning_rate": 3.251813733245601e-07,
|
33489 |
+
"loss": 0.7258,
|
33490 |
+
"step": 4783
|
33491 |
+
},
|
33492 |
+
{
|
33493 |
+
"epoch": 0.92,
|
33494 |
+
"grad_norm": 1.5282375927340333,
|
33495 |
+
"learning_rate": 3.2360565802704634e-07,
|
33496 |
+
"loss": 0.7776,
|
33497 |
+
"step": 4784
|
33498 |
+
},
|
33499 |
+
{
|
33500 |
+
"epoch": 0.92,
|
33501 |
+
"grad_norm": 1.4939423179271425,
|
33502 |
+
"learning_rate": 3.2203370690091385e-07,
|
33503 |
+
"loss": 0.8289,
|
33504 |
+
"step": 4785
|
33505 |
+
},
|
33506 |
+
{
|
33507 |
+
"epoch": 0.92,
|
33508 |
+
"grad_norm": 1.686477793286507,
|
33509 |
+
"learning_rate": 3.2046552055766014e-07,
|
33510 |
+
"loss": 0.8194,
|
33511 |
+
"step": 4786
|
33512 |
+
},
|
33513 |
+
{
|
33514 |
+
"epoch": 0.92,
|
33515 |
+
"grad_norm": 1.6118000752000259,
|
33516 |
+
"learning_rate": 3.189010996073183e-07,
|
33517 |
+
"loss": 0.8074,
|
33518 |
+
"step": 4787
|
33519 |
+
},
|
33520 |
+
{
|
33521 |
+
"epoch": 0.92,
|
33522 |
+
"grad_norm": 1.7914929045154324,
|
33523 |
+
"learning_rate": 3.17340444658456e-07,
|
33524 |
+
"loss": 0.846,
|
33525 |
+
"step": 4788
|
33526 |
+
},
|
33527 |
+
{
|
33528 |
+
"epoch": 0.92,
|
33529 |
+
"grad_norm": 1.6823572069118011,
|
33530 |
+
"learning_rate": 3.157835563181788e-07,
|
33531 |
+
"loss": 0.8335,
|
33532 |
+
"step": 4789
|
33533 |
+
},
|
33534 |
+
{
|
33535 |
+
"epoch": 0.92,
|
33536 |
+
"grad_norm": 1.6606169497019847,
|
33537 |
+
"learning_rate": 3.1423043519212546e-07,
|
33538 |
+
"loss": 0.7818,
|
33539 |
+
"step": 4790
|
33540 |
+
},
|
33541 |
+
{
|
33542 |
+
"epoch": 0.92,
|
33543 |
+
"grad_norm": 1.5313722890918302,
|
33544 |
+
"learning_rate": 3.12681081884465e-07,
|
33545 |
+
"loss": 0.8774,
|
33546 |
+
"step": 4791
|
33547 |
+
},
|
33548 |
+
{
|
33549 |
+
"epoch": 0.92,
|
33550 |
+
"grad_norm": 1.729672334609458,
|
33551 |
+
"learning_rate": 3.1113549699790745e-07,
|
33552 |
+
"loss": 0.7877,
|
33553 |
+
"step": 4792
|
33554 |
+
},
|
33555 |
+
{
|
33556 |
+
"epoch": 0.92,
|
33557 |
+
"grad_norm": 1.7275981350830443,
|
33558 |
+
"learning_rate": 3.0959368113369305e-07,
|
33559 |
+
"loss": 0.8518,
|
33560 |
+
"step": 4793
|
33561 |
+
},
|
33562 |
+
{
|
33563 |
+
"epoch": 0.92,
|
33564 |
+
"grad_norm": 1.5431849570033087,
|
33565 |
+
"learning_rate": 3.080556348915964e-07,
|
33566 |
+
"loss": 0.8513,
|
33567 |
+
"step": 4794
|
33568 |
+
},
|
33569 |
+
{
|
33570 |
+
"epoch": 0.92,
|
33571 |
+
"grad_norm": 1.5549587703130405,
|
33572 |
+
"learning_rate": 3.0652135886992674e-07,
|
33573 |
+
"loss": 0.845,
|
33574 |
+
"step": 4795
|
33575 |
+
},
|
33576 |
+
{
|
33577 |
+
"epoch": 0.92,
|
33578 |
+
"grad_norm": 1.6150632070900868,
|
33579 |
+
"learning_rate": 3.049908536655266e-07,
|
33580 |
+
"loss": 0.8748,
|
33581 |
+
"step": 4796
|
33582 |
+
},
|
33583 |
+
{
|
33584 |
+
"epoch": 0.92,
|
33585 |
+
"grad_norm": 1.7712407035313649,
|
33586 |
+
"learning_rate": 3.0346411987377087e-07,
|
33587 |
+
"loss": 0.8705,
|
33588 |
+
"step": 4797
|
33589 |
+
},
|
33590 |
+
{
|
33591 |
+
"epoch": 0.92,
|
33592 |
+
"grad_norm": 1.7296781843055429,
|
33593 |
+
"learning_rate": 3.019411580885656e-07,
|
33594 |
+
"loss": 0.8107,
|
33595 |
+
"step": 4798
|
33596 |
+
},
|
33597 |
+
{
|
33598 |
+
"epoch": 0.92,
|
33599 |
+
"grad_norm": 1.7321411729256602,
|
33600 |
+
"learning_rate": 3.0042196890235244e-07,
|
33601 |
+
"loss": 0.8312,
|
33602 |
+
"step": 4799
|
33603 |
+
},
|
33604 |
+
{
|
33605 |
+
"epoch": 0.92,
|
33606 |
+
"grad_norm": 1.6667705278133578,
|
33607 |
+
"learning_rate": 2.9890655290610524e-07,
|
33608 |
+
"loss": 0.8439,
|
33609 |
+
"step": 4800
|
33610 |
+
},
|
33611 |
+
{
|
33612 |
+
"epoch": 0.92,
|
33613 |
+
"grad_norm": 1.6183317675327349,
|
33614 |
+
"learning_rate": 2.9739491068932924e-07,
|
33615 |
+
"loss": 0.8938,
|
33616 |
+
"step": 4801
|
33617 |
+
},
|
33618 |
+
{
|
33619 |
+
"epoch": 0.92,
|
33620 |
+
"grad_norm": 1.6711648840775002,
|
33621 |
+
"learning_rate": 2.9588704284006176e-07,
|
33622 |
+
"loss": 0.9273,
|
33623 |
+
"step": 4802
|
33624 |
+
},
|
33625 |
+
{
|
33626 |
+
"epoch": 0.92,
|
33627 |
+
"grad_norm": 1.5800037148285875,
|
33628 |
+
"learning_rate": 2.9438294994487025e-07,
|
33629 |
+
"loss": 0.7868,
|
33630 |
+
"step": 4803
|
33631 |
+
},
|
33632 |
+
{
|
33633 |
+
"epoch": 0.93,
|
33634 |
+
"grad_norm": 1.674519887401665,
|
33635 |
+
"learning_rate": 2.9288263258885564e-07,
|
33636 |
+
"loss": 0.8108,
|
33637 |
+
"step": 4804
|
33638 |
+
},
|
33639 |
+
{
|
33640 |
+
"epoch": 0.93,
|
33641 |
+
"grad_norm": 1.685941013734219,
|
33642 |
+
"learning_rate": 2.913860913556521e-07,
|
33643 |
+
"loss": 0.8894,
|
33644 |
+
"step": 4805
|
33645 |
+
},
|
33646 |
+
{
|
33647 |
+
"epoch": 0.93,
|
33648 |
+
"grad_norm": 1.6992304001777496,
|
33649 |
+
"learning_rate": 2.8989332682741953e-07,
|
33650 |
+
"loss": 0.9151,
|
33651 |
+
"step": 4806
|
33652 |
+
},
|
33653 |
+
{
|
33654 |
+
"epoch": 0.93,
|
33655 |
+
"grad_norm": 1.5681189714737205,
|
33656 |
+
"learning_rate": 2.8840433958485346e-07,
|
33657 |
+
"loss": 0.828,
|
33658 |
+
"step": 4807
|
33659 |
+
},
|
33660 |
+
{
|
33661 |
+
"epoch": 0.93,
|
33662 |
+
"grad_norm": 1.73459313527006,
|
33663 |
+
"learning_rate": 2.869191302071772e-07,
|
33664 |
+
"loss": 0.8636,
|
33665 |
+
"step": 4808
|
33666 |
+
},
|
33667 |
+
{
|
33668 |
+
"epoch": 0.93,
|
33669 |
+
"grad_norm": 1.5708889933524866,
|
33670 |
+
"learning_rate": 2.8543769927214635e-07,
|
33671 |
+
"loss": 0.8016,
|
33672 |
+
"step": 4809
|
33673 |
+
},
|
33674 |
+
{
|
33675 |
+
"epoch": 0.93,
|
33676 |
+
"grad_norm": 1.6556950571345788,
|
33677 |
+
"learning_rate": 2.8396004735604556e-07,
|
33678 |
+
"loss": 0.8362,
|
33679 |
+
"step": 4810
|
33680 |
+
},
|
33681 |
+
{
|
33682 |
+
"epoch": 0.93,
|
33683 |
+
"grad_norm": 1.5939396280990095,
|
33684 |
+
"learning_rate": 2.8248617503368934e-07,
|
33685 |
+
"loss": 0.8214,
|
33686 |
+
"step": 4811
|
33687 |
+
},
|
33688 |
+
{
|
33689 |
+
"epoch": 0.93,
|
33690 |
+
"grad_norm": 1.6445737537214664,
|
33691 |
+
"learning_rate": 2.8101608287842255e-07,
|
33692 |
+
"loss": 0.8101,
|
33693 |
+
"step": 4812
|
33694 |
+
},
|
33695 |
+
{
|
33696 |
+
"epoch": 0.93,
|
33697 |
+
"grad_norm": 1.530900385278659,
|
33698 |
+
"learning_rate": 2.795497714621198e-07,
|
33699 |
+
"loss": 0.8873,
|
33700 |
+
"step": 4813
|
33701 |
+
},
|
33702 |
+
{
|
33703 |
+
"epoch": 0.93,
|
33704 |
+
"grad_norm": 1.6440147626071886,
|
33705 |
+
"learning_rate": 2.7808724135518275e-07,
|
33706 |
+
"loss": 0.8459,
|
33707 |
+
"step": 4814
|
33708 |
+
},
|
33709 |
+
{
|
33710 |
+
"epoch": 0.93,
|
33711 |
+
"grad_norm": 1.5967301329211467,
|
33712 |
+
"learning_rate": 2.76628493126545e-07,
|
33713 |
+
"loss": 0.8309,
|
33714 |
+
"step": 4815
|
33715 |
+
},
|
33716 |
+
{
|
33717 |
+
"epoch": 0.93,
|
33718 |
+
"grad_norm": 0.9060266887554025,
|
33719 |
+
"learning_rate": 2.751735273436673e-07,
|
33720 |
+
"loss": 0.7643,
|
33721 |
+
"step": 4816
|
33722 |
+
},
|
33723 |
+
{
|
33724 |
+
"epoch": 0.93,
|
33725 |
+
"grad_norm": 1.7546073748438287,
|
33726 |
+
"learning_rate": 2.737223445725401e-07,
|
33727 |
+
"loss": 0.8663,
|
33728 |
+
"step": 4817
|
33729 |
+
},
|
33730 |
+
{
|
33731 |
+
"epoch": 0.93,
|
33732 |
+
"grad_norm": 1.6244540188287706,
|
33733 |
+
"learning_rate": 2.7227494537768075e-07,
|
33734 |
+
"loss": 0.765,
|
33735 |
+
"step": 4818
|
33736 |
+
},
|
33737 |
+
{
|
33738 |
+
"epoch": 0.93,
|
33739 |
+
"grad_norm": 1.760202352517511,
|
33740 |
+
"learning_rate": 2.708313303221377e-07,
|
33741 |
+
"loss": 0.9034,
|
33742 |
+
"step": 4819
|
33743 |
+
},
|
33744 |
+
{
|
33745 |
+
"epoch": 0.93,
|
33746 |
+
"grad_norm": 1.7666061320096818,
|
33747 |
+
"learning_rate": 2.693914999674818e-07,
|
33748 |
+
"loss": 0.8755,
|
33749 |
+
"step": 4820
|
33750 |
+
},
|
33751 |
+
{
|
33752 |
+
"epoch": 0.93,
|
33753 |
+
"grad_norm": 1.5613714635005946,
|
33754 |
+
"learning_rate": 2.6795545487381724e-07,
|
33755 |
+
"loss": 0.7883,
|
33756 |
+
"step": 4821
|
33757 |
+
},
|
33758 |
+
{
|
33759 |
+
"epoch": 0.93,
|
33760 |
+
"grad_norm": 1.5200286900067441,
|
33761 |
+
"learning_rate": 2.665231955997738e-07,
|
33762 |
+
"loss": 0.8187,
|
33763 |
+
"step": 4822
|
33764 |
+
},
|
33765 |
+
{
|
33766 |
+
"epoch": 0.93,
|
33767 |
+
"grad_norm": 1.588152135460304,
|
33768 |
+
"learning_rate": 2.650947227025069e-07,
|
33769 |
+
"loss": 0.814,
|
33770 |
+
"step": 4823
|
33771 |
+
},
|
33772 |
+
{
|
33773 |
+
"epoch": 0.93,
|
33774 |
+
"grad_norm": 1.5266137304631693,
|
33775 |
+
"learning_rate": 2.6367003673770207e-07,
|
33776 |
+
"loss": 0.7984,
|
33777 |
+
"step": 4824
|
33778 |
+
},
|
33779 |
+
{
|
33780 |
+
"epoch": 0.93,
|
33781 |
+
"grad_norm": 1.5381375133627766,
|
33782 |
+
"learning_rate": 2.6224913825956933e-07,
|
33783 |
+
"loss": 0.868,
|
33784 |
+
"step": 4825
|
33785 |
+
},
|
33786 |
+
{
|
33787 |
+
"epoch": 0.93,
|
33788 |
+
"grad_norm": 1.649187576200236,
|
33789 |
+
"learning_rate": 2.608320278208465e-07,
|
33790 |
+
"loss": 0.8201,
|
33791 |
+
"step": 4826
|
33792 |
+
},
|
33793 |
+
{
|
33794 |
+
"epoch": 0.93,
|
33795 |
+
"grad_norm": 1.8076495675440554,
|
33796 |
+
"learning_rate": 2.5941870597279705e-07,
|
33797 |
+
"loss": 0.8381,
|
33798 |
+
"step": 4827
|
33799 |
+
},
|
33800 |
+
{
|
33801 |
+
"epoch": 0.93,
|
33802 |
+
"grad_norm": 1.7049459231631452,
|
33803 |
+
"learning_rate": 2.5800917326521013e-07,
|
33804 |
+
"loss": 0.8039,
|
33805 |
+
"step": 4828
|
33806 |
+
},
|
33807 |
+
{
|
33808 |
+
"epoch": 0.93,
|
33809 |
+
"grad_norm": 0.9538485271628983,
|
33810 |
+
"learning_rate": 2.566034302464027e-07,
|
33811 |
+
"loss": 0.7581,
|
33812 |
+
"step": 4829
|
33813 |
+
},
|
33814 |
+
{
|
33815 |
+
"epoch": 0.93,
|
33816 |
+
"grad_norm": 1.6724880354656986,
|
33817 |
+
"learning_rate": 2.552014774632172e-07,
|
33818 |
+
"loss": 0.8479,
|
33819 |
+
"step": 4830
|
33820 |
+
},
|
33821 |
+
{
|
33822 |
+
"epoch": 0.93,
|
33823 |
+
"grad_norm": 1.8133591830513744,
|
33824 |
+
"learning_rate": 2.538033154610209e-07,
|
33825 |
+
"loss": 0.8705,
|
33826 |
+
"step": 4831
|
33827 |
+
},
|
33828 |
+
{
|
33829 |
+
"epoch": 0.93,
|
33830 |
+
"grad_norm": 1.6579651660565642,
|
33831 |
+
"learning_rate": 2.524089447837064e-07,
|
33832 |
+
"loss": 0.8498,
|
33833 |
+
"step": 4832
|
33834 |
+
},
|
33835 |
+
{
|
33836 |
+
"epoch": 0.93,
|
33837 |
+
"grad_norm": 1.6548338743351285,
|
33838 |
+
"learning_rate": 2.51018365973692e-07,
|
33839 |
+
"loss": 0.8748,
|
33840 |
+
"step": 4833
|
33841 |
+
},
|
33842 |
+
{
|
33843 |
+
"epoch": 0.93,
|
33844 |
+
"grad_norm": 1.525397157490326,
|
33845 |
+
"learning_rate": 2.496315795719195e-07,
|
33846 |
+
"loss": 0.8361,
|
33847 |
+
"step": 4834
|
33848 |
+
},
|
33849 |
+
{
|
33850 |
+
"epoch": 0.93,
|
33851 |
+
"grad_norm": 1.5920176646750865,
|
33852 |
+
"learning_rate": 2.482485861178563e-07,
|
33853 |
+
"loss": 0.8105,
|
33854 |
+
"step": 4835
|
33855 |
+
},
|
33856 |
+
{
|
33857 |
+
"epoch": 0.93,
|
33858 |
+
"grad_norm": 1.6429465465349558,
|
33859 |
+
"learning_rate": 2.4686938614949643e-07,
|
33860 |
+
"loss": 0.8905,
|
33861 |
+
"step": 4836
|
33862 |
+
},
|
33863 |
+
{
|
33864 |
+
"epoch": 0.93,
|
33865 |
+
"grad_norm": 1.7832098610576956,
|
33866 |
+
"learning_rate": 2.4549398020335625e-07,
|
33867 |
+
"loss": 0.8103,
|
33868 |
+
"step": 4837
|
33869 |
+
},
|
33870 |
+
{
|
33871 |
+
"epoch": 0.93,
|
33872 |
+
"grad_norm": 1.5345252846573398,
|
33873 |
+
"learning_rate": 2.441223688144745e-07,
|
33874 |
+
"loss": 0.8195,
|
33875 |
+
"step": 4838
|
33876 |
+
},
|
33877 |
+
{
|
33878 |
+
"epoch": 0.93,
|
33879 |
+
"grad_norm": 1.5290031237515256,
|
33880 |
+
"learning_rate": 2.4275455251641653e-07,
|
33881 |
+
"loss": 0.8786,
|
33882 |
+
"step": 4839
|
33883 |
+
},
|
33884 |
+
{
|
33885 |
+
"epoch": 0.93,
|
33886 |
+
"grad_norm": 1.58164443431506,
|
33887 |
+
"learning_rate": 2.4139053184127237e-07,
|
33888 |
+
"loss": 0.827,
|
33889 |
+
"step": 4840
|
33890 |
+
},
|
33891 |
+
{
|
33892 |
+
"epoch": 0.93,
|
33893 |
+
"grad_norm": 1.610597724461763,
|
33894 |
+
"learning_rate": 2.400303073196508e-07,
|
33895 |
+
"loss": 0.8535,
|
33896 |
+
"step": 4841
|
33897 |
+
},
|
33898 |
+
{
|
33899 |
+
"epoch": 0.93,
|
33900 |
+
"grad_norm": 1.3834549233361646,
|
33901 |
+
"learning_rate": 2.3867387948068865e-07,
|
33902 |
+
"loss": 0.799,
|
33903 |
+
"step": 4842
|
33904 |
+
},
|
33905 |
+
{
|
33906 |
+
"epoch": 0.93,
|
33907 |
+
"grad_norm": 1.8398115746259007,
|
33908 |
+
"learning_rate": 2.3732124885204266e-07,
|
33909 |
+
"loss": 0.8797,
|
33910 |
+
"step": 4843
|
33911 |
+
},
|
33912 |
+
{
|
33913 |
+
"epoch": 0.93,
|
33914 |
+
"grad_norm": 1.5963950703347418,
|
33915 |
+
"learning_rate": 2.3597241595989417e-07,
|
33916 |
+
"loss": 0.7603,
|
33917 |
+
"step": 4844
|
33918 |
+
},
|
33919 |
+
{
|
33920 |
+
"epoch": 0.93,
|
33921 |
+
"grad_norm": 1.5744137147113135,
|
33922 |
+
"learning_rate": 2.346273813289468e-07,
|
33923 |
+
"loss": 0.8245,
|
33924 |
+
"step": 4845
|
33925 |
+
},
|
33926 |
+
{
|
33927 |
+
"epoch": 0.93,
|
33928 |
+
"grad_norm": 1.4577636380990016,
|
33929 |
+
"learning_rate": 2.3328614548242646e-07,
|
33930 |
+
"loss": 0.8599,
|
33931 |
+
"step": 4846
|
33932 |
+
},
|
33933 |
+
{
|
33934 |
+
"epoch": 0.93,
|
33935 |
+
"grad_norm": 1.6929866898934611,
|
33936 |
+
"learning_rate": 2.3194870894208244e-07,
|
33937 |
+
"loss": 0.8307,
|
33938 |
+
"step": 4847
|
33939 |
+
},
|
33940 |
+
{
|
33941 |
+
"epoch": 0.93,
|
33942 |
+
"grad_norm": 1.5446532896341592,
|
33943 |
+
"learning_rate": 2.3061507222818303e-07,
|
33944 |
+
"loss": 0.8035,
|
33945 |
+
"step": 4848
|
33946 |
+
},
|
33947 |
+
{
|
33948 |
+
"epoch": 0.93,
|
33949 |
+
"grad_norm": 1.6311185078352026,
|
33950 |
+
"learning_rate": 2.2928523585952323e-07,
|
33951 |
+
"loss": 0.8205,
|
33952 |
+
"step": 4849
|
33953 |
+
},
|
33954 |
+
{
|
33955 |
+
"epoch": 0.93,
|
33956 |
+
"grad_norm": 1.5943013003594422,
|
33957 |
+
"learning_rate": 2.2795920035341258e-07,
|
33958 |
+
"loss": 0.9206,
|
33959 |
+
"step": 4850
|
33960 |
+
},
|
33961 |
+
{
|
33962 |
+
"epoch": 0.93,
|
33963 |
+
"grad_norm": 1.6240632118828648,
|
33964 |
+
"learning_rate": 2.2663696622568955e-07,
|
33965 |
+
"loss": 0.877,
|
33966 |
+
"step": 4851
|
33967 |
+
},
|
33968 |
+
{
|
33969 |
+
"epoch": 0.93,
|
33970 |
+
"grad_norm": 1.4939145654179917,
|
33971 |
+
"learning_rate": 2.2531853399070936e-07,
|
33972 |
+
"loss": 0.8568,
|
33973 |
+
"step": 4852
|
33974 |
+
},
|
33975 |
+
{
|
33976 |
+
"epoch": 0.93,
|
33977 |
+
"grad_norm": 1.5981329832684763,
|
33978 |
+
"learning_rate": 2.2400390416134953e-07,
|
33979 |
+
"loss": 0.8412,
|
33980 |
+
"step": 4853
|
33981 |
+
},
|
33982 |
+
{
|
33983 |
+
"epoch": 0.93,
|
33984 |
+
"grad_norm": 1.570133236687216,
|
33985 |
+
"learning_rate": 2.226930772490088e-07,
|
33986 |
+
"loss": 0.845,
|
33987 |
+
"step": 4854
|
33988 |
+
},
|
33989 |
+
{
|
33990 |
+
"epoch": 0.93,
|
33991 |
+
"grad_norm": 1.743169482849413,
|
33992 |
+
"learning_rate": 2.213860537636059e-07,
|
33993 |
+
"loss": 0.9209,
|
33994 |
+
"step": 4855
|
33995 |
+
},
|
33996 |
+
{
|
33997 |
+
"epoch": 0.94,
|
33998 |
+
"grad_norm": 1.6958034405374705,
|
33999 |
+
"learning_rate": 2.200828342135819e-07,
|
34000 |
+
"loss": 0.8887,
|
34001 |
+
"step": 4856
|
34002 |
+
},
|
34003 |
+
{
|
34004 |
+
"epoch": 0.94,
|
34005 |
+
"grad_norm": 1.5922805104822988,
|
34006 |
+
"learning_rate": 2.187834191058935e-07,
|
34007 |
+
"loss": 0.8502,
|
34008 |
+
"step": 4857
|
34009 |
+
},
|
34010 |
+
{
|
34011 |
+
"epoch": 0.94,
|
34012 |
+
"grad_norm": 1.518497807992221,
|
34013 |
+
"learning_rate": 2.1748780894602194e-07,
|
34014 |
+
"loss": 0.8574,
|
34015 |
+
"step": 4858
|
34016 |
+
},
|
34017 |
+
{
|
34018 |
+
"epoch": 0.94,
|
34019 |
+
"grad_norm": 1.686973118522028,
|
34020 |
+
"learning_rate": 2.1619600423796628e-07,
|
34021 |
+
"loss": 0.8445,
|
34022 |
+
"step": 4859
|
34023 |
+
},
|
34024 |
+
{
|
34025 |
+
"epoch": 0.94,
|
34026 |
+
"grad_norm": 1.6772664760048672,
|
34027 |
+
"learning_rate": 2.1490800548424672e-07,
|
34028 |
+
"loss": 0.8219,
|
34029 |
+
"step": 4860
|
34030 |
+
},
|
34031 |
+
{
|
34032 |
+
"epoch": 0.94,
|
34033 |
+
"grad_norm": 1.5617253901249581,
|
34034 |
+
"learning_rate": 2.136238131859014e-07,
|
34035 |
+
"loss": 0.8314,
|
34036 |
+
"step": 4861
|
34037 |
+
},
|
34038 |
+
{
|
34039 |
+
"epoch": 0.94,
|
34040 |
+
"grad_norm": 1.6075469863562961,
|
34041 |
+
"learning_rate": 2.123434278424885e-07,
|
34042 |
+
"loss": 0.8681,
|
34043 |
+
"step": 4862
|
34044 |
+
},
|
34045 |
+
{
|
34046 |
+
"epoch": 0.94,
|
34047 |
+
"grad_norm": 1.789235541183758,
|
34048 |
+
"learning_rate": 2.1106684995208626e-07,
|
34049 |
+
"loss": 0.8466,
|
34050 |
+
"step": 4863
|
34051 |
+
},
|
34052 |
+
{
|
34053 |
+
"epoch": 0.94,
|
34054 |
+
"grad_norm": 1.6447974400912926,
|
34055 |
+
"learning_rate": 2.0979408001128743e-07,
|
34056 |
+
"loss": 0.8522,
|
34057 |
+
"step": 4864
|
34058 |
+
},
|
34059 |
+
{
|
34060 |
+
"epoch": 0.94,
|
34061 |
+
"grad_norm": 1.765136819247867,
|
34062 |
+
"learning_rate": 2.085251185152093e-07,
|
34063 |
+
"loss": 0.8604,
|
34064 |
+
"step": 4865
|
34065 |
+
},
|
34066 |
+
{
|
34067 |
+
"epoch": 0.94,
|
34068 |
+
"grad_norm": 1.6266851116541106,
|
34069 |
+
"learning_rate": 2.0725996595748366e-07,
|
34070 |
+
"loss": 0.8664,
|
34071 |
+
"step": 4866
|
34072 |
+
},
|
34073 |
+
{
|
34074 |
+
"epoch": 0.94,
|
34075 |
+
"grad_norm": 1.67819672473352,
|
34076 |
+
"learning_rate": 2.059986228302624e-07,
|
34077 |
+
"loss": 0.7761,
|
34078 |
+
"step": 4867
|
34079 |
+
},
|
34080 |
+
{
|
34081 |
+
"epoch": 0.94,
|
34082 |
+
"grad_norm": 1.65969174055999,
|
34083 |
+
"learning_rate": 2.0474108962421524e-07,
|
34084 |
+
"loss": 0.8423,
|
34085 |
+
"step": 4868
|
34086 |
+
},
|
34087 |
+
{
|
34088 |
+
"epoch": 0.94,
|
34089 |
+
"grad_norm": 1.5079462034803506,
|
34090 |
+
"learning_rate": 2.0348736682852864e-07,
|
34091 |
+
"loss": 0.7754,
|
34092 |
+
"step": 4869
|
34093 |
+
},
|
34094 |
+
{
|
34095 |
+
"epoch": 0.94,
|
34096 |
+
"grad_norm": 1.6052162598092945,
|
34097 |
+
"learning_rate": 2.022374549309103e-07,
|
34098 |
+
"loss": 0.8417,
|
34099 |
+
"step": 4870
|
34100 |
+
},
|
34101 |
+
{
|
34102 |
+
"epoch": 0.94,
|
34103 |
+
"grad_norm": 1.7703126547521626,
|
34104 |
+
"learning_rate": 2.00991354417579e-07,
|
34105 |
+
"loss": 0.9103,
|
34106 |
+
"step": 4871
|
34107 |
+
},
|
34108 |
+
{
|
34109 |
+
"epoch": 0.94,
|
34110 |
+
"grad_norm": 1.7526946610459049,
|
34111 |
+
"learning_rate": 1.9974906577327813e-07,
|
34112 |
+
"loss": 0.8527,
|
34113 |
+
"step": 4872
|
34114 |
+
},
|
34115 |
+
{
|
34116 |
+
"epoch": 0.94,
|
34117 |
+
"grad_norm": 1.9231864358048003,
|
34118 |
+
"learning_rate": 1.9851058948126333e-07,
|
34119 |
+
"loss": 0.8165,
|
34120 |
+
"step": 4873
|
34121 |
+
},
|
34122 |
+
{
|
34123 |
+
"epoch": 0.94,
|
34124 |
+
"grad_norm": 1.6876394188237012,
|
34125 |
+
"learning_rate": 1.9727592602330926e-07,
|
34126 |
+
"loss": 0.8579,
|
34127 |
+
"step": 4874
|
34128 |
+
},
|
34129 |
+
{
|
34130 |
+
"epoch": 0.94,
|
34131 |
+
"grad_norm": 1.546043471808283,
|
34132 |
+
"learning_rate": 1.9604507587970612e-07,
|
34133 |
+
"loss": 0.7516,
|
34134 |
+
"step": 4875
|
34135 |
+
},
|
34136 |
+
{
|
34137 |
+
"epoch": 0.94,
|
34138 |
+
"grad_norm": 1.8330467033982552,
|
34139 |
+
"learning_rate": 1.9481803952926314e-07,
|
34140 |
+
"loss": 0.8899,
|
34141 |
+
"step": 4876
|
34142 |
+
},
|
34143 |
+
{
|
34144 |
+
"epoch": 0.94,
|
34145 |
+
"grad_norm": 1.6924749568421145,
|
34146 |
+
"learning_rate": 1.9359481744930297e-07,
|
34147 |
+
"loss": 0.831,
|
34148 |
+
"step": 4877
|
34149 |
+
},
|
34150 |
+
{
|
34151 |
+
"epoch": 0.94,
|
34152 |
+
"grad_norm": 1.7045527025533789,
|
34153 |
+
"learning_rate": 1.9237541011566606e-07,
|
34154 |
+
"loss": 0.8909,
|
34155 |
+
"step": 4878
|
34156 |
+
},
|
34157 |
+
{
|
34158 |
+
"epoch": 0.94,
|
34159 |
+
"grad_norm": 1.6507040101580346,
|
34160 |
+
"learning_rate": 1.9115981800270855e-07,
|
34161 |
+
"loss": 0.8436,
|
34162 |
+
"step": 4879
|
34163 |
+
},
|
34164 |
+
{
|
34165 |
+
"epoch": 0.94,
|
34166 |
+
"grad_norm": 1.510467164138072,
|
34167 |
+
"learning_rate": 1.8994804158330327e-07,
|
34168 |
+
"loss": 0.7436,
|
34169 |
+
"step": 4880
|
34170 |
+
},
|
34171 |
+
{
|
34172 |
+
"epoch": 0.94,
|
34173 |
+
"grad_norm": 1.4970798059476809,
|
34174 |
+
"learning_rate": 1.8874008132883646e-07,
|
34175 |
+
"loss": 0.8396,
|
34176 |
+
"step": 4881
|
34177 |
+
},
|
34178 |
+
{
|
34179 |
+
"epoch": 0.94,
|
34180 |
+
"grad_norm": 1.7338623941791689,
|
34181 |
+
"learning_rate": 1.875359377092134e-07,
|
34182 |
+
"loss": 0.8812,
|
34183 |
+
"step": 4882
|
34184 |
+
},
|
34185 |
+
{
|
34186 |
+
"epoch": 0.94,
|
34187 |
+
"grad_norm": 1.6861050115544627,
|
34188 |
+
"learning_rate": 1.863356111928516e-07,
|
34189 |
+
"loss": 0.8854,
|
34190 |
+
"step": 4883
|
34191 |
+
},
|
34192 |
+
{
|
34193 |
+
"epoch": 0.94,
|
34194 |
+
"grad_norm": 1.526204807597603,
|
34195 |
+
"learning_rate": 1.851391022466853e-07,
|
34196 |
+
"loss": 0.858,
|
34197 |
+
"step": 4884
|
34198 |
+
},
|
34199 |
+
{
|
34200 |
+
"epoch": 0.94,
|
34201 |
+
"grad_norm": 1.6976895766365234,
|
34202 |
+
"learning_rate": 1.839464113361611e-07,
|
34203 |
+
"loss": 0.7955,
|
34204 |
+
"step": 4885
|
34205 |
+
},
|
34206 |
+
{
|
34207 |
+
"epoch": 0.94,
|
34208 |
+
"grad_norm": 1.6559912626039093,
|
34209 |
+
"learning_rate": 1.827575389252456e-07,
|
34210 |
+
"loss": 0.8226,
|
34211 |
+
"step": 4886
|
34212 |
+
},
|
34213 |
+
{
|
34214 |
+
"epoch": 0.94,
|
34215 |
+
"grad_norm": 1.622119688685812,
|
34216 |
+
"learning_rate": 1.815724854764145e-07,
|
34217 |
+
"loss": 0.8671,
|
34218 |
+
"step": 4887
|
34219 |
+
},
|
34220 |
+
{
|
34221 |
+
"epoch": 0.94,
|
34222 |
+
"grad_norm": 1.5437991175471397,
|
34223 |
+
"learning_rate": 1.8039125145066115e-07,
|
34224 |
+
"loss": 0.8028,
|
34225 |
+
"step": 4888
|
34226 |
+
},
|
34227 |
+
{
|
34228 |
+
"epoch": 0.94,
|
34229 |
+
"grad_norm": 1.723305371104265,
|
34230 |
+
"learning_rate": 1.7921383730749143e-07,
|
34231 |
+
"loss": 0.8234,
|
34232 |
+
"step": 4889
|
34233 |
+
},
|
34234 |
+
{
|
34235 |
+
"epoch": 0.94,
|
34236 |
+
"grad_norm": 1.6710362217257508,
|
34237 |
+
"learning_rate": 1.7804024350492778e-07,
|
34238 |
+
"loss": 0.8893,
|
34239 |
+
"step": 4890
|
34240 |
+
},
|
34241 |
+
{
|
34242 |
+
"epoch": 0.94,
|
34243 |
+
"grad_norm": 1.6106969878587862,
|
34244 |
+
"learning_rate": 1.768704704995028e-07,
|
34245 |
+
"loss": 0.8756,
|
34246 |
+
"step": 4891
|
34247 |
+
},
|
34248 |
+
{
|
34249 |
+
"epoch": 0.94,
|
34250 |
+
"grad_norm": 1.6065165768298206,
|
34251 |
+
"learning_rate": 1.7570451874626583e-07,
|
34252 |
+
"loss": 0.7772,
|
34253 |
+
"step": 4892
|
34254 |
+
},
|
34255 |
+
{
|
34256 |
+
"epoch": 0.94,
|
34257 |
+
"grad_norm": 1.629084990697285,
|
34258 |
+
"learning_rate": 1.745423886987785e-07,
|
34259 |
+
"loss": 0.8493,
|
34260 |
+
"step": 4893
|
34261 |
+
},
|
34262 |
+
{
|
34263 |
+
"epoch": 0.94,
|
34264 |
+
"grad_norm": 1.7264309930868569,
|
34265 |
+
"learning_rate": 1.7338408080911473e-07,
|
34266 |
+
"loss": 0.8663,
|
34267 |
+
"step": 4894
|
34268 |
+
},
|
34269 |
+
{
|
34270 |
+
"epoch": 0.94,
|
34271 |
+
"grad_norm": 1.7192802669576013,
|
34272 |
+
"learning_rate": 1.7222959552786412e-07,
|
34273 |
+
"loss": 0.8826,
|
34274 |
+
"step": 4895
|
34275 |
+
},
|
34276 |
+
{
|
34277 |
+
"epoch": 0.94,
|
34278 |
+
"grad_norm": 1.5667756101128578,
|
34279 |
+
"learning_rate": 1.7107893330412738e-07,
|
34280 |
+
"loss": 0.7617,
|
34281 |
+
"step": 4896
|
34282 |
+
},
|
34283 |
+
{
|
34284 |
+
"epoch": 0.94,
|
34285 |
+
"grad_norm": 1.6970738721179803,
|
34286 |
+
"learning_rate": 1.6993209458551763e-07,
|
34287 |
+
"loss": 0.9408,
|
34288 |
+
"step": 4897
|
34289 |
+
},
|
34290 |
+
{
|
34291 |
+
"epoch": 0.94,
|
34292 |
+
"grad_norm": 1.5125985162571438,
|
34293 |
+
"learning_rate": 1.687890798181635e-07,
|
34294 |
+
"loss": 0.8234,
|
34295 |
+
"step": 4898
|
34296 |
+
},
|
34297 |
+
{
|
34298 |
+
"epoch": 0.94,
|
34299 |
+
"grad_norm": 1.5611681895794751,
|
34300 |
+
"learning_rate": 1.6764988944670158e-07,
|
34301 |
+
"loss": 0.8612,
|
34302 |
+
"step": 4899
|
34303 |
+
},
|
34304 |
+
{
|
34305 |
+
"epoch": 0.94,
|
34306 |
+
"grad_norm": 1.725702892372231,
|
34307 |
+
"learning_rate": 1.6651452391428736e-07,
|
34308 |
+
"loss": 0.9418,
|
34309 |
+
"step": 4900
|
34310 |
+
},
|
34311 |
+
{
|
34312 |
+
"epoch": 0.94,
|
34313 |
+
"grad_norm": 1.6391013469838338,
|
34314 |
+
"learning_rate": 1.6538298366257975e-07,
|
34315 |
+
"loss": 0.7918,
|
34316 |
+
"step": 4901
|
34317 |
+
},
|
34318 |
+
{
|
34319 |
+
"epoch": 0.94,
|
34320 |
+
"grad_norm": 1.661758037273611,
|
34321 |
+
"learning_rate": 1.6425526913175672e-07,
|
34322 |
+
"loss": 0.8399,
|
34323 |
+
"step": 4902
|
34324 |
+
},
|
34325 |
+
{
|
34326 |
+
"epoch": 0.94,
|
34327 |
+
"grad_norm": 1.5954579453399507,
|
34328 |
+
"learning_rate": 1.6313138076050505e-07,
|
34329 |
+
"loss": 0.8521,
|
34330 |
+
"step": 4903
|
34331 |
+
},
|
34332 |
+
{
|
34333 |
+
"epoch": 0.94,
|
34334 |
+
"grad_norm": 1.602569704425456,
|
34335 |
+
"learning_rate": 1.6201131898602284e-07,
|
34336 |
+
"loss": 0.8762,
|
34337 |
+
"step": 4904
|
34338 |
+
},
|
34339 |
+
{
|
34340 |
+
"epoch": 0.94,
|
34341 |
+
"grad_norm": 1.6726770139591156,
|
34342 |
+
"learning_rate": 1.6089508424402156e-07,
|
34343 |
+
"loss": 0.8862,
|
34344 |
+
"step": 4905
|
34345 |
+
},
|
34346 |
+
{
|
34347 |
+
"epoch": 0.94,
|
34348 |
+
"grad_norm": 1.720165892486497,
|
34349 |
+
"learning_rate": 1.5978267696872274e-07,
|
34350 |
+
"loss": 0.8524,
|
34351 |
+
"step": 4906
|
34352 |
+
},
|
34353 |
+
{
|
34354 |
+
"epoch": 0.94,
|
34355 |
+
"grad_norm": 1.5200991395166976,
|
34356 |
+
"learning_rate": 1.5867409759285802e-07,
|
34357 |
+
"loss": 0.8561,
|
34358 |
+
"step": 4907
|
34359 |
+
},
|
34360 |
+
{
|
34361 |
+
"epoch": 0.95,
|
34362 |
+
"grad_norm": 1.6590015797171926,
|
34363 |
+
"learning_rate": 1.5756934654767243e-07,
|
34364 |
+
"loss": 0.8831,
|
34365 |
+
"step": 4908
|
34366 |
+
},
|
34367 |
+
{
|
34368 |
+
"epoch": 0.95,
|
34369 |
+
"grad_norm": 1.556760071346181,
|
34370 |
+
"learning_rate": 1.5646842426291775e-07,
|
34371 |
+
"loss": 0.8126,
|
34372 |
+
"step": 4909
|
34373 |
+
},
|
34374 |
+
{
|
34375 |
+
"epoch": 0.95,
|
34376 |
+
"grad_norm": 1.6475807742135788,
|
34377 |
+
"learning_rate": 1.5537133116686142e-07,
|
34378 |
+
"loss": 0.7223,
|
34379 |
+
"step": 4910
|
34380 |
+
},
|
34381 |
+
{
|
34382 |
+
"epoch": 0.95,
|
34383 |
+
"grad_norm": 1.6864491977631575,
|
34384 |
+
"learning_rate": 1.542780676862776e-07,
|
34385 |
+
"loss": 0.8824,
|
34386 |
+
"step": 4911
|
34387 |
+
},
|
34388 |
+
{
|
34389 |
+
"epoch": 0.95,
|
34390 |
+
"grad_norm": 1.4541071162068364,
|
34391 |
+
"learning_rate": 1.531886342464517e-07,
|
34392 |
+
"loss": 0.7289,
|
34393 |
+
"step": 4912
|
34394 |
+
},
|
34395 |
+
{
|
34396 |
+
"epoch": 0.95,
|
34397 |
+
"grad_norm": 1.6553024079033145,
|
34398 |
+
"learning_rate": 1.521030312711802e-07,
|
34399 |
+
"loss": 0.8028,
|
34400 |
+
"step": 4913
|
34401 |
+
},
|
34402 |
+
{
|
34403 |
+
"epoch": 0.95,
|
34404 |
+
"grad_norm": 1.5381855897941494,
|
34405 |
+
"learning_rate": 1.5102125918276978e-07,
|
34406 |
+
"loss": 0.8452,
|
34407 |
+
"step": 4914
|
34408 |
+
},
|
34409 |
+
{
|
34410 |
+
"epoch": 0.95,
|
34411 |
+
"grad_norm": 1.81594511213681,
|
34412 |
+
"learning_rate": 1.499433184020327e-07,
|
34413 |
+
"loss": 0.9022,
|
34414 |
+
"step": 4915
|
34415 |
+
},
|
34416 |
+
{
|
34417 |
+
"epoch": 0.95,
|
34418 |
+
"grad_norm": 1.6259255253716556,
|
34419 |
+
"learning_rate": 1.4886920934829574e-07,
|
34420 |
+
"loss": 0.8288,
|
34421 |
+
"step": 4916
|
34422 |
+
},
|
34423 |
+
{
|
34424 |
+
"epoch": 0.95,
|
34425 |
+
"grad_norm": 1.6591298059831057,
|
34426 |
+
"learning_rate": 1.4779893243939358e-07,
|
34427 |
+
"loss": 0.8641,
|
34428 |
+
"step": 4917
|
34429 |
+
},
|
34430 |
+
{
|
34431 |
+
"epoch": 0.95,
|
34432 |
+
"grad_norm": 1.6529473509522685,
|
34433 |
+
"learning_rate": 1.4673248809166984e-07,
|
34434 |
+
"loss": 0.7588,
|
34435 |
+
"step": 4918
|
34436 |
+
},
|
34437 |
+
{
|
34438 |
+
"epoch": 0.95,
|
34439 |
+
"grad_norm": 1.6004992764307049,
|
34440 |
+
"learning_rate": 1.45669876719976e-07,
|
34441 |
+
"loss": 0.803,
|
34442 |
+
"step": 4919
|
34443 |
+
},
|
34444 |
+
{
|
34445 |
+
"epoch": 0.95,
|
34446 |
+
"grad_norm": 1.6114035564591866,
|
34447 |
+
"learning_rate": 1.4461109873767587e-07,
|
34448 |
+
"loss": 0.8716,
|
34449 |
+
"step": 4920
|
34450 |
+
},
|
34451 |
+
{
|
34452 |
+
"epoch": 0.95,
|
34453 |
+
"grad_norm": 1.6912543405825373,
|
34454 |
+
"learning_rate": 1.4355615455663884e-07,
|
34455 |
+
"loss": 0.7688,
|
34456 |
+
"step": 4921
|
34457 |
+
},
|
34458 |
+
{
|
34459 |
+
"epoch": 0.95,
|
34460 |
+
"grad_norm": 0.9525310032028005,
|
34461 |
+
"learning_rate": 1.4250504458724336e-07,
|
34462 |
+
"loss": 0.8375,
|
34463 |
+
"step": 4922
|
34464 |
+
},
|
34465 |
+
{
|
34466 |
+
"epoch": 0.95,
|
34467 |
+
"grad_norm": 1.8051419365888983,
|
34468 |
+
"learning_rate": 1.4145776923837895e-07,
|
34469 |
+
"loss": 0.9364,
|
34470 |
+
"step": 4923
|
34471 |
+
},
|
34472 |
+
{
|
34473 |
+
"epoch": 0.95,
|
34474 |
+
"grad_norm": 1.7084739920204828,
|
34475 |
+
"learning_rate": 1.4041432891743978e-07,
|
34476 |
+
"loss": 0.9089,
|
34477 |
+
"step": 4924
|
34478 |
+
},
|
34479 |
+
{
|
34480 |
+
"epoch": 0.95,
|
34481 |
+
"grad_norm": 1.6253733176066043,
|
34482 |
+
"learning_rate": 1.3937472403033002e-07,
|
34483 |
+
"loss": 0.8488,
|
34484 |
+
"step": 4925
|
34485 |
+
},
|
34486 |
+
{
|
34487 |
+
"epoch": 0.95,
|
34488 |
+
"grad_norm": 1.7310556112020248,
|
34489 |
+
"learning_rate": 1.3833895498146287e-07,
|
34490 |
+
"loss": 0.9087,
|
34491 |
+
"step": 4926
|
34492 |
+
},
|
34493 |
+
{
|
34494 |
+
"epoch": 0.95,
|
34495 |
+
"grad_norm": 1.6576546514999582,
|
34496 |
+
"learning_rate": 1.3730702217375825e-07,
|
34497 |
+
"loss": 0.8847,
|
34498 |
+
"step": 4927
|
34499 |
+
},
|
34500 |
+
{
|
34501 |
+
"epoch": 0.95,
|
34502 |
+
"grad_norm": 1.6200150482231441,
|
34503 |
+
"learning_rate": 1.362789260086439e-07,
|
34504 |
+
"loss": 0.7445,
|
34505 |
+
"step": 4928
|
34506 |
+
},
|
34507 |
+
{
|
34508 |
+
"epoch": 0.95,
|
34509 |
+
"grad_norm": 1.5773998945496541,
|
34510 |
+
"learning_rate": 1.3525466688605328e-07,
|
34511 |
+
"loss": 0.7787,
|
34512 |
+
"step": 4929
|
34513 |
+
},
|
34514 |
+
{
|
34515 |
+
"epoch": 0.95,
|
34516 |
+
"grad_norm": 1.629333696277938,
|
34517 |
+
"learning_rate": 1.3423424520443095e-07,
|
34518 |
+
"loss": 0.9095,
|
34519 |
+
"step": 4930
|
34520 |
+
},
|
34521 |
+
{
|
34522 |
+
"epoch": 0.95,
|
34523 |
+
"grad_norm": 1.6469354057372534,
|
34524 |
+
"learning_rate": 1.33217661360725e-07,
|
34525 |
+
"loss": 0.8278,
|
34526 |
+
"step": 4931
|
34527 |
+
},
|
34528 |
+
{
|
34529 |
+
"epoch": 0.95,
|
34530 |
+
"grad_norm": 1.5712895075456599,
|
34531 |
+
"learning_rate": 1.3220491575039352e-07,
|
34532 |
+
"loss": 0.8249,
|
34533 |
+
"step": 4932
|
34534 |
+
},
|
34535 |
+
{
|
34536 |
+
"epoch": 0.95,
|
34537 |
+
"grad_norm": 1.6775311526534245,
|
34538 |
+
"learning_rate": 1.3119600876739913e-07,
|
34539 |
+
"loss": 0.8519,
|
34540 |
+
"step": 4933
|
34541 |
+
},
|
34542 |
+
{
|
34543 |
+
"epoch": 0.95,
|
34544 |
+
"grad_norm": 1.7074824689906087,
|
34545 |
+
"learning_rate": 1.3019094080421236e-07,
|
34546 |
+
"loss": 0.8262,
|
34547 |
+
"step": 4934
|
34548 |
+
},
|
34549 |
+
{
|
34550 |
+
"epoch": 0.95,
|
34551 |
+
"grad_norm": 1.5978239435800343,
|
34552 |
+
"learning_rate": 1.2918971225181265e-07,
|
34553 |
+
"loss": 0.8392,
|
34554 |
+
"step": 4935
|
34555 |
+
},
|
34556 |
+
{
|
34557 |
+
"epoch": 0.95,
|
34558 |
+
"grad_norm": 1.0103492039735638,
|
34559 |
+
"learning_rate": 1.2819232349967958e-07,
|
34560 |
+
"loss": 0.8086,
|
34561 |
+
"step": 4936
|
34562 |
+
},
|
34563 |
+
{
|
34564 |
+
"epoch": 0.95,
|
34565 |
+
"grad_norm": 1.5515066332509084,
|
34566 |
+
"learning_rate": 1.2719877493580613e-07,
|
34567 |
+
"loss": 0.8259,
|
34568 |
+
"step": 4937
|
34569 |
+
},
|
34570 |
+
{
|
34571 |
+
"epoch": 0.95,
|
34572 |
+
"grad_norm": 0.9456664714615861,
|
34573 |
+
"learning_rate": 1.2620906694668533e-07,
|
34574 |
+
"loss": 0.8409,
|
34575 |
+
"step": 4938
|
34576 |
+
},
|
34577 |
+
{
|
34578 |
+
"epoch": 0.95,
|
34579 |
+
"grad_norm": 1.716993131432421,
|
34580 |
+
"learning_rate": 1.2522319991732256e-07,
|
34581 |
+
"loss": 0.8065,
|
34582 |
+
"step": 4939
|
34583 |
+
},
|
34584 |
+
{
|
34585 |
+
"epoch": 0.95,
|
34586 |
+
"grad_norm": 1.519584786287795,
|
34587 |
+
"learning_rate": 1.242411742312233e-07,
|
34588 |
+
"loss": 0.7987,
|
34589 |
+
"step": 4940
|
34590 |
+
},
|
34591 |
+
{
|
34592 |
+
"epoch": 0.95,
|
34593 |
+
"grad_norm": 1.814988108224232,
|
34594 |
+
"learning_rate": 1.232629902704008e-07,
|
34595 |
+
"loss": 0.7686,
|
34596 |
+
"step": 4941
|
34597 |
+
},
|
34598 |
+
{
|
34599 |
+
"epoch": 0.95,
|
34600 |
+
"grad_norm": 1.587242203219095,
|
34601 |
+
"learning_rate": 1.2228864841537512e-07,
|
34602 |
+
"loss": 0.8539,
|
34603 |
+
"step": 4942
|
34604 |
+
},
|
34605 |
+
{
|
34606 |
+
"epoch": 0.95,
|
34607 |
+
"grad_norm": 1.5219598591187173,
|
34608 |
+
"learning_rate": 1.2131814904517093e-07,
|
34609 |
+
"loss": 0.8002,
|
34610 |
+
"step": 4943
|
34611 |
+
},
|
34612 |
+
{
|
34613 |
+
"epoch": 0.95,
|
34614 |
+
"grad_norm": 1.7067301828543635,
|
34615 |
+
"learning_rate": 1.2035149253731615e-07,
|
34616 |
+
"loss": 0.8167,
|
34617 |
+
"step": 4944
|
34618 |
+
},
|
34619 |
+
{
|
34620 |
+
"epoch": 0.95,
|
34621 |
+
"grad_norm": 1.4780687729376636,
|
34622 |
+
"learning_rate": 1.1938867926784782e-07,
|
34623 |
+
"loss": 0.8022,
|
34624 |
+
"step": 4945
|
34625 |
+
},
|
34626 |
+
{
|
34627 |
+
"epoch": 0.95,
|
34628 |
+
"grad_norm": 1.8245418722982043,
|
34629 |
+
"learning_rate": 1.1842970961130406e-07,
|
34630 |
+
"loss": 0.9316,
|
34631 |
+
"step": 4946
|
34632 |
+
},
|
34633 |
+
{
|
34634 |
+
"epoch": 0.95,
|
34635 |
+
"grad_norm": 1.8012418885011572,
|
34636 |
+
"learning_rate": 1.1747458394072986e-07,
|
34637 |
+
"loss": 0.8605,
|
34638 |
+
"step": 4947
|
34639 |
+
},
|
34640 |
+
{
|
34641 |
+
"epoch": 0.95,
|
34642 |
+
"grad_norm": 1.6095664936474214,
|
34643 |
+
"learning_rate": 1.1652330262767353e-07,
|
34644 |
+
"loss": 0.8166,
|
34645 |
+
"step": 4948
|
34646 |
+
},
|
34647 |
+
{
|
34648 |
+
"epoch": 0.95,
|
34649 |
+
"grad_norm": 1.7146979682040404,
|
34650 |
+
"learning_rate": 1.1557586604219129e-07,
|
34651 |
+
"loss": 0.8968,
|
34652 |
+
"step": 4949
|
34653 |
+
},
|
34654 |
+
{
|
34655 |
+
"epoch": 0.95,
|
34656 |
+
"grad_norm": 1.5895056850751075,
|
34657 |
+
"learning_rate": 1.1463227455283943e-07,
|
34658 |
+
"loss": 0.8926,
|
34659 |
+
"step": 4950
|
34660 |
+
},
|
34661 |
+
{
|
34662 |
+
"epoch": 0.95,
|
34663 |
+
"grad_norm": 1.5541333797103272,
|
34664 |
+
"learning_rate": 1.13692528526681e-07,
|
34665 |
+
"loss": 0.7857,
|
34666 |
+
"step": 4951
|
34667 |
+
},
|
34668 |
+
{
|
34669 |
+
"epoch": 0.95,
|
34670 |
+
"grad_norm": 1.5896127496722006,
|
34671 |
+
"learning_rate": 1.1275662832928135e-07,
|
34672 |
+
"loss": 0.8841,
|
34673 |
+
"step": 4952
|
34674 |
+
},
|
34675 |
+
{
|
34676 |
+
"epoch": 0.95,
|
34677 |
+
"grad_norm": 1.4811800456998214,
|
34678 |
+
"learning_rate": 1.118245743247115e-07,
|
34679 |
+
"loss": 0.7616,
|
34680 |
+
"step": 4953
|
34681 |
+
},
|
34682 |
+
{
|
34683 |
+
"epoch": 0.95,
|
34684 |
+
"grad_norm": 1.4505754590481381,
|
34685 |
+
"learning_rate": 1.1089636687554583e-07,
|
34686 |
+
"loss": 0.837,
|
34687 |
+
"step": 4954
|
34688 |
+
},
|
34689 |
+
{
|
34690 |
+
"epoch": 0.95,
|
34691 |
+
"grad_norm": 1.5661718657308172,
|
34692 |
+
"learning_rate": 1.099720063428622e-07,
|
34693 |
+
"loss": 0.8273,
|
34694 |
+
"step": 4955
|
34695 |
+
},
|
34696 |
+
{
|
34697 |
+
"epoch": 0.95,
|
34698 |
+
"grad_norm": 0.950799488866897,
|
34699 |
+
"learning_rate": 1.0905149308624185e-07,
|
34700 |
+
"loss": 0.7992,
|
34701 |
+
"step": 4956
|
34702 |
+
},
|
34703 |
+
{
|
34704 |
+
"epoch": 0.95,
|
34705 |
+
"grad_norm": 1.6311388423421525,
|
34706 |
+
"learning_rate": 1.0813482746377058e-07,
|
34707 |
+
"loss": 0.8573,
|
34708 |
+
"step": 4957
|
34709 |
+
},
|
34710 |
+
{
|
34711 |
+
"epoch": 0.95,
|
34712 |
+
"grad_norm": 1.5762042142140522,
|
34713 |
+
"learning_rate": 1.0722200983203423e-07,
|
34714 |
+
"loss": 0.7506,
|
34715 |
+
"step": 4958
|
34716 |
+
},
|
34717 |
+
{
|
34718 |
+
"epoch": 0.95,
|
34719 |
+
"grad_norm": 1.7399002723266332,
|
34720 |
+
"learning_rate": 1.0631304054612545e-07,
|
34721 |
+
"loss": 0.8777,
|
34722 |
+
"step": 4959
|
34723 |
+
},
|
34724 |
+
{
|
34725 |
+
"epoch": 0.96,
|
34726 |
+
"grad_norm": 1.64896309366328,
|
34727 |
+
"learning_rate": 1.0540791995963917e-07,
|
34728 |
+
"loss": 0.8091,
|
34729 |
+
"step": 4960
|
34730 |
+
},
|
34731 |
+
{
|
34732 |
+
"epoch": 0.96,
|
34733 |
+
"grad_norm": 1.6609498470045294,
|
34734 |
+
"learning_rate": 1.045066484246704e-07,
|
34735 |
+
"loss": 0.7811,
|
34736 |
+
"step": 4961
|
34737 |
+
},
|
34738 |
+
{
|
34739 |
+
"epoch": 0.96,
|
34740 |
+
"grad_norm": 1.0063699978672147,
|
34741 |
+
"learning_rate": 1.036092262918198e-07,
|
34742 |
+
"loss": 0.8015,
|
34743 |
+
"step": 4962
|
34744 |
+
},
|
34745 |
+
{
|
34746 |
+
"epoch": 0.96,
|
34747 |
+
"grad_norm": 1.6084066802033907,
|
34748 |
+
"learning_rate": 1.0271565391018922e-07,
|
34749 |
+
"loss": 0.8932,
|
34750 |
+
"step": 4963
|
34751 |
+
},
|
34752 |
+
{
|
34753 |
+
"epoch": 0.96,
|
34754 |
+
"grad_norm": 1.6165487237067675,
|
34755 |
+
"learning_rate": 1.0182593162738508e-07,
|
34756 |
+
"loss": 0.8962,
|
34757 |
+
"step": 4964
|
34758 |
+
},
|
34759 |
+
{
|
34760 |
+
"epoch": 0.96,
|
34761 |
+
"grad_norm": 1.6049658161860338,
|
34762 |
+
"learning_rate": 1.009400597895116e-07,
|
34763 |
+
"loss": 0.8494,
|
34764 |
+
"step": 4965
|
34765 |
+
},
|
34766 |
+
{
|
34767 |
+
"epoch": 0.96,
|
34768 |
+
"grad_norm": 1.6258440115836938,
|
34769 |
+
"learning_rate": 1.0005803874117981e-07,
|
34770 |
+
"loss": 0.8588,
|
34771 |
+
"step": 4966
|
34772 |
+
},
|
34773 |
+
{
|
34774 |
+
"epoch": 0.96,
|
34775 |
+
"grad_norm": 1.6540136388403845,
|
34776 |
+
"learning_rate": 9.917986882549968e-08,
|
34777 |
+
"loss": 0.8433,
|
34778 |
+
"step": 4967
|
34779 |
+
},
|
34780 |
+
{
|
34781 |
+
"epoch": 0.96,
|
34782 |
+
"grad_norm": 1.6530462586563348,
|
34783 |
+
"learning_rate": 9.830555038408463e-08,
|
34784 |
+
"loss": 0.856,
|
34785 |
+
"step": 4968
|
34786 |
+
},
|
34787 |
+
{
|
34788 |
+
"epoch": 0.96,
|
34789 |
+
"grad_norm": 1.4860983635785385,
|
34790 |
+
"learning_rate": 9.743508375704924e-08,
|
34791 |
+
"loss": 0.6871,
|
34792 |
+
"step": 4969
|
34793 |
+
},
|
34794 |
+
{
|
34795 |
+
"epoch": 0.96,
|
34796 |
+
"grad_norm": 1.5507562812035725,
|
34797 |
+
"learning_rate": 9.656846928300934e-08,
|
34798 |
+
"loss": 0.8492,
|
34799 |
+
"step": 4970
|
34800 |
+
},
|
34801 |
+
{
|
34802 |
+
"epoch": 0.96,
|
34803 |
+
"grad_norm": 1.6346673988974738,
|
34804 |
+
"learning_rate": 9.570570729908412e-08,
|
34805 |
+
"loss": 0.8396,
|
34806 |
+
"step": 4971
|
34807 |
+
},
|
34808 |
+
{
|
34809 |
+
"epoch": 0.96,
|
34810 |
+
"grad_norm": 1.7368013133597024,
|
34811 |
+
"learning_rate": 9.484679814089293e-08,
|
34812 |
+
"loss": 0.9203,
|
34813 |
+
"step": 4972
|
34814 |
+
},
|
34815 |
+
{
|
34816 |
+
"epoch": 0.96,
|
34817 |
+
"grad_norm": 1.6340143749831773,
|
34818 |
+
"learning_rate": 9.399174214255513e-08,
|
34819 |
+
"loss": 0.8939,
|
34820 |
+
"step": 4973
|
34821 |
+
},
|
34822 |
+
{
|
34823 |
+
"epoch": 0.96,
|
34824 |
+
"grad_norm": 1.707005458041116,
|
34825 |
+
"learning_rate": 9.314053963669245e-08,
|
34826 |
+
"loss": 0.7957,
|
34827 |
+
"step": 4974
|
34828 |
+
},
|
34829 |
+
{
|
34830 |
+
"epoch": 0.96,
|
34831 |
+
"grad_norm": 1.6392193096320475,
|
34832 |
+
"learning_rate": 9.22931909544278e-08,
|
34833 |
+
"loss": 0.8707,
|
34834 |
+
"step": 4975
|
34835 |
+
},
|
34836 |
+
{
|
34837 |
+
"epoch": 0.96,
|
34838 |
+
"grad_norm": 1.587703446312483,
|
34839 |
+
"learning_rate": 9.144969642538415e-08,
|
34840 |
+
"loss": 0.768,
|
34841 |
+
"step": 4976
|
34842 |
+
},
|
34843 |
+
{
|
34844 |
+
"epoch": 0.96,
|
34845 |
+
"grad_norm": 1.635947063395266,
|
34846 |
+
"learning_rate": 9.061005637768571e-08,
|
34847 |
+
"loss": 0.8609,
|
34848 |
+
"step": 4977
|
34849 |
+
},
|
34850 |
+
{
|
34851 |
+
"epoch": 0.96,
|
34852 |
+
"grad_norm": 1.6263184836466396,
|
34853 |
+
"learning_rate": 8.977427113795678e-08,
|
34854 |
+
"loss": 0.9195,
|
34855 |
+
"step": 4978
|
34856 |
+
},
|
34857 |
+
{
|
34858 |
+
"epoch": 0.96,
|
34859 |
+
"grad_norm": 1.6023902021963157,
|
34860 |
+
"learning_rate": 8.894234103132394e-08,
|
34861 |
+
"loss": 0.8682,
|
34862 |
+
"step": 4979
|
34863 |
+
},
|
34864 |
+
{
|
34865 |
+
"epoch": 0.96,
|
34866 |
+
"grad_norm": 1.6563911158006064,
|
34867 |
+
"learning_rate": 8.811426638141163e-08,
|
34868 |
+
"loss": 0.8095,
|
34869 |
+
"step": 4980
|
34870 |
+
},
|
34871 |
+
{
|
34872 |
+
"epoch": 0.96,
|
34873 |
+
"grad_norm": 1.7609044106395593,
|
34874 |
+
"learning_rate": 8.729004751034443e-08,
|
34875 |
+
"loss": 0.8567,
|
34876 |
+
"step": 4981
|
34877 |
+
},
|
34878 |
+
{
|
34879 |
+
"epoch": 0.96,
|
34880 |
+
"grad_norm": 1.4790167691488278,
|
34881 |
+
"learning_rate": 8.646968473874917e-08,
|
34882 |
+
"loss": 0.8171,
|
34883 |
+
"step": 4982
|
34884 |
+
},
|
34885 |
+
{
|
34886 |
+
"epoch": 0.96,
|
34887 |
+
"grad_norm": 1.6269543335652379,
|
34888 |
+
"learning_rate": 8.565317838575171e-08,
|
34889 |
+
"loss": 0.8258,
|
34890 |
+
"step": 4983
|
34891 |
+
},
|
34892 |
+
{
|
34893 |
+
"epoch": 0.96,
|
34894 |
+
"grad_norm": 1.5583935374943,
|
34895 |
+
"learning_rate": 8.484052876897797e-08,
|
34896 |
+
"loss": 0.8265,
|
34897 |
+
"step": 4984
|
34898 |
+
},
|
34899 |
+
{
|
34900 |
+
"epoch": 0.96,
|
34901 |
+
"grad_norm": 1.4943474875070648,
|
34902 |
+
"learning_rate": 8.403173620455174e-08,
|
34903 |
+
"loss": 0.7888,
|
34904 |
+
"step": 4985
|
34905 |
+
},
|
34906 |
+
{
|
34907 |
+
"epoch": 0.96,
|
34908 |
+
"grad_norm": 1.594762702711398,
|
34909 |
+
"learning_rate": 8.322680100710023e-08,
|
34910 |
+
"loss": 0.8098,
|
34911 |
+
"step": 4986
|
34912 |
+
},
|
34913 |
+
{
|
34914 |
+
"epoch": 0.96,
|
34915 |
+
"grad_norm": 1.6022569198318086,
|
34916 |
+
"learning_rate": 8.24257234897452e-08,
|
34917 |
+
"loss": 0.7609,
|
34918 |
+
"step": 4987
|
34919 |
+
},
|
34920 |
+
{
|
34921 |
+
"epoch": 0.96,
|
34922 |
+
"grad_norm": 1.6467693510359207,
|
34923 |
+
"learning_rate": 8.162850396411182e-08,
|
34924 |
+
"loss": 0.8345,
|
34925 |
+
"step": 4988
|
34926 |
+
},
|
34927 |
+
{
|
34928 |
+
"epoch": 0.96,
|
34929 |
+
"grad_norm": 1.751101015490329,
|
34930 |
+
"learning_rate": 8.08351427403209e-08,
|
34931 |
+
"loss": 0.8488,
|
34932 |
+
"step": 4989
|
34933 |
+
},
|
34934 |
+
{
|
34935 |
+
"epoch": 0.96,
|
34936 |
+
"grad_norm": 1.5795700884294022,
|
34937 |
+
"learning_rate": 8.004564012699667e-08,
|
34938 |
+
"loss": 0.8266,
|
34939 |
+
"step": 4990
|
34940 |
+
},
|
34941 |
+
{
|
34942 |
+
"epoch": 0.96,
|
34943 |
+
"grad_norm": 1.6452428063118647,
|
34944 |
+
"learning_rate": 7.925999643125904e-08,
|
34945 |
+
"loss": 0.7832,
|
34946 |
+
"step": 4991
|
34947 |
+
},
|
34948 |
+
{
|
34949 |
+
"epoch": 0.96,
|
34950 |
+
"grad_norm": 1.6461661712590487,
|
34951 |
+
"learning_rate": 7.847821195872796e-08,
|
34952 |
+
"loss": 0.9808,
|
34953 |
+
"step": 4992
|
34954 |
+
},
|
34955 |
+
{
|
34956 |
+
"epoch": 0.96,
|
34957 |
+
"grad_norm": 1.5657255170988507,
|
34958 |
+
"learning_rate": 7.770028701352239e-08,
|
34959 |
+
"loss": 0.8519,
|
34960 |
+
"step": 4993
|
34961 |
+
},
|
34962 |
+
{
|
34963 |
+
"epoch": 0.96,
|
34964 |
+
"grad_norm": 1.6852755964063066,
|
34965 |
+
"learning_rate": 7.692622189825916e-08,
|
34966 |
+
"loss": 0.8425,
|
34967 |
+
"step": 4994
|
34968 |
+
},
|
34969 |
+
{
|
34970 |
+
"epoch": 0.96,
|
34971 |
+
"grad_norm": 1.9054468178480464,
|
34972 |
+
"learning_rate": 7.615601691405405e-08,
|
34973 |
+
"loss": 0.8855,
|
34974 |
+
"step": 4995
|
34975 |
+
},
|
34976 |
+
{
|
34977 |
+
"epoch": 0.96,
|
34978 |
+
"grad_norm": 1.4668506651688153,
|
34979 |
+
"learning_rate": 7.538967236051963e-08,
|
34980 |
+
"loss": 0.8569,
|
34981 |
+
"step": 4996
|
34982 |
+
},
|
34983 |
+
{
|
34984 |
+
"epoch": 0.96,
|
34985 |
+
"grad_norm": 1.7272847342044952,
|
34986 |
+
"learning_rate": 7.462718853576966e-08,
|
34987 |
+
"loss": 0.8052,
|
34988 |
+
"step": 4997
|
34989 |
+
},
|
34990 |
+
{
|
34991 |
+
"epoch": 0.96,
|
34992 |
+
"grad_norm": 1.7114468698535032,
|
34993 |
+
"learning_rate": 7.386856573641576e-08,
|
34994 |
+
"loss": 0.7993,
|
34995 |
+
"step": 4998
|
34996 |
+
},
|
34997 |
+
{
|
34998 |
+
"epoch": 0.96,
|
34999 |
+
"grad_norm": 1.573996192998908,
|
35000 |
+
"learning_rate": 7.311380425756298e-08,
|
35001 |
+
"loss": 0.8533,
|
35002 |
+
"step": 4999
|
35003 |
+
},
|
35004 |
+
{
|
35005 |
+
"epoch": 0.96,
|
35006 |
+
"grad_norm": 1.6580048196864743,
|
35007 |
+
"learning_rate": 7.236290439282089e-08,
|
35008 |
+
"loss": 0.8353,
|
35009 |
+
"step": 5000
|
35010 |
}
|
35011 |
],
|
35012 |
"logging_steps": 1.0,
|
|
|
35014 |
"num_input_tokens_seen": 0,
|
35015 |
"num_train_epochs": 1,
|
35016 |
"save_steps": 500,
|
35017 |
+
"total_flos": 6750470532169728.0,
|
35018 |
"train_batch_size": 4,
|
35019 |
"trial_name": null,
|
35020 |
"trial_params": null
|