Training in progress, step 1500, checkpoint
Browse files- last-checkpoint/global_step1500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1500/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1500/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1500/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1500/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00004.safetensors +1 -1
- last-checkpoint/model-00002-of-00004.safetensors +1 -1
- last-checkpoint/model-00003-of-00004.safetensors +1 -1
- last-checkpoint/model-00004-of-00004.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +2300 -2
last-checkpoint/global_step1500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6722a97b0b4e8ac164766b954c4f4bd20c3f22259dcc99abdd2a54bed1e54ebc
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ddc6b7a9bb4fcdb46cdb1830b37ea67dc4cc7e6c343ef4865cc27ca390beafb
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:478d375f061004cbf281130b047929a505f779d3bf30ae5917214ec805ad6ac7
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9fe32d49d4b0104f2453c1bd44d33e45a7f8786d8351e2cc57ab1ef97aceace
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1500/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfc8c9c9a7d2fcfa2e8fa40312006739b2c850b9a5ecfacd34f5a60173282093
|
3 |
+
size 150693
|
last-checkpoint/global_step1500/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:259ffd7944316a19d3cd4a130f207a063579c77fad5d447554aeff12156085d6
|
3 |
+
size 150693
|
last-checkpoint/global_step1500/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d29c1fe957781c3a62402feef79dfb38cf56b8fddb352092f6ffd32c6211e320
|
3 |
+
size 150693
|
last-checkpoint/global_step1500/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:921a755d22277c2d84c9363b6dc0c6e459c1e6fbaddb89bc814ee9c5db4f54fb
|
3 |
+
size 150693
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step1500
|
last-checkpoint/model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4976698672
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec16c0b98fcebf6052a9aa927090800759b4fb6700367ad2c29354ecbf45f9f7
|
3 |
size 4976698672
|
last-checkpoint/model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999802720
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd85acac0b1cf3d6b603028d0abef6bbae49730ebe45add6807617156b350d1c
|
3 |
size 4999802720
|
last-checkpoint/model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4915916176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2df1f85a263fda844c3a5170fff9df97853e8127b0a5eddf0ad7744a2325916c
|
3 |
size 4915916176
|
last-checkpoint/model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1168138808
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e480518cebd078a58ebf6e0cea1f57aa4919ad9372aba8cc8a04682ef0e504f
|
3 |
size 1168138808
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4f003069486a57c6ac033f30cf4c4213eb6b7d659bab68a5a50fdb8da7c4118
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a016ef89b4392d083b2c15a7cf06a39bc61a759f648cf6dc03f1c32b89a526aa
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b56fe0893036dc052d18d90feba4328b90ea71561942150b07406ac3d7a700e
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0c203d12c2c308dab785ed672c9ca27fb6a2f72acd1e1552d1516c7b0006013
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ace6290c890a8d1e173a6da04a3c0a74aa055e1dc2c0b019def7feb7e061c29
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -9199,6 +9199,2304 @@
|
|
9199 |
"eval_samples_per_second": 4.941,
|
9200 |
"eval_steps_per_second": 1.235,
|
9201 |
"step": 1200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9202 |
}
|
9203 |
],
|
9204 |
"logging_steps": 2,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.710498182595681,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 1500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
9199 |
"eval_samples_per_second": 4.941,
|
9200 |
"eval_steps_per_second": 1.235,
|
9201 |
"step": 1200
|
9202 |
+
},
|
9203 |
+
{
|
9204 |
+
"epoch": 1.3706792103200058,
|
9205 |
+
"grad_norm": 64.03519542961854,
|
9206 |
+
"learning_rate": 4.98902460615475e-08,
|
9207 |
+
"logits/chosen": -1.3389551639556885,
|
9208 |
+
"logits/rejected": -1.3822637796401978,
|
9209 |
+
"logps/chosen": -175.14820861816406,
|
9210 |
+
"logps/rejected": -222.5068359375,
|
9211 |
+
"loss": 0.4712,
|
9212 |
+
"rewards/accuracies": 0.75,
|
9213 |
+
"rewards/chosen": -0.35669347643852234,
|
9214 |
+
"rewards/margins": 1.1921041011810303,
|
9215 |
+
"rewards/rejected": -1.548797607421875,
|
9216 |
+
"step": 1202
|
9217 |
+
},
|
9218 |
+
{
|
9219 |
+
"epoch": 1.3729598745634666,
|
9220 |
+
"grad_norm": 51.71910925101713,
|
9221 |
+
"learning_rate": 4.956146819977166e-08,
|
9222 |
+
"logits/chosen": -1.253232717514038,
|
9223 |
+
"logits/rejected": -1.2863636016845703,
|
9224 |
+
"logps/chosen": -176.71702575683594,
|
9225 |
+
"logps/rejected": -210.63230895996094,
|
9226 |
+
"loss": 0.4178,
|
9227 |
+
"rewards/accuracies": 0.84375,
|
9228 |
+
"rewards/chosen": -0.32385319471359253,
|
9229 |
+
"rewards/margins": 1.246777057647705,
|
9230 |
+
"rewards/rejected": -1.570630431175232,
|
9231 |
+
"step": 1204
|
9232 |
+
},
|
9233 |
+
{
|
9234 |
+
"epoch": 1.3752405388069275,
|
9235 |
+
"grad_norm": 56.79692940311852,
|
9236 |
+
"learning_rate": 4.923341996611603e-08,
|
9237 |
+
"logits/chosen": -1.1557482481002808,
|
9238 |
+
"logits/rejected": -1.180600881576538,
|
9239 |
+
"logps/chosen": -150.3299102783203,
|
9240 |
+
"logps/rejected": -167.61911010742188,
|
9241 |
+
"loss": 0.4255,
|
9242 |
+
"rewards/accuracies": 0.6875,
|
9243 |
+
"rewards/chosen": -0.3654107451438904,
|
9244 |
+
"rewards/margins": 0.7367621064186096,
|
9245 |
+
"rewards/rejected": -1.1021727323532104,
|
9246 |
+
"step": 1206
|
9247 |
+
},
|
9248 |
+
{
|
9249 |
+
"epoch": 1.3775212030503885,
|
9250 |
+
"grad_norm": 57.06474376403373,
|
9251 |
+
"learning_rate": 4.890610610602437e-08,
|
9252 |
+
"logits/chosen": -1.297890067100525,
|
9253 |
+
"logits/rejected": -1.347840666770935,
|
9254 |
+
"logps/chosen": -209.23716735839844,
|
9255 |
+
"logps/rejected": -259.1905517578125,
|
9256 |
+
"loss": 0.3961,
|
9257 |
+
"rewards/accuracies": 0.875,
|
9258 |
+
"rewards/chosen": -0.45185887813568115,
|
9259 |
+
"rewards/margins": 1.451210856437683,
|
9260 |
+
"rewards/rejected": -1.9030694961547852,
|
9261 |
+
"step": 1208
|
9262 |
+
},
|
9263 |
+
{
|
9264 |
+
"epoch": 1.3798018672938492,
|
9265 |
+
"grad_norm": 60.29263036985363,
|
9266 |
+
"learning_rate": 4.8579531354317225e-08,
|
9267 |
+
"logits/chosen": -1.397212266921997,
|
9268 |
+
"logits/rejected": -1.3925597667694092,
|
9269 |
+
"logps/chosen": -183.3704833984375,
|
9270 |
+
"logps/rejected": -195.10496520996094,
|
9271 |
+
"loss": 0.4579,
|
9272 |
+
"rewards/accuracies": 0.75,
|
9273 |
+
"rewards/chosen": -0.7833220362663269,
|
9274 |
+
"rewards/margins": 0.7912861108779907,
|
9275 |
+
"rewards/rejected": -1.574608325958252,
|
9276 |
+
"step": 1210
|
9277 |
+
},
|
9278 |
+
{
|
9279 |
+
"epoch": 1.3820825315373102,
|
9280 |
+
"grad_norm": 52.32294118146283,
|
9281 |
+
"learning_rate": 4.825370043512339e-08,
|
9282 |
+
"logits/chosen": -1.3067998886108398,
|
9283 |
+
"logits/rejected": -1.3849916458129883,
|
9284 |
+
"logps/chosen": -162.16232299804688,
|
9285 |
+
"logps/rejected": -195.49609375,
|
9286 |
+
"loss": 0.4108,
|
9287 |
+
"rewards/accuracies": 0.875,
|
9288 |
+
"rewards/chosen": -0.33422791957855225,
|
9289 |
+
"rewards/margins": 1.0588434934616089,
|
9290 |
+
"rewards/rejected": -1.3930714130401611,
|
9291 |
+
"step": 1212
|
9292 |
+
},
|
9293 |
+
{
|
9294 |
+
"epoch": 1.3843631957807712,
|
9295 |
+
"grad_norm": 59.66742993964118,
|
9296 |
+
"learning_rate": 4.792861806181171e-08,
|
9297 |
+
"logits/chosen": -1.30001962184906,
|
9298 |
+
"logits/rejected": -1.386979341506958,
|
9299 |
+
"logps/chosen": -170.89529418945312,
|
9300 |
+
"logps/rejected": -197.6066131591797,
|
9301 |
+
"loss": 0.4525,
|
9302 |
+
"rewards/accuracies": 0.8125,
|
9303 |
+
"rewards/chosen": -0.2925351560115814,
|
9304 |
+
"rewards/margins": 0.9441651105880737,
|
9305 |
+
"rewards/rejected": -1.2367002964019775,
|
9306 |
+
"step": 1214
|
9307 |
+
},
|
9308 |
+
{
|
9309 |
+
"epoch": 1.3866438600242321,
|
9310 |
+
"grad_norm": 65.32724813467337,
|
9311 |
+
"learning_rate": 4.760428893692273e-08,
|
9312 |
+
"logits/chosen": -1.2875810861587524,
|
9313 |
+
"logits/rejected": -1.3601633310317993,
|
9314 |
+
"logps/chosen": -160.88156127929688,
|
9315 |
+
"logps/rejected": -190.6903076171875,
|
9316 |
+
"loss": 0.4288,
|
9317 |
+
"rewards/accuracies": 0.71875,
|
9318 |
+
"rewards/chosen": -0.6599161624908447,
|
9319 |
+
"rewards/margins": 0.7817404270172119,
|
9320 |
+
"rewards/rejected": -1.4416565895080566,
|
9321 |
+
"step": 1216
|
9322 |
+
},
|
9323 |
+
{
|
9324 |
+
"epoch": 1.388924524267693,
|
9325 |
+
"grad_norm": 52.68799788674585,
|
9326 |
+
"learning_rate": 4.728071775210069e-08,
|
9327 |
+
"logits/chosen": -1.3100471496582031,
|
9328 |
+
"logits/rejected": -1.295668125152588,
|
9329 |
+
"logps/chosen": -157.36289978027344,
|
9330 |
+
"logps/rejected": -179.96127319335938,
|
9331 |
+
"loss": 0.4265,
|
9332 |
+
"rewards/accuracies": 0.75,
|
9333 |
+
"rewards/chosen": -0.36468327045440674,
|
9334 |
+
"rewards/margins": 0.9460710883140564,
|
9335 |
+
"rewards/rejected": -1.3107542991638184,
|
9336 |
+
"step": 1218
|
9337 |
+
},
|
9338 |
+
{
|
9339 |
+
"epoch": 1.3912051885111538,
|
9340 |
+
"grad_norm": 59.812629327754166,
|
9341 |
+
"learning_rate": 4.695790918802576e-08,
|
9342 |
+
"logits/chosen": -1.4171504974365234,
|
9343 |
+
"logits/rejected": -1.3959426879882812,
|
9344 |
+
"logps/chosen": -226.54220581054688,
|
9345 |
+
"logps/rejected": -242.77142333984375,
|
9346 |
+
"loss": 0.4516,
|
9347 |
+
"rewards/accuracies": 0.71875,
|
9348 |
+
"rewards/chosen": -0.6130508184432983,
|
9349 |
+
"rewards/margins": 1.1557624340057373,
|
9350 |
+
"rewards/rejected": -1.768813133239746,
|
9351 |
+
"step": 1220
|
9352 |
+
},
|
9353 |
+
{
|
9354 |
+
"epoch": 1.3934858527546148,
|
9355 |
+
"grad_norm": 66.91529385196273,
|
9356 |
+
"learning_rate": 4.663586791434628e-08,
|
9357 |
+
"logits/chosen": -1.1362406015396118,
|
9358 |
+
"logits/rejected": -1.2372556924819946,
|
9359 |
+
"logps/chosen": -182.88442993164062,
|
9360 |
+
"logps/rejected": -217.1130828857422,
|
9361 |
+
"loss": 0.4164,
|
9362 |
+
"rewards/accuracies": 0.78125,
|
9363 |
+
"rewards/chosen": -0.5246292352676392,
|
9364 |
+
"rewards/margins": 1.0273668766021729,
|
9365 |
+
"rewards/rejected": -1.551996111869812,
|
9366 |
+
"step": 1222
|
9367 |
+
},
|
9368 |
+
{
|
9369 |
+
"epoch": 1.3957665169980757,
|
9370 |
+
"grad_norm": 57.29717940934624,
|
9371 |
+
"learning_rate": 4.631459858961122e-08,
|
9372 |
+
"logits/chosen": -1.219807744026184,
|
9373 |
+
"logits/rejected": -1.2460401058197021,
|
9374 |
+
"logps/chosen": -153.93907165527344,
|
9375 |
+
"logps/rejected": -200.4837188720703,
|
9376 |
+
"loss": 0.4358,
|
9377 |
+
"rewards/accuracies": 0.84375,
|
9378 |
+
"rewards/chosen": -0.2589726746082306,
|
9379 |
+
"rewards/margins": 1.4164376258850098,
|
9380 |
+
"rewards/rejected": -1.675410509109497,
|
9381 |
+
"step": 1224
|
9382 |
+
},
|
9383 |
+
{
|
9384 |
+
"epoch": 1.3980471812415365,
|
9385 |
+
"grad_norm": 61.08923102786796,
|
9386 |
+
"learning_rate": 4.5994105861202715e-08,
|
9387 |
+
"logits/chosen": -1.3403871059417725,
|
9388 |
+
"logits/rejected": -1.3382725715637207,
|
9389 |
+
"logps/chosen": -147.61839294433594,
|
9390 |
+
"logps/rejected": -150.13453674316406,
|
9391 |
+
"loss": 0.4522,
|
9392 |
+
"rewards/accuracies": 0.875,
|
9393 |
+
"rewards/chosen": -0.403720498085022,
|
9394 |
+
"rewards/margins": 0.7644599080085754,
|
9395 |
+
"rewards/rejected": -1.1681804656982422,
|
9396 |
+
"step": 1226
|
9397 |
+
},
|
9398 |
+
{
|
9399 |
+
"epoch": 1.4003278454849974,
|
9400 |
+
"grad_norm": 55.589268418781224,
|
9401 |
+
"learning_rate": 4.5674394365268965e-08,
|
9402 |
+
"logits/chosen": -1.1604863405227661,
|
9403 |
+
"logits/rejected": -1.2054895162582397,
|
9404 |
+
"logps/chosen": -169.14215087890625,
|
9405 |
+
"logps/rejected": -203.62513732910156,
|
9406 |
+
"loss": 0.4333,
|
9407 |
+
"rewards/accuracies": 0.78125,
|
9408 |
+
"rewards/chosen": -0.32466423511505127,
|
9409 |
+
"rewards/margins": 1.318765640258789,
|
9410 |
+
"rewards/rejected": -1.6434298753738403,
|
9411 |
+
"step": 1228
|
9412 |
+
},
|
9413 |
+
{
|
9414 |
+
"epoch": 1.4026085097284584,
|
9415 |
+
"grad_norm": 50.39174380587635,
|
9416 |
+
"learning_rate": 4.535546872665707e-08,
|
9417 |
+
"logits/chosen": -1.2403908967971802,
|
9418 |
+
"logits/rejected": -1.3284348249435425,
|
9419 |
+
"logps/chosen": -154.95285034179688,
|
9420 |
+
"logps/rejected": -183.48558044433594,
|
9421 |
+
"loss": 0.5428,
|
9422 |
+
"rewards/accuracies": 0.75,
|
9423 |
+
"rewards/chosen": -0.2970479428768158,
|
9424 |
+
"rewards/margins": 0.813206672668457,
|
9425 |
+
"rewards/rejected": -1.1102546453475952,
|
9426 |
+
"step": 1230
|
9427 |
+
},
|
9428 |
+
{
|
9429 |
+
"epoch": 1.4048891739719194,
|
9430 |
+
"grad_norm": 64.62636544867223,
|
9431 |
+
"learning_rate": 4.5037333558846145e-08,
|
9432 |
+
"logits/chosen": -1.1855900287628174,
|
9433 |
+
"logits/rejected": -1.1975244283676147,
|
9434 |
+
"logps/chosen": -102.4107437133789,
|
9435 |
+
"logps/rejected": -124.4788589477539,
|
9436 |
+
"loss": 0.4184,
|
9437 |
+
"rewards/accuracies": 0.84375,
|
9438 |
+
"rewards/chosen": -0.20104435086250305,
|
9439 |
+
"rewards/margins": 0.7573148608207703,
|
9440 |
+
"rewards/rejected": -0.9583592414855957,
|
9441 |
+
"step": 1232
|
9442 |
+
},
|
9443 |
+
{
|
9444 |
+
"epoch": 1.4071698382153803,
|
9445 |
+
"grad_norm": 65.48395677041327,
|
9446 |
+
"learning_rate": 4.471999346388069e-08,
|
9447 |
+
"logits/chosen": -1.2656984329223633,
|
9448 |
+
"logits/rejected": -1.2520796060562134,
|
9449 |
+
"logps/chosen": -145.40838623046875,
|
9450 |
+
"logps/rejected": -172.25161743164062,
|
9451 |
+
"loss": 0.4563,
|
9452 |
+
"rewards/accuracies": 0.875,
|
9453 |
+
"rewards/chosen": -0.48840075731277466,
|
9454 |
+
"rewards/margins": 0.857879102230072,
|
9455 |
+
"rewards/rejected": -1.3462798595428467,
|
9456 |
+
"step": 1234
|
9457 |
+
},
|
9458 |
+
{
|
9459 |
+
"epoch": 1.409450502458841,
|
9460 |
+
"grad_norm": 53.15387042970869,
|
9461 |
+
"learning_rate": 4.4403453032303765e-08,
|
9462 |
+
"logits/chosen": -1.393466591835022,
|
9463 |
+
"logits/rejected": -1.4557361602783203,
|
9464 |
+
"logps/chosen": -160.5977020263672,
|
9465 |
+
"logps/rejected": -189.77520751953125,
|
9466 |
+
"loss": 0.4181,
|
9467 |
+
"rewards/accuracies": 0.90625,
|
9468 |
+
"rewards/chosen": -0.379285603761673,
|
9469 |
+
"rewards/margins": 1.075732946395874,
|
9470 |
+
"rewards/rejected": -1.4550185203552246,
|
9471 |
+
"step": 1236
|
9472 |
+
},
|
9473 |
+
{
|
9474 |
+
"epoch": 1.411731166702302,
|
9475 |
+
"grad_norm": 63.15445502776712,
|
9476 |
+
"learning_rate": 4.4087716843090895e-08,
|
9477 |
+
"logits/chosen": -1.3475301265716553,
|
9478 |
+
"logits/rejected": -1.3905658721923828,
|
9479 |
+
"logps/chosen": -166.34042358398438,
|
9480 |
+
"logps/rejected": -213.61553955078125,
|
9481 |
+
"loss": 0.461,
|
9482 |
+
"rewards/accuracies": 0.75,
|
9483 |
+
"rewards/chosen": -0.3821715712547302,
|
9484 |
+
"rewards/margins": 0.9115235805511475,
|
9485 |
+
"rewards/rejected": -1.2936952114105225,
|
9486 |
+
"step": 1238
|
9487 |
+
},
|
9488 |
+
{
|
9489 |
+
"epoch": 1.414011830945763,
|
9490 |
+
"grad_norm": 52.37085158764497,
|
9491 |
+
"learning_rate": 4.3772789463583627e-08,
|
9492 |
+
"logits/chosen": -1.3224272727966309,
|
9493 |
+
"logits/rejected": -1.3503855466842651,
|
9494 |
+
"logps/chosen": -172.18899536132812,
|
9495 |
+
"logps/rejected": -193.6123504638672,
|
9496 |
+
"loss": 0.4386,
|
9497 |
+
"rewards/accuracies": 0.78125,
|
9498 |
+
"rewards/chosen": -0.44380372762680054,
|
9499 |
+
"rewards/margins": 0.857738733291626,
|
9500 |
+
"rewards/rejected": -1.3015424013137817,
|
9501 |
+
"step": 1240
|
9502 |
+
},
|
9503 |
+
{
|
9504 |
+
"epoch": 1.4162924951892237,
|
9505 |
+
"grad_norm": 64.55355325183278,
|
9506 |
+
"learning_rate": 4.345867544942353e-08,
|
9507 |
+
"logits/chosen": -1.2631657123565674,
|
9508 |
+
"logits/rejected": -1.3294970989227295,
|
9509 |
+
"logps/chosen": -181.64718627929688,
|
9510 |
+
"logps/rejected": -230.25701904296875,
|
9511 |
+
"loss": 0.4121,
|
9512 |
+
"rewards/accuracies": 0.71875,
|
9513 |
+
"rewards/chosen": -0.5138639211654663,
|
9514 |
+
"rewards/margins": 0.8278074264526367,
|
9515 |
+
"rewards/rejected": -1.341671347618103,
|
9516 |
+
"step": 1242
|
9517 |
+
},
|
9518 |
+
{
|
9519 |
+
"epoch": 1.4185731594326847,
|
9520 |
+
"grad_norm": 65.60045840124785,
|
9521 |
+
"learning_rate": 4.314537934448628e-08,
|
9522 |
+
"logits/chosen": -1.2260847091674805,
|
9523 |
+
"logits/rejected": -1.2379428148269653,
|
9524 |
+
"logps/chosen": -175.8683624267578,
|
9525 |
+
"logps/rejected": -202.67051696777344,
|
9526 |
+
"loss": 0.4525,
|
9527 |
+
"rewards/accuracies": 0.875,
|
9528 |
+
"rewards/chosen": -0.4962596893310547,
|
9529 |
+
"rewards/margins": 1.1486889123916626,
|
9530 |
+
"rewards/rejected": -1.6449487209320068,
|
9531 |
+
"step": 1244
|
9532 |
+
},
|
9533 |
+
{
|
9534 |
+
"epoch": 1.4208538236761457,
|
9535 |
+
"grad_norm": 77.13313632492196,
|
9536 |
+
"learning_rate": 4.283290568081591e-08,
|
9537 |
+
"logits/chosen": -1.1557011604309082,
|
9538 |
+
"logits/rejected": -1.2198400497436523,
|
9539 |
+
"logps/chosen": -171.91201782226562,
|
9540 |
+
"logps/rejected": -200.71144104003906,
|
9541 |
+
"loss": 0.4431,
|
9542 |
+
"rewards/accuracies": 0.84375,
|
9543 |
+
"rewards/chosen": -0.4828857481479645,
|
9544 |
+
"rewards/margins": 0.8654166460037231,
|
9545 |
+
"rewards/rejected": -1.3483023643493652,
|
9546 |
+
"step": 1246
|
9547 |
+
},
|
9548 |
+
{
|
9549 |
+
"epoch": 1.4231344879196066,
|
9550 |
+
"grad_norm": 55.00661044224625,
|
9551 |
+
"learning_rate": 4.2521258978559314e-08,
|
9552 |
+
"logits/chosen": -1.258105993270874,
|
9553 |
+
"logits/rejected": -1.282645583152771,
|
9554 |
+
"logps/chosen": -180.73272705078125,
|
9555 |
+
"logps/rejected": -241.80764770507812,
|
9556 |
+
"loss": 0.4077,
|
9557 |
+
"rewards/accuracies": 0.875,
|
9558 |
+
"rewards/chosen": -0.5227749347686768,
|
9559 |
+
"rewards/margins": 1.5435858964920044,
|
9560 |
+
"rewards/rejected": -2.0663607120513916,
|
9561 |
+
"step": 1248
|
9562 |
+
},
|
9563 |
+
{
|
9564 |
+
"epoch": 1.4254151521630676,
|
9565 |
+
"grad_norm": 53.935655347865456,
|
9566 |
+
"learning_rate": 4.2210443745900804e-08,
|
9567 |
+
"logits/chosen": -1.1817071437835693,
|
9568 |
+
"logits/rejected": -1.247178077697754,
|
9569 |
+
"logps/chosen": -151.1819610595703,
|
9570 |
+
"logps/rejected": -170.9989776611328,
|
9571 |
+
"loss": 0.4273,
|
9572 |
+
"rewards/accuracies": 0.8125,
|
9573 |
+
"rewards/chosen": -0.3710220456123352,
|
9574 |
+
"rewards/margins": 0.9180817604064941,
|
9575 |
+
"rewards/rejected": -1.2891038656234741,
|
9576 |
+
"step": 1250
|
9577 |
+
},
|
9578 |
+
{
|
9579 |
+
"epoch": 1.4276958164065285,
|
9580 |
+
"grad_norm": 57.828569007499375,
|
9581 |
+
"learning_rate": 4.190046447899689e-08,
|
9582 |
+
"logits/chosen": -1.2078405618667603,
|
9583 |
+
"logits/rejected": -1.2860413789749146,
|
9584 |
+
"logps/chosen": -141.80389404296875,
|
9585 |
+
"logps/rejected": -168.6719512939453,
|
9586 |
+
"loss": 0.4219,
|
9587 |
+
"rewards/accuracies": 0.75,
|
9588 |
+
"rewards/chosen": -0.4949069023132324,
|
9589 |
+
"rewards/margins": 0.8605579137802124,
|
9590 |
+
"rewards/rejected": -1.3554648160934448,
|
9591 |
+
"step": 1252
|
9592 |
+
},
|
9593 |
+
{
|
9594 |
+
"epoch": 1.4299764806499893,
|
9595 |
+
"grad_norm": 49.820693081313166,
|
9596 |
+
"learning_rate": 4.159132566191129e-08,
|
9597 |
+
"logits/chosen": -1.3393913507461548,
|
9598 |
+
"logits/rejected": -1.393333911895752,
|
9599 |
+
"logps/chosen": -139.9639434814453,
|
9600 |
+
"logps/rejected": -185.2383575439453,
|
9601 |
+
"loss": 0.401,
|
9602 |
+
"rewards/accuracies": 0.78125,
|
9603 |
+
"rewards/chosen": -0.3605578541755676,
|
9604 |
+
"rewards/margins": 1.1692156791687012,
|
9605 |
+
"rewards/rejected": -1.5297735929489136,
|
9606 |
+
"step": 1254
|
9607 |
+
},
|
9608 |
+
{
|
9609 |
+
"epoch": 1.4322571448934502,
|
9610 |
+
"grad_norm": 56.66007237622535,
|
9611 |
+
"learning_rate": 4.1283031766550014e-08,
|
9612 |
+
"logits/chosen": -1.159570336341858,
|
9613 |
+
"logits/rejected": -1.2179524898529053,
|
9614 |
+
"logps/chosen": -167.4163818359375,
|
9615 |
+
"logps/rejected": -245.31614685058594,
|
9616 |
+
"loss": 0.4344,
|
9617 |
+
"rewards/accuracies": 0.75,
|
9618 |
+
"rewards/chosen": -0.5021023750305176,
|
9619 |
+
"rewards/margins": 1.504926085472107,
|
9620 |
+
"rewards/rejected": -2.007028341293335,
|
9621 |
+
"step": 1256
|
9622 |
+
},
|
9623 |
+
{
|
9624 |
+
"epoch": 1.4345378091369112,
|
9625 |
+
"grad_norm": 70.62489205877687,
|
9626 |
+
"learning_rate": 4.097558725259672e-08,
|
9627 |
+
"logits/chosen": -1.33053457736969,
|
9628 |
+
"logits/rejected": -1.388944149017334,
|
9629 |
+
"logps/chosen": -178.24188232421875,
|
9630 |
+
"logps/rejected": -234.7947998046875,
|
9631 |
+
"loss": 0.3998,
|
9632 |
+
"rewards/accuracies": 0.78125,
|
9633 |
+
"rewards/chosen": -0.40949881076812744,
|
9634 |
+
"rewards/margins": 1.050101399421692,
|
9635 |
+
"rewards/rejected": -1.4596002101898193,
|
9636 |
+
"step": 1258
|
9637 |
+
},
|
9638 |
+
{
|
9639 |
+
"epoch": 1.436818473380372,
|
9640 |
+
"grad_norm": 66.98990507182315,
|
9641 |
+
"learning_rate": 4.0668996567448154e-08,
|
9642 |
+
"logits/chosen": -1.414581060409546,
|
9643 |
+
"logits/rejected": -1.4014796018600464,
|
9644 |
+
"logps/chosen": -169.44393920898438,
|
9645 |
+
"logps/rejected": -187.62033081054688,
|
9646 |
+
"loss": 0.4344,
|
9647 |
+
"rewards/accuracies": 0.65625,
|
9648 |
+
"rewards/chosen": -0.3710756003856659,
|
9649 |
+
"rewards/margins": 0.5481195449829102,
|
9650 |
+
"rewards/rejected": -0.9191950559616089,
|
9651 |
+
"step": 1260
|
9652 |
+
},
|
9653 |
+
{
|
9654 |
+
"epoch": 1.439099137623833,
|
9655 |
+
"grad_norm": 57.85916556533107,
|
9656 |
+
"learning_rate": 4.0363264146149844e-08,
|
9657 |
+
"logits/chosen": -1.191425085067749,
|
9658 |
+
"logits/rejected": -1.2502682209014893,
|
9659 |
+
"logps/chosen": -190.24957275390625,
|
9660 |
+
"logps/rejected": -221.98013305664062,
|
9661 |
+
"loss": 0.4816,
|
9662 |
+
"rewards/accuracies": 0.875,
|
9663 |
+
"rewards/chosen": -0.5489203333854675,
|
9664 |
+
"rewards/margins": 1.0626386404037476,
|
9665 |
+
"rewards/rejected": -1.6115591526031494,
|
9666 |
+
"step": 1262
|
9667 |
+
},
|
9668 |
+
{
|
9669 |
+
"epoch": 1.4413798018672939,
|
9670 |
+
"grad_norm": 54.90589553967945,
|
9671 |
+
"learning_rate": 4.005839441133198e-08,
|
9672 |
+
"logits/chosen": -1.243033766746521,
|
9673 |
+
"logits/rejected": -1.3693134784698486,
|
9674 |
+
"logps/chosen": -153.51661682128906,
|
9675 |
+
"logps/rejected": -217.17115783691406,
|
9676 |
+
"loss": 0.4357,
|
9677 |
+
"rewards/accuracies": 0.75,
|
9678 |
+
"rewards/chosen": -0.41171663999557495,
|
9679 |
+
"rewards/margins": 1.114880084991455,
|
9680 |
+
"rewards/rejected": -1.5265967845916748,
|
9681 |
+
"step": 1264
|
9682 |
+
},
|
9683 |
+
{
|
9684 |
+
"epoch": 1.4436604661107548,
|
9685 |
+
"grad_norm": 62.949403049310924,
|
9686 |
+
"learning_rate": 3.9754391773145326e-08,
|
9687 |
+
"logits/chosen": -1.3396437168121338,
|
9688 |
+
"logits/rejected": -1.3314851522445679,
|
9689 |
+
"logps/chosen": -195.9013671875,
|
9690 |
+
"logps/rejected": -226.99224853515625,
|
9691 |
+
"loss": 0.4549,
|
9692 |
+
"rewards/accuracies": 0.78125,
|
9693 |
+
"rewards/chosen": -0.6362702250480652,
|
9694 |
+
"rewards/margins": 0.9720792174339294,
|
9695 |
+
"rewards/rejected": -1.6083494424819946,
|
9696 |
+
"step": 1266
|
9697 |
+
},
|
9698 |
+
{
|
9699 |
+
"epoch": 1.4459411303542158,
|
9700 |
+
"grad_norm": 109.10943345175065,
|
9701 |
+
"learning_rate": 3.945126062919756e-08,
|
9702 |
+
"logits/chosen": -1.4142718315124512,
|
9703 |
+
"logits/rejected": -1.3863128423690796,
|
9704 |
+
"logps/chosen": -232.01536560058594,
|
9705 |
+
"logps/rejected": -258.9195861816406,
|
9706 |
+
"loss": 0.4683,
|
9707 |
+
"rewards/accuracies": 0.84375,
|
9708 |
+
"rewards/chosen": -0.5452659130096436,
|
9709 |
+
"rewards/margins": 1.1544592380523682,
|
9710 |
+
"rewards/rejected": -1.6997252702713013,
|
9711 |
+
"step": 1268
|
9712 |
+
},
|
9713 |
+
{
|
9714 |
+
"epoch": 1.4482217945976765,
|
9715 |
+
"grad_norm": 66.47161188134956,
|
9716 |
+
"learning_rate": 3.914900536448959e-08,
|
9717 |
+
"logits/chosen": -1.277639389038086,
|
9718 |
+
"logits/rejected": -1.2443594932556152,
|
9719 |
+
"logps/chosen": -167.44473266601562,
|
9720 |
+
"logps/rejected": -201.05442810058594,
|
9721 |
+
"loss": 0.468,
|
9722 |
+
"rewards/accuracies": 0.75,
|
9723 |
+
"rewards/chosen": -0.40504151582717896,
|
9724 |
+
"rewards/margins": 1.4868779182434082,
|
9725 |
+
"rewards/rejected": -1.8919193744659424,
|
9726 |
+
"step": 1270
|
9727 |
+
},
|
9728 |
+
{
|
9729 |
+
"epoch": 1.4505024588411375,
|
9730 |
+
"grad_norm": 65.21617644771925,
|
9731 |
+
"learning_rate": 3.8847630351352045e-08,
|
9732 |
+
"logits/chosen": -1.2669049501419067,
|
9733 |
+
"logits/rejected": -1.2787154912948608,
|
9734 |
+
"logps/chosen": -244.93118286132812,
|
9735 |
+
"logps/rejected": -307.34344482421875,
|
9736 |
+
"loss": 0.4429,
|
9737 |
+
"rewards/accuracies": 0.78125,
|
9738 |
+
"rewards/chosen": -0.9488785862922668,
|
9739 |
+
"rewards/margins": 1.5935635566711426,
|
9740 |
+
"rewards/rejected": -2.5424418449401855,
|
9741 |
+
"step": 1272
|
9742 |
+
},
|
9743 |
+
{
|
9744 |
+
"epoch": 1.4527831230845984,
|
9745 |
+
"grad_norm": 67.80268214039134,
|
9746 |
+
"learning_rate": 3.854713994938221e-08,
|
9747 |
+
"logits/chosen": -1.3307723999023438,
|
9748 |
+
"logits/rejected": -1.400517463684082,
|
9749 |
+
"logps/chosen": -156.8104248046875,
|
9750 |
+
"logps/rejected": -182.17807006835938,
|
9751 |
+
"loss": 0.4631,
|
9752 |
+
"rewards/accuracies": 0.78125,
|
9753 |
+
"rewards/chosen": -0.2960550785064697,
|
9754 |
+
"rewards/margins": 0.9029641151428223,
|
9755 |
+
"rewards/rejected": -1.1990193128585815,
|
9756 |
+
"step": 1274
|
9757 |
+
},
|
9758 |
+
{
|
9759 |
+
"epoch": 1.4550637873280592,
|
9760 |
+
"grad_norm": 66.08251651441701,
|
9761 |
+
"learning_rate": 3.8247538505380816e-08,
|
9762 |
+
"logits/chosen": -1.3477903604507446,
|
9763 |
+
"logits/rejected": -1.4483450651168823,
|
9764 |
+
"logps/chosen": -194.99876403808594,
|
9765 |
+
"logps/rejected": -235.32986450195312,
|
9766 |
+
"loss": 0.4232,
|
9767 |
+
"rewards/accuracies": 0.78125,
|
9768 |
+
"rewards/chosen": -0.44531339406967163,
|
9769 |
+
"rewards/margins": 0.8939595222473145,
|
9770 |
+
"rewards/rejected": -1.3392727375030518,
|
9771 |
+
"step": 1276
|
9772 |
+
},
|
9773 |
+
{
|
9774 |
+
"epoch": 1.4573444515715201,
|
9775 |
+
"grad_norm": 64.19995573754183,
|
9776 |
+
"learning_rate": 3.794883035328921e-08,
|
9777 |
+
"logits/chosen": -1.2755396366119385,
|
9778 |
+
"logits/rejected": -1.3425655364990234,
|
9779 |
+
"logps/chosen": -186.4855499267578,
|
9780 |
+
"logps/rejected": -233.87648010253906,
|
9781 |
+
"loss": 0.4074,
|
9782 |
+
"rewards/accuracies": 0.84375,
|
9783 |
+
"rewards/chosen": -0.3420637249946594,
|
9784 |
+
"rewards/margins": 1.2022613286972046,
|
9785 |
+
"rewards/rejected": -1.5443251132965088,
|
9786 |
+
"step": 1278
|
9787 |
+
},
|
9788 |
+
{
|
9789 |
+
"epoch": 1.4596251158149811,
|
9790 |
+
"grad_norm": 51.505850800471386,
|
9791 |
+
"learning_rate": 3.765101981412665e-08,
|
9792 |
+
"logits/chosen": -1.0226508378982544,
|
9793 |
+
"logits/rejected": -1.1150188446044922,
|
9794 |
+
"logps/chosen": -165.54244995117188,
|
9795 |
+
"logps/rejected": -209.08641052246094,
|
9796 |
+
"loss": 0.4498,
|
9797 |
+
"rewards/accuracies": 0.78125,
|
9798 |
+
"rewards/chosen": -0.6313174962997437,
|
9799 |
+
"rewards/margins": 0.6761065721511841,
|
9800 |
+
"rewards/rejected": -1.3074240684509277,
|
9801 |
+
"step": 1280
|
9802 |
+
},
|
9803 |
+
{
|
9804 |
+
"epoch": 1.461905780058442,
|
9805 |
+
"grad_norm": 65.45552204323278,
|
9806 |
+
"learning_rate": 3.735411119592782e-08,
|
9807 |
+
"logits/chosen": -1.1841048002243042,
|
9808 |
+
"logits/rejected": -1.1225578784942627,
|
9809 |
+
"logps/chosen": -210.95909118652344,
|
9810 |
+
"logps/rejected": -228.45896911621094,
|
9811 |
+
"loss": 0.4092,
|
9812 |
+
"rewards/accuracies": 0.90625,
|
9813 |
+
"rewards/chosen": -0.6781859397888184,
|
9814 |
+
"rewards/margins": 1.1642651557922363,
|
9815 |
+
"rewards/rejected": -1.8424510955810547,
|
9816 |
+
"step": 1282
|
9817 |
+
},
|
9818 |
+
{
|
9819 |
+
"epoch": 1.464186444301903,
|
9820 |
+
"grad_norm": 54.509138851706474,
|
9821 |
+
"learning_rate": 3.705810879368047e-08,
|
9822 |
+
"logits/chosen": -1.260365605354309,
|
9823 |
+
"logits/rejected": -1.2909530401229858,
|
9824 |
+
"logps/chosen": -191.3780517578125,
|
9825 |
+
"logps/rejected": -205.57510375976562,
|
9826 |
+
"loss": 0.4331,
|
9827 |
+
"rewards/accuracies": 0.84375,
|
9828 |
+
"rewards/chosen": -0.3257947564125061,
|
9829 |
+
"rewards/margins": 1.1021668910980225,
|
9830 |
+
"rewards/rejected": -1.4279615879058838,
|
9831 |
+
"step": 1284
|
9832 |
+
},
|
9833 |
+
{
|
9834 |
+
"epoch": 1.4664671085453638,
|
9835 |
+
"grad_norm": 62.120618048817526,
|
9836 |
+
"learning_rate": 3.6763016889263345e-08,
|
9837 |
+
"logits/chosen": -1.233807921409607,
|
9838 |
+
"logits/rejected": -1.1972962617874146,
|
9839 |
+
"logps/chosen": -130.00839233398438,
|
9840 |
+
"logps/rejected": -157.5312957763672,
|
9841 |
+
"loss": 0.4483,
|
9842 |
+
"rewards/accuracies": 0.78125,
|
9843 |
+
"rewards/chosen": -0.3486970067024231,
|
9844 |
+
"rewards/margins": 0.8312156796455383,
|
9845 |
+
"rewards/rejected": -1.1799125671386719,
|
9846 |
+
"step": 1286
|
9847 |
+
},
|
9848 |
+
{
|
9849 |
+
"epoch": 1.4687477727888247,
|
9850 |
+
"grad_norm": 62.47302706565703,
|
9851 |
+
"learning_rate": 3.6468839751384206e-08,
|
9852 |
+
"logits/chosen": -1.2912284135818481,
|
9853 |
+
"logits/rejected": -1.286245584487915,
|
9854 |
+
"logps/chosen": -225.38636779785156,
|
9855 |
+
"logps/rejected": -253.08624267578125,
|
9856 |
+
"loss": 0.4193,
|
9857 |
+
"rewards/accuracies": 0.8125,
|
9858 |
+
"rewards/chosen": -0.7340028285980225,
|
9859 |
+
"rewards/margins": 1.2588945627212524,
|
9860 |
+
"rewards/rejected": -1.9928972721099854,
|
9861 |
+
"step": 1288
|
9862 |
+
},
|
9863 |
+
{
|
9864 |
+
"epoch": 1.4710284370322857,
|
9865 |
+
"grad_norm": 56.815010583661,
|
9866 |
+
"learning_rate": 3.6175581635518015e-08,
|
9867 |
+
"logits/chosen": -1.3371250629425049,
|
9868 |
+
"logits/rejected": -1.3072669506072998,
|
9869 |
+
"logps/chosen": -188.99107360839844,
|
9870 |
+
"logps/rejected": -214.9119873046875,
|
9871 |
+
"loss": 0.4364,
|
9872 |
+
"rewards/accuracies": 0.875,
|
9873 |
+
"rewards/chosen": -0.46143385767936707,
|
9874 |
+
"rewards/margins": 1.1111385822296143,
|
9875 |
+
"rewards/rejected": -1.5725722312927246,
|
9876 |
+
"step": 1290
|
9877 |
+
},
|
9878 |
+
{
|
9879 |
+
"epoch": 1.4733091012757464,
|
9880 |
+
"grad_norm": 74.02679784754338,
|
9881 |
+
"learning_rate": 3.5883246783845543e-08,
|
9882 |
+
"logits/chosen": -1.2495771646499634,
|
9883 |
+
"logits/rejected": -1.2960941791534424,
|
9884 |
+
"logps/chosen": -157.36351013183594,
|
9885 |
+
"logps/rejected": -196.3470916748047,
|
9886 |
+
"loss": 0.4401,
|
9887 |
+
"rewards/accuracies": 0.84375,
|
9888 |
+
"rewards/chosen": -0.23140710592269897,
|
9889 |
+
"rewards/margins": 1.0461037158966064,
|
9890 |
+
"rewards/rejected": -1.2775108814239502,
|
9891 |
+
"step": 1292
|
9892 |
+
},
|
9893 |
+
{
|
9894 |
+
"epoch": 1.4755897655192074,
|
9895 |
+
"grad_norm": 61.44266871054178,
|
9896 |
+
"learning_rate": 3.559183942519188e-08,
|
9897 |
+
"logits/chosen": -1.3195384740829468,
|
9898 |
+
"logits/rejected": -1.3464099168777466,
|
9899 |
+
"logps/chosen": -170.23886108398438,
|
9900 |
+
"logps/rejected": -182.47579956054688,
|
9901 |
+
"loss": 0.4044,
|
9902 |
+
"rewards/accuracies": 0.75,
|
9903 |
+
"rewards/chosen": -0.5699801445007324,
|
9904 |
+
"rewards/margins": 0.663261890411377,
|
9905 |
+
"rewards/rejected": -1.233242154121399,
|
9906 |
+
"step": 1294
|
9907 |
+
},
|
9908 |
+
{
|
9909 |
+
"epoch": 1.4778704297626684,
|
9910 |
+
"grad_norm": 65.18540943608312,
|
9911 |
+
"learning_rate": 3.530136377496525e-08,
|
9912 |
+
"logits/chosen": -1.278255581855774,
|
9913 |
+
"logits/rejected": -1.2963995933532715,
|
9914 |
+
"logps/chosen": -214.0897216796875,
|
9915 |
+
"logps/rejected": -238.54718017578125,
|
9916 |
+
"loss": 0.4458,
|
9917 |
+
"rewards/accuracies": 0.90625,
|
9918 |
+
"rewards/chosen": -0.5689273476600647,
|
9919 |
+
"rewards/margins": 1.0575151443481445,
|
9920 |
+
"rewards/rejected": -1.6264426708221436,
|
9921 |
+
"step": 1296
|
9922 |
+
},
|
9923 |
+
{
|
9924 |
+
"epoch": 1.4801510940061293,
|
9925 |
+
"grad_norm": 56.51869240720061,
|
9926 |
+
"learning_rate": 3.50118240350961e-08,
|
9927 |
+
"logits/chosen": -1.2410857677459717,
|
9928 |
+
"logits/rejected": -1.3088514804840088,
|
9929 |
+
"logps/chosen": -158.88926696777344,
|
9930 |
+
"logps/rejected": -195.86138916015625,
|
9931 |
+
"loss": 0.449,
|
9932 |
+
"rewards/accuracies": 0.625,
|
9933 |
+
"rewards/chosen": -0.5141651630401611,
|
9934 |
+
"rewards/margins": 0.7266778945922852,
|
9935 |
+
"rewards/rejected": -1.2408430576324463,
|
9936 |
+
"step": 1298
|
9937 |
+
},
|
9938 |
+
{
|
9939 |
+
"epoch": 1.4824317582495903,
|
9940 |
+
"grad_norm": 69.4034723256464,
|
9941 |
+
"learning_rate": 3.472322439397635e-08,
|
9942 |
+
"logits/chosen": -1.3286279439926147,
|
9943 |
+
"logits/rejected": -1.384574294090271,
|
9944 |
+
"logps/chosen": -219.34544372558594,
|
9945 |
+
"logps/rejected": -237.2283172607422,
|
9946 |
+
"loss": 0.4893,
|
9947 |
+
"rewards/accuracies": 0.53125,
|
9948 |
+
"rewards/chosen": -0.6901402473449707,
|
9949 |
+
"rewards/margins": 0.6859029531478882,
|
9950 |
+
"rewards/rejected": -1.3760432004928589,
|
9951 |
+
"step": 1300
|
9952 |
+
},
|
9953 |
+
{
|
9954 |
+
"epoch": 1.4824317582495903,
|
9955 |
+
"eval_logits/chosen": -1.3521403074264526,
|
9956 |
+
"eval_logits/rejected": -1.3340317010879517,
|
9957 |
+
"eval_logps/chosen": -131.97569274902344,
|
9958 |
+
"eval_logps/rejected": -138.84446716308594,
|
9959 |
+
"eval_loss": 0.5475608110427856,
|
9960 |
+
"eval_rewards/accuracies": 0.7200000286102295,
|
9961 |
+
"eval_rewards/chosen": -0.20777291059494019,
|
9962 |
+
"eval_rewards/margins": 0.47040116786956787,
|
9963 |
+
"eval_rewards/rejected": -0.6781739592552185,
|
9964 |
+
"eval_runtime": 21.6329,
|
9965 |
+
"eval_samples_per_second": 4.623,
|
9966 |
+
"eval_steps_per_second": 1.156,
|
9967 |
+
"step": 1300
|
9968 |
+
},
|
9969 |
+
{
|
9970 |
+
"epoch": 1.484712422493051,
|
9971 |
+
"grad_norm": 66.5561433557891,
|
9972 |
+
"learning_rate": 3.4435569026398645e-08,
|
9973 |
+
"logits/chosen": -1.1897640228271484,
|
9974 |
+
"logits/rejected": -1.3370938301086426,
|
9975 |
+
"logps/chosen": -141.83460998535156,
|
9976 |
+
"logps/rejected": -182.74176025390625,
|
9977 |
+
"loss": 0.4463,
|
9978 |
+
"rewards/accuracies": 0.8125,
|
9979 |
+
"rewards/chosen": -0.18475398421287537,
|
9980 |
+
"rewards/margins": 0.82102370262146,
|
9981 |
+
"rewards/rejected": -1.0057775974273682,
|
9982 |
+
"step": 1302
|
9983 |
+
},
|
9984 |
+
{
|
9985 |
+
"epoch": 1.486993086736512,
|
9986 |
+
"grad_norm": 48.88961410554484,
|
9987 |
+
"learning_rate": 3.4148862093496145e-08,
|
9988 |
+
"logits/chosen": -1.2648987770080566,
|
9989 |
+
"logits/rejected": -1.274294376373291,
|
9990 |
+
"logps/chosen": -161.31605529785156,
|
9991 |
+
"logps/rejected": -165.04278564453125,
|
9992 |
+
"loss": 0.3889,
|
9993 |
+
"rewards/accuracies": 0.75,
|
9994 |
+
"rewards/chosen": -0.4667995870113373,
|
9995 |
+
"rewards/margins": 0.7909737229347229,
|
9996 |
+
"rewards/rejected": -1.2577731609344482,
|
9997 |
+
"step": 1304
|
9998 |
+
},
|
9999 |
+
{
|
10000 |
+
"epoch": 1.489273750979973,
|
10001 |
+
"grad_norm": 67.73951681369277,
|
10002 |
+
"learning_rate": 3.386310774268214e-08,
|
10003 |
+
"logits/chosen": -1.2678455114364624,
|
10004 |
+
"logits/rejected": -1.320731520652771,
|
10005 |
+
"logps/chosen": -204.93576049804688,
|
10006 |
+
"logps/rejected": -220.75344848632812,
|
10007 |
+
"loss": 0.433,
|
10008 |
+
"rewards/accuracies": 0.84375,
|
10009 |
+
"rewards/chosen": -0.5178290605545044,
|
10010 |
+
"rewards/margins": 0.9241759777069092,
|
10011 |
+
"rewards/rejected": -1.4420050382614136,
|
10012 |
+
"step": 1306
|
10013 |
+
},
|
10014 |
+
{
|
10015 |
+
"epoch": 1.491554415223434,
|
10016 |
+
"grad_norm": 53.60885013196278,
|
10017 |
+
"learning_rate": 3.3578310107590255e-08,
|
10018 |
+
"logits/chosen": -1.2518330812454224,
|
10019 |
+
"logits/rejected": -1.3134666681289673,
|
10020 |
+
"logps/chosen": -128.73098754882812,
|
10021 |
+
"logps/rejected": -141.5028839111328,
|
10022 |
+
"loss": 0.4175,
|
10023 |
+
"rewards/accuracies": 0.78125,
|
10024 |
+
"rewards/chosen": -0.4798870086669922,
|
10025 |
+
"rewards/margins": 0.7654281854629517,
|
10026 |
+
"rewards/rejected": -1.2453151941299438,
|
10027 |
+
"step": 1308
|
10028 |
+
},
|
10029 |
+
{
|
10030 |
+
"epoch": 1.4938350794668946,
|
10031 |
+
"grad_norm": 64.42291386610013,
|
10032 |
+
"learning_rate": 3.329447330801455e-08,
|
10033 |
+
"logits/chosen": -1.2452740669250488,
|
10034 |
+
"logits/rejected": -1.2846417427062988,
|
10035 |
+
"logps/chosen": -127.42951202392578,
|
10036 |
+
"logps/rejected": -177.83033752441406,
|
10037 |
+
"loss": 0.4905,
|
10038 |
+
"rewards/accuracies": 0.78125,
|
10039 |
+
"rewards/chosen": -0.4574730694293976,
|
10040 |
+
"rewards/margins": 1.024438500404358,
|
10041 |
+
"rewards/rejected": -1.481911540031433,
|
10042 |
+
"step": 1310
|
10043 |
+
},
|
10044 |
+
{
|
10045 |
+
"epoch": 1.4961157437103556,
|
10046 |
+
"grad_norm": 60.99913378119905,
|
10047 |
+
"learning_rate": 3.3011601449849914e-08,
|
10048 |
+
"logits/chosen": -1.2262144088745117,
|
10049 |
+
"logits/rejected": -1.200211763381958,
|
10050 |
+
"logps/chosen": -175.07473754882812,
|
10051 |
+
"logps/rejected": -194.3573455810547,
|
10052 |
+
"loss": 0.419,
|
10053 |
+
"rewards/accuracies": 0.78125,
|
10054 |
+
"rewards/chosen": -0.5272756814956665,
|
10055 |
+
"rewards/margins": 1.1627264022827148,
|
10056 |
+
"rewards/rejected": -1.6900020837783813,
|
10057 |
+
"step": 1312
|
10058 |
+
},
|
10059 |
+
{
|
10060 |
+
"epoch": 1.4983964079538166,
|
10061 |
+
"grad_norm": 61.30357952779087,
|
10062 |
+
"learning_rate": 3.272969862503271e-08,
|
10063 |
+
"logits/chosen": -1.258878231048584,
|
10064 |
+
"logits/rejected": -1.3279513120651245,
|
10065 |
+
"logps/chosen": -169.57151794433594,
|
10066 |
+
"logps/rejected": -205.15086364746094,
|
10067 |
+
"loss": 0.4254,
|
10068 |
+
"rewards/accuracies": 0.9375,
|
10069 |
+
"rewards/chosen": -0.45883575081825256,
|
10070 |
+
"rewards/margins": 1.1377463340759277,
|
10071 |
+
"rewards/rejected": -1.5965821743011475,
|
10072 |
+
"step": 1314
|
10073 |
+
},
|
10074 |
+
{
|
10075 |
+
"epoch": 1.5006770721972775,
|
10076 |
+
"grad_norm": 60.7970290747735,
|
10077 |
+
"learning_rate": 3.2448768911481574e-08,
|
10078 |
+
"logits/chosen": -1.3344089984893799,
|
10079 |
+
"logits/rejected": -1.3924615383148193,
|
10080 |
+
"logps/chosen": -242.09625244140625,
|
10081 |
+
"logps/rejected": -284.8103942871094,
|
10082 |
+
"loss": 0.3898,
|
10083 |
+
"rewards/accuracies": 0.90625,
|
10084 |
+
"rewards/chosen": -0.42801302671432495,
|
10085 |
+
"rewards/margins": 1.6990736722946167,
|
10086 |
+
"rewards/rejected": -2.127086639404297,
|
10087 |
+
"step": 1316
|
10088 |
+
},
|
10089 |
+
{
|
10090 |
+
"epoch": 1.5029577364407385,
|
10091 |
+
"grad_norm": 58.23756711117375,
|
10092 |
+
"learning_rate": 3.216881637303839e-08,
|
10093 |
+
"logits/chosen": -1.2963494062423706,
|
10094 |
+
"logits/rejected": -1.2964308261871338,
|
10095 |
+
"logps/chosen": -194.5859375,
|
10096 |
+
"logps/rejected": -207.51177978515625,
|
10097 |
+
"loss": 0.4461,
|
10098 |
+
"rewards/accuracies": 0.84375,
|
10099 |
+
"rewards/chosen": -0.37401753664016724,
|
10100 |
+
"rewards/margins": 0.9556913375854492,
|
10101 |
+
"rewards/rejected": -1.3297089338302612,
|
10102 |
+
"step": 1318
|
10103 |
+
},
|
10104 |
+
{
|
10105 |
+
"epoch": 1.5052384006841992,
|
10106 |
+
"grad_norm": 52.85976726825495,
|
10107 |
+
"learning_rate": 3.188984505940955e-08,
|
10108 |
+
"logits/chosen": -1.310511589050293,
|
10109 |
+
"logits/rejected": -1.3698493242263794,
|
10110 |
+
"logps/chosen": -155.9166717529297,
|
10111 |
+
"logps/rejected": -185.9949951171875,
|
10112 |
+
"loss": 0.4608,
|
10113 |
+
"rewards/accuracies": 0.78125,
|
10114 |
+
"rewards/chosen": -0.28941965103149414,
|
10115 |
+
"rewards/margins": 0.9209386110305786,
|
10116 |
+
"rewards/rejected": -1.2103582620620728,
|
10117 |
+
"step": 1320
|
10118 |
+
},
|
10119 |
+
{
|
10120 |
+
"epoch": 1.5075190649276602,
|
10121 |
+
"grad_norm": 65.51821056925988,
|
10122 |
+
"learning_rate": 3.161185900610737e-08,
|
10123 |
+
"logits/chosen": -1.2264206409454346,
|
10124 |
+
"logits/rejected": -1.2563592195510864,
|
10125 |
+
"logps/chosen": -169.9464111328125,
|
10126 |
+
"logps/rejected": -215.42799377441406,
|
10127 |
+
"loss": 0.4746,
|
10128 |
+
"rewards/accuracies": 0.84375,
|
10129 |
+
"rewards/chosen": -0.45204079151153564,
|
10130 |
+
"rewards/margins": 0.9063868522644043,
|
10131 |
+
"rewards/rejected": -1.35842764377594,
|
10132 |
+
"step": 1322
|
10133 |
+
},
|
10134 |
+
{
|
10135 |
+
"epoch": 1.509799729171121,
|
10136 |
+
"grad_norm": 65.41148602924677,
|
10137 |
+
"learning_rate": 3.1334862234391624e-08,
|
10138 |
+
"logits/chosen": -1.1124111413955688,
|
10139 |
+
"logits/rejected": -1.2182986736297607,
|
10140 |
+
"logps/chosen": -168.18359375,
|
10141 |
+
"logps/rejected": -212.33782958984375,
|
10142 |
+
"loss": 0.3879,
|
10143 |
+
"rewards/accuracies": 0.9375,
|
10144 |
+
"rewards/chosen": -0.4647515118122101,
|
10145 |
+
"rewards/margins": 1.2126293182373047,
|
10146 |
+
"rewards/rejected": -1.677380919456482,
|
10147 |
+
"step": 1324
|
10148 |
+
},
|
10149 |
+
{
|
10150 |
+
"epoch": 1.512080393414582,
|
10151 |
+
"grad_norm": 56.95611387521237,
|
10152 |
+
"learning_rate": 3.105885875121151e-08,
|
10153 |
+
"logits/chosen": -1.2453256845474243,
|
10154 |
+
"logits/rejected": -1.3663585186004639,
|
10155 |
+
"logps/chosen": -183.23095703125,
|
10156 |
+
"logps/rejected": -234.28765869140625,
|
10157 |
+
"loss": 0.4144,
|
10158 |
+
"rewards/accuracies": 0.90625,
|
10159 |
+
"rewards/chosen": -0.22449856996536255,
|
10160 |
+
"rewards/margins": 1.4339529275894165,
|
10161 |
+
"rewards/rejected": -1.6584514379501343,
|
10162 |
+
"step": 1326
|
10163 |
+
},
|
10164 |
+
{
|
10165 |
+
"epoch": 1.5143610576580429,
|
10166 |
+
"grad_norm": 75.28120333419699,
|
10167 |
+
"learning_rate": 3.078385254914764e-08,
|
10168 |
+
"logits/chosen": -1.246031403541565,
|
10169 |
+
"logits/rejected": -1.3311541080474854,
|
10170 |
+
"logps/chosen": -181.19082641601562,
|
10171 |
+
"logps/rejected": -224.01809692382812,
|
10172 |
+
"loss": 0.4438,
|
10173 |
+
"rewards/accuracies": 0.71875,
|
10174 |
+
"rewards/chosen": -0.5463556051254272,
|
10175 |
+
"rewards/margins": 1.012133240699768,
|
10176 |
+
"rewards/rejected": -1.5584888458251953,
|
10177 |
+
"step": 1328
|
10178 |
+
},
|
10179 |
+
{
|
10180 |
+
"epoch": 1.5166417219015038,
|
10181 |
+
"grad_norm": 59.21659370389,
|
10182 |
+
"learning_rate": 3.0509847606354214e-08,
|
10183 |
+
"logits/chosen": -1.1559014320373535,
|
10184 |
+
"logits/rejected": -1.175613522529602,
|
10185 |
+
"logps/chosen": -175.91024780273438,
|
10186 |
+
"logps/rejected": -201.29910278320312,
|
10187 |
+
"loss": 0.4106,
|
10188 |
+
"rewards/accuracies": 0.9375,
|
10189 |
+
"rewards/chosen": -0.5821335315704346,
|
10190 |
+
"rewards/margins": 1.0530939102172852,
|
10191 |
+
"rewards/rejected": -1.6352273225784302,
|
10192 |
+
"step": 1330
|
10193 |
+
},
|
10194 |
+
{
|
10195 |
+
"epoch": 1.5189223861449648,
|
10196 |
+
"grad_norm": 65.02090257303733,
|
10197 |
+
"learning_rate": 3.0236847886501535e-08,
|
10198 |
+
"logits/chosen": -1.3365192413330078,
|
10199 |
+
"logits/rejected": -1.3719249963760376,
|
10200 |
+
"logps/chosen": -195.5978546142578,
|
10201 |
+
"logps/rejected": -219.9347686767578,
|
10202 |
+
"loss": 0.3962,
|
10203 |
+
"rewards/accuracies": 0.90625,
|
10204 |
+
"rewards/chosen": -0.3096795678138733,
|
10205 |
+
"rewards/margins": 1.1052062511444092,
|
10206 |
+
"rewards/rejected": -1.4148855209350586,
|
10207 |
+
"step": 1332
|
10208 |
+
},
|
10209 |
+
{
|
10210 |
+
"epoch": 1.5212030503884257,
|
10211 |
+
"grad_norm": 59.28308867818603,
|
10212 |
+
"learning_rate": 2.9964857338718716e-08,
|
10213 |
+
"logits/chosen": -1.357577919960022,
|
10214 |
+
"logits/rejected": -1.2968313694000244,
|
10215 |
+
"logps/chosen": -199.23211669921875,
|
10216 |
+
"logps/rejected": -185.07896423339844,
|
10217 |
+
"loss": 0.4879,
|
10218 |
+
"rewards/accuracies": 0.71875,
|
10219 |
+
"rewards/chosen": -0.36101239919662476,
|
10220 |
+
"rewards/margins": 0.6830317974090576,
|
10221 |
+
"rewards/rejected": -1.0440441370010376,
|
10222 |
+
"step": 1334
|
10223 |
+
},
|
10224 |
+
{
|
10225 |
+
"epoch": 1.5234837146318867,
|
10226 |
+
"grad_norm": 55.43183849143864,
|
10227 |
+
"learning_rate": 2.9693879897536432e-08,
|
10228 |
+
"logits/chosen": -1.1980834007263184,
|
10229 |
+
"logits/rejected": -1.229064702987671,
|
10230 |
+
"logps/chosen": -199.63746643066406,
|
10231 |
+
"logps/rejected": -221.99234008789062,
|
10232 |
+
"loss": 0.4335,
|
10233 |
+
"rewards/accuracies": 0.84375,
|
10234 |
+
"rewards/chosen": -0.5939749479293823,
|
10235 |
+
"rewards/margins": 1.0861446857452393,
|
10236 |
+
"rewards/rejected": -1.6801198720932007,
|
10237 |
+
"step": 1336
|
10238 |
+
},
|
10239 |
+
{
|
10240 |
+
"epoch": 1.5257643788753474,
|
10241 |
+
"grad_norm": 46.58400293343511,
|
10242 |
+
"learning_rate": 2.9423919482830118e-08,
|
10243 |
+
"logits/chosen": -1.1741948127746582,
|
10244 |
+
"logits/rejected": -1.2695064544677734,
|
10245 |
+
"logps/chosen": -147.90426635742188,
|
10246 |
+
"logps/rejected": -210.15591430664062,
|
10247 |
+
"loss": 0.4373,
|
10248 |
+
"rewards/accuracies": 0.90625,
|
10249 |
+
"rewards/chosen": -0.16375023126602173,
|
10250 |
+
"rewards/margins": 1.4425245523452759,
|
10251 |
+
"rewards/rejected": -1.6062746047973633,
|
10252 |
+
"step": 1338
|
10253 |
+
},
|
10254 |
+
{
|
10255 |
+
"epoch": 1.5280450431188084,
|
10256 |
+
"grad_norm": 60.486372599547316,
|
10257 |
+
"learning_rate": 2.9154979999763197e-08,
|
10258 |
+
"logits/chosen": -1.2853294610977173,
|
10259 |
+
"logits/rejected": -1.3705867528915405,
|
10260 |
+
"logps/chosen": -159.66326904296875,
|
10261 |
+
"logps/rejected": -226.9650421142578,
|
10262 |
+
"loss": 0.4299,
|
10263 |
+
"rewards/accuracies": 0.90625,
|
10264 |
+
"rewards/chosen": -0.5052796006202698,
|
10265 |
+
"rewards/margins": 1.3695988655090332,
|
10266 |
+
"rewards/rejected": -1.8748785257339478,
|
10267 |
+
"step": 1340
|
10268 |
+
},
|
10269 |
+
{
|
10270 |
+
"epoch": 1.5303257073622691,
|
10271 |
+
"grad_norm": 66.91951190930519,
|
10272 |
+
"learning_rate": 2.8887065338730633e-08,
|
10273 |
+
"logits/chosen": -1.1936756372451782,
|
10274 |
+
"logits/rejected": -1.2785755395889282,
|
10275 |
+
"logps/chosen": -156.64866638183594,
|
10276 |
+
"logps/rejected": -195.68453979492188,
|
10277 |
+
"loss": 0.4813,
|
10278 |
+
"rewards/accuracies": 0.8125,
|
10279 |
+
"rewards/chosen": -0.30220431089401245,
|
10280 |
+
"rewards/margins": 0.8970733880996704,
|
10281 |
+
"rewards/rejected": -1.1992775201797485,
|
10282 |
+
"step": 1342
|
10283 |
+
},
|
10284 |
+
{
|
10285 |
+
"epoch": 1.53260637160573,
|
10286 |
+
"grad_norm": 60.327377050918805,
|
10287 |
+
"learning_rate": 2.86201793753026e-08,
|
10288 |
+
"logits/chosen": -1.2364442348480225,
|
10289 |
+
"logits/rejected": -1.2909033298492432,
|
10290 |
+
"logps/chosen": -191.64169311523438,
|
10291 |
+
"logps/rejected": -228.86416625976562,
|
10292 |
+
"loss": 0.4307,
|
10293 |
+
"rewards/accuracies": 0.8125,
|
10294 |
+
"rewards/chosen": -0.5236424207687378,
|
10295 |
+
"rewards/margins": 0.9988542795181274,
|
10296 |
+
"rewards/rejected": -1.5224968194961548,
|
10297 |
+
"step": 1344
|
10298 |
+
},
|
10299 |
+
{
|
10300 |
+
"epoch": 1.534887035849191,
|
10301 |
+
"grad_norm": 71.82683810198446,
|
10302 |
+
"learning_rate": 2.835432597016848e-08,
|
10303 |
+
"logits/chosen": -1.19918692111969,
|
10304 |
+
"logits/rejected": -1.2287514209747314,
|
10305 |
+
"logps/chosen": -203.8031768798828,
|
10306 |
+
"logps/rejected": -217.00051879882812,
|
10307 |
+
"loss": 0.4555,
|
10308 |
+
"rewards/accuracies": 0.9375,
|
10309 |
+
"rewards/chosen": -0.5840819478034973,
|
10310 |
+
"rewards/margins": 1.0689098834991455,
|
10311 |
+
"rewards/rejected": -1.6529918909072876,
|
10312 |
+
"step": 1346
|
10313 |
+
},
|
10314 |
+
{
|
10315 |
+
"epoch": 1.537167700092652,
|
10316 |
+
"grad_norm": 51.67351577452576,
|
10317 |
+
"learning_rate": 2.8089508969081e-08,
|
10318 |
+
"logits/chosen": -1.1072896718978882,
|
10319 |
+
"logits/rejected": -1.1964483261108398,
|
10320 |
+
"logps/chosen": -167.41482543945312,
|
10321 |
+
"logps/rejected": -227.43304443359375,
|
10322 |
+
"loss": 0.3874,
|
10323 |
+
"rewards/accuracies": 0.84375,
|
10324 |
+
"rewards/chosen": -0.4981079697608948,
|
10325 |
+
"rewards/margins": 1.5203238725662231,
|
10326 |
+
"rewards/rejected": -2.0184319019317627,
|
10327 |
+
"step": 1348
|
10328 |
+
},
|
10329 |
+
{
|
10330 |
+
"epoch": 1.539448364336113,
|
10331 |
+
"grad_norm": 54.16559868864558,
|
10332 |
+
"learning_rate": 2.7825732202800544e-08,
|
10333 |
+
"logits/chosen": -1.1472342014312744,
|
10334 |
+
"logits/rejected": -1.2177824974060059,
|
10335 |
+
"logps/chosen": -162.28453063964844,
|
10336 |
+
"logps/rejected": -183.14820861816406,
|
10337 |
+
"loss": 0.4325,
|
10338 |
+
"rewards/accuracies": 0.75,
|
10339 |
+
"rewards/chosen": -0.6399708986282349,
|
10340 |
+
"rewards/margins": 0.8657874464988708,
|
10341 |
+
"rewards/rejected": -1.5057581663131714,
|
10342 |
+
"step": 1350
|
10343 |
+
},
|
10344 |
+
{
|
10345 |
+
"epoch": 1.541729028579574,
|
10346 |
+
"grad_norm": 59.09514477278818,
|
10347 |
+
"learning_rate": 2.756299948703982e-08,
|
10348 |
+
"logits/chosen": -1.1995211839675903,
|
10349 |
+
"logits/rejected": -1.2396866083145142,
|
10350 |
+
"logps/chosen": -132.65550231933594,
|
10351 |
+
"logps/rejected": -141.451416015625,
|
10352 |
+
"loss": 0.4078,
|
10353 |
+
"rewards/accuracies": 0.75,
|
10354 |
+
"rewards/chosen": -0.38874107599258423,
|
10355 |
+
"rewards/margins": 0.7118159532546997,
|
10356 |
+
"rewards/rejected": -1.1005568504333496,
|
10357 |
+
"step": 1352
|
10358 |
+
},
|
10359 |
+
{
|
10360 |
+
"epoch": 1.5440096928230347,
|
10361 |
+
"grad_norm": 61.53811528707539,
|
10362 |
+
"learning_rate": 2.7301314622408612e-08,
|
10363 |
+
"logits/chosen": -1.2403637170791626,
|
10364 |
+
"logits/rejected": -1.3120653629302979,
|
10365 |
+
"logps/chosen": -156.661865234375,
|
10366 |
+
"logps/rejected": -218.12490844726562,
|
10367 |
+
"loss": 0.4467,
|
10368 |
+
"rewards/accuracies": 0.8125,
|
10369 |
+
"rewards/chosen": -0.468991219997406,
|
10370 |
+
"rewards/margins": 1.6298167705535889,
|
10371 |
+
"rewards/rejected": -2.0988078117370605,
|
10372 |
+
"step": 1354
|
10373 |
+
},
|
10374 |
+
{
|
10375 |
+
"epoch": 1.5462903570664956,
|
10376 |
+
"grad_norm": 68.28115300743727,
|
10377 |
+
"learning_rate": 2.704068139435881e-08,
|
10378 |
+
"logits/chosen": -1.2020457983016968,
|
10379 |
+
"logits/rejected": -1.2723631858825684,
|
10380 |
+
"logps/chosen": -167.51483154296875,
|
10381 |
+
"logps/rejected": -186.7481231689453,
|
10382 |
+
"loss": 0.4608,
|
10383 |
+
"rewards/accuracies": 0.71875,
|
10384 |
+
"rewards/chosen": -0.5097277760505676,
|
10385 |
+
"rewards/margins": 0.8848594427108765,
|
10386 |
+
"rewards/rejected": -1.3945870399475098,
|
10387 |
+
"step": 1356
|
10388 |
+
},
|
10389 |
+
{
|
10390 |
+
"epoch": 1.5485710213099564,
|
10391 |
+
"grad_norm": 65.06182702724148,
|
10392 |
+
"learning_rate": 2.6781103573129703e-08,
|
10393 |
+
"logits/chosen": -1.270340085029602,
|
10394 |
+
"logits/rejected": -1.2233667373657227,
|
10395 |
+
"logps/chosen": -141.70269775390625,
|
10396 |
+
"logps/rejected": -159.1035919189453,
|
10397 |
+
"loss": 0.4297,
|
10398 |
+
"rewards/accuracies": 0.875,
|
10399 |
+
"rewards/chosen": -0.3412969708442688,
|
10400 |
+
"rewards/margins": 0.8257958292961121,
|
10401 |
+
"rewards/rejected": -1.1670928001403809,
|
10402 |
+
"step": 1358
|
10403 |
+
},
|
10404 |
+
{
|
10405 |
+
"epoch": 1.5508516855534173,
|
10406 |
+
"grad_norm": 54.785688494455215,
|
10407 |
+
"learning_rate": 2.652258491369329e-08,
|
10408 |
+
"logits/chosen": -1.2447706460952759,
|
10409 |
+
"logits/rejected": -1.2511292695999146,
|
10410 |
+
"logps/chosen": -171.25672912597656,
|
10411 |
+
"logps/rejected": -208.33084106445312,
|
10412 |
+
"loss": 0.4368,
|
10413 |
+
"rewards/accuracies": 0.875,
|
10414 |
+
"rewards/chosen": -0.3487054705619812,
|
10415 |
+
"rewards/margins": 1.1124207973480225,
|
10416 |
+
"rewards/rejected": -1.4611263275146484,
|
10417 |
+
"step": 1360
|
10418 |
+
},
|
10419 |
+
{
|
10420 |
+
"epoch": 1.5531323497968783,
|
10421 |
+
"grad_norm": 61.41628077442576,
|
10422 |
+
"learning_rate": 2.626512915570015e-08,
|
10423 |
+
"logits/chosen": -1.328946590423584,
|
10424 |
+
"logits/rejected": -1.3554866313934326,
|
10425 |
+
"logps/chosen": -125.71770477294922,
|
10426 |
+
"logps/rejected": -141.460693359375,
|
10427 |
+
"loss": 0.4368,
|
10428 |
+
"rewards/accuracies": 0.8125,
|
10429 |
+
"rewards/chosen": -0.13910508155822754,
|
10430 |
+
"rewards/margins": 0.7971990704536438,
|
10431 |
+
"rewards/rejected": -0.9363042116165161,
|
10432 |
+
"step": 1362
|
10433 |
+
},
|
10434 |
+
{
|
10435 |
+
"epoch": 1.5554130140403393,
|
10436 |
+
"grad_norm": 75.66249491591283,
|
10437 |
+
"learning_rate": 2.6008740023425247e-08,
|
10438 |
+
"logits/chosen": -1.188770055770874,
|
10439 |
+
"logits/rejected": -1.2130908966064453,
|
10440 |
+
"logps/chosen": -183.416748046875,
|
10441 |
+
"logps/rejected": -207.94090270996094,
|
10442 |
+
"loss": 0.4306,
|
10443 |
+
"rewards/accuracies": 0.71875,
|
10444 |
+
"rewards/chosen": -0.651962399482727,
|
10445 |
+
"rewards/margins": 1.215145230293274,
|
10446 |
+
"rewards/rejected": -1.867107629776001,
|
10447 |
+
"step": 1364
|
10448 |
+
},
|
10449 |
+
{
|
10450 |
+
"epoch": 1.5576936782838002,
|
10451 |
+
"grad_norm": 55.08493508678333,
|
10452 |
+
"learning_rate": 2.5753421225714055e-08,
|
10453 |
+
"logits/chosen": -1.2770978212356567,
|
10454 |
+
"logits/rejected": -1.3901137113571167,
|
10455 |
+
"logps/chosen": -182.26524353027344,
|
10456 |
+
"logps/rejected": -213.17454528808594,
|
10457 |
+
"loss": 0.4494,
|
10458 |
+
"rewards/accuracies": 0.78125,
|
10459 |
+
"rewards/chosen": -0.602812647819519,
|
10460 |
+
"rewards/margins": 1.0171971321105957,
|
10461 |
+
"rewards/rejected": -1.6200097799301147,
|
10462 |
+
"step": 1366
|
10463 |
+
},
|
10464 |
+
{
|
10465 |
+
"epoch": 1.5599743425272612,
|
10466 |
+
"grad_norm": 61.38622790534087,
|
10467 |
+
"learning_rate": 2.549917645592893e-08,
|
10468 |
+
"logits/chosen": -1.0256890058517456,
|
10469 |
+
"logits/rejected": -1.0421488285064697,
|
10470 |
+
"logps/chosen": -125.034423828125,
|
10471 |
+
"logps/rejected": -140.7981414794922,
|
10472 |
+
"loss": 0.4129,
|
10473 |
+
"rewards/accuracies": 0.78125,
|
10474 |
+
"rewards/chosen": -0.5211226344108582,
|
10475 |
+
"rewards/margins": 0.7981542348861694,
|
10476 |
+
"rewards/rejected": -1.3192768096923828,
|
10477 |
+
"step": 1368
|
10478 |
+
},
|
10479 |
+
{
|
10480 |
+
"epoch": 1.562255006770722,
|
10481 |
+
"grad_norm": 56.63177895037392,
|
10482 |
+
"learning_rate": 2.524600939189566e-08,
|
10483 |
+
"logits/chosen": -1.155221939086914,
|
10484 |
+
"logits/rejected": -1.1944361925125122,
|
10485 |
+
"logps/chosen": -161.79409790039062,
|
10486 |
+
"logps/rejected": -194.83041381835938,
|
10487 |
+
"loss": 0.4127,
|
10488 |
+
"rewards/accuracies": 0.84375,
|
10489 |
+
"rewards/chosen": -0.42054063081741333,
|
10490 |
+
"rewards/margins": 1.119522213935852,
|
10491 |
+
"rewards/rejected": -1.5400630235671997,
|
10492 |
+
"step": 1370
|
10493 |
+
},
|
10494 |
+
{
|
10495 |
+
"epoch": 1.564535671014183,
|
10496 |
+
"grad_norm": 83.86033988088741,
|
10497 |
+
"learning_rate": 2.4993923695850305e-08,
|
10498 |
+
"logits/chosen": -1.293369174003601,
|
10499 |
+
"logits/rejected": -1.3639140129089355,
|
10500 |
+
"logps/chosen": -198.75289916992188,
|
10501 |
+
"logps/rejected": -229.84117126464844,
|
10502 |
+
"loss": 0.4988,
|
10503 |
+
"rewards/accuracies": 0.8125,
|
10504 |
+
"rewards/chosen": -0.41869914531707764,
|
10505 |
+
"rewards/margins": 0.7414001226425171,
|
10506 |
+
"rewards/rejected": -1.1600991487503052,
|
10507 |
+
"step": 1372
|
10508 |
+
},
|
10509 |
+
{
|
10510 |
+
"epoch": 1.5668163352576436,
|
10511 |
+
"grad_norm": 62.24749551918009,
|
10512 |
+
"learning_rate": 2.4742923014386154e-08,
|
10513 |
+
"logits/chosen": -1.3067548274993896,
|
10514 |
+
"logits/rejected": -1.3476440906524658,
|
10515 |
+
"logps/chosen": -224.86627197265625,
|
10516 |
+
"logps/rejected": -235.1451416015625,
|
10517 |
+
"loss": 0.4154,
|
10518 |
+
"rewards/accuracies": 0.875,
|
10519 |
+
"rewards/chosen": -0.6012443900108337,
|
10520 |
+
"rewards/margins": 0.9985144138336182,
|
10521 |
+
"rewards/rejected": -1.5997586250305176,
|
10522 |
+
"step": 1374
|
10523 |
+
},
|
10524 |
+
{
|
10525 |
+
"epoch": 1.5690969995011046,
|
10526 |
+
"grad_norm": 64.51543232805054,
|
10527 |
+
"learning_rate": 2.4493010978401063e-08,
|
10528 |
+
"logits/chosen": -1.0690737962722778,
|
10529 |
+
"logits/rejected": -1.084768295288086,
|
10530 |
+
"logps/chosen": -163.8816375732422,
|
10531 |
+
"logps/rejected": -171.3943634033203,
|
10532 |
+
"loss": 0.4249,
|
10533 |
+
"rewards/accuracies": 0.875,
|
10534 |
+
"rewards/chosen": -0.611855685710907,
|
10535 |
+
"rewards/margins": 0.8946461081504822,
|
10536 |
+
"rewards/rejected": -1.5065017938613892,
|
10537 |
+
"step": 1376
|
10538 |
+
},
|
10539 |
+
{
|
10540 |
+
"epoch": 1.5713776637445656,
|
10541 |
+
"grad_norm": 54.357068938946284,
|
10542 |
+
"learning_rate": 2.4244191203044805e-08,
|
10543 |
+
"logits/chosen": -1.4049066305160522,
|
10544 |
+
"logits/rejected": -1.4406367540359497,
|
10545 |
+
"logps/chosen": -193.67234802246094,
|
10546 |
+
"logps/rejected": -223.92030334472656,
|
10547 |
+
"loss": 0.3912,
|
10548 |
+
"rewards/accuracies": 0.8125,
|
10549 |
+
"rewards/chosen": -0.3962157368659973,
|
10550 |
+
"rewards/margins": 1.082966685295105,
|
10551 |
+
"rewards/rejected": -1.479182481765747,
|
10552 |
+
"step": 1378
|
10553 |
+
},
|
10554 |
+
{
|
10555 |
+
"epoch": 1.5736583279880265,
|
10556 |
+
"grad_norm": 56.319793805013724,
|
10557 |
+
"learning_rate": 2.399646728766691e-08,
|
10558 |
+
"logits/chosen": -1.3638195991516113,
|
10559 |
+
"logits/rejected": -1.3412432670593262,
|
10560 |
+
"logps/chosen": -164.48809814453125,
|
10561 |
+
"logps/rejected": -196.62823486328125,
|
10562 |
+
"loss": 0.4493,
|
10563 |
+
"rewards/accuracies": 0.78125,
|
10564 |
+
"rewards/chosen": -0.2889966666698456,
|
10565 |
+
"rewards/margins": 1.261388897895813,
|
10566 |
+
"rewards/rejected": -1.5503859519958496,
|
10567 |
+
"step": 1380
|
10568 |
+
},
|
10569 |
+
{
|
10570 |
+
"epoch": 1.5759389922314875,
|
10571 |
+
"grad_norm": 60.4303807612328,
|
10572 |
+
"learning_rate": 2.3749842815764498e-08,
|
10573 |
+
"logits/chosen": -1.1395071744918823,
|
10574 |
+
"logits/rejected": -1.1399792432785034,
|
10575 |
+
"logps/chosen": -139.91824340820312,
|
10576 |
+
"logps/rejected": -184.93896484375,
|
10577 |
+
"loss": 0.4426,
|
10578 |
+
"rewards/accuracies": 0.75,
|
10579 |
+
"rewards/chosen": -0.37107953429222107,
|
10580 |
+
"rewards/margins": 0.8352210521697998,
|
10581 |
+
"rewards/rejected": -1.2063006162643433,
|
10582 |
+
"step": 1382
|
10583 |
+
},
|
10584 |
+
{
|
10585 |
+
"epoch": 1.5782196564749484,
|
10586 |
+
"grad_norm": 55.142567366980046,
|
10587 |
+
"learning_rate": 2.3504321354930568e-08,
|
10588 |
+
"logits/chosen": -1.1650046110153198,
|
10589 |
+
"logits/rejected": -1.2775689363479614,
|
10590 |
+
"logps/chosen": -144.93995666503906,
|
10591 |
+
"logps/rejected": -177.63087463378906,
|
10592 |
+
"loss": 0.4272,
|
10593 |
+
"rewards/accuracies": 0.78125,
|
10594 |
+
"rewards/chosen": -0.5145533084869385,
|
10595 |
+
"rewards/margins": 0.9500089287757874,
|
10596 |
+
"rewards/rejected": -1.4645624160766602,
|
10597 |
+
"step": 1384
|
10598 |
+
},
|
10599 |
+
{
|
10600 |
+
"epoch": 1.5805003207184094,
|
10601 |
+
"grad_norm": 63.43007453665504,
|
10602 |
+
"learning_rate": 2.3259906456802213e-08,
|
10603 |
+
"logits/chosen": -1.216495394706726,
|
10604 |
+
"logits/rejected": -1.2768280506134033,
|
10605 |
+
"logps/chosen": -169.9758758544922,
|
10606 |
+
"logps/rejected": -194.25665283203125,
|
10607 |
+
"loss": 0.4427,
|
10608 |
+
"rewards/accuracies": 0.78125,
|
10609 |
+
"rewards/chosen": -0.43023961782455444,
|
10610 |
+
"rewards/margins": 0.8201103210449219,
|
10611 |
+
"rewards/rejected": -1.250349998474121,
|
10612 |
+
"step": 1386
|
10613 |
+
},
|
10614 |
+
{
|
10615 |
+
"epoch": 1.5827809849618701,
|
10616 |
+
"grad_norm": 58.56739556116542,
|
10617 |
+
"learning_rate": 2.301660165700936e-08,
|
10618 |
+
"logits/chosen": -1.291746735572815,
|
10619 |
+
"logits/rejected": -1.3648316860198975,
|
10620 |
+
"logps/chosen": -166.92015075683594,
|
10621 |
+
"logps/rejected": -194.9810028076172,
|
10622 |
+
"loss": 0.4506,
|
10623 |
+
"rewards/accuracies": 0.8125,
|
10624 |
+
"rewards/chosen": -0.5265369415283203,
|
10625 |
+
"rewards/margins": 0.8070346713066101,
|
10626 |
+
"rewards/rejected": -1.3335715532302856,
|
10627 |
+
"step": 1388
|
10628 |
+
},
|
10629 |
+
{
|
10630 |
+
"epoch": 1.585061649205331,
|
10631 |
+
"grad_norm": 54.69042761154998,
|
10632 |
+
"learning_rate": 2.2774410475123608e-08,
|
10633 |
+
"logits/chosen": -1.3127899169921875,
|
10634 |
+
"logits/rejected": -1.2719758749008179,
|
10635 |
+
"logps/chosen": -178.777587890625,
|
10636 |
+
"logps/rejected": -247.97511291503906,
|
10637 |
+
"loss": 0.3964,
|
10638 |
+
"rewards/accuracies": 0.875,
|
10639 |
+
"rewards/chosen": -0.19407829642295837,
|
10640 |
+
"rewards/margins": 1.4336225986480713,
|
10641 |
+
"rewards/rejected": -1.6277010440826416,
|
10642 |
+
"step": 1390
|
10643 |
+
},
|
10644 |
+
{
|
10645 |
+
"epoch": 1.5873423134487918,
|
10646 |
+
"grad_norm": 62.874447838260565,
|
10647 |
+
"learning_rate": 2.2533336414607317e-08,
|
10648 |
+
"logits/chosen": -1.2624969482421875,
|
10649 |
+
"logits/rejected": -1.319579839706421,
|
10650 |
+
"logps/chosen": -169.37161254882812,
|
10651 |
+
"logps/rejected": -186.8485870361328,
|
10652 |
+
"loss": 0.4139,
|
10653 |
+
"rewards/accuracies": 0.90625,
|
10654 |
+
"rewards/chosen": -0.39721712470054626,
|
10655 |
+
"rewards/margins": 0.8743160367012024,
|
10656 |
+
"rewards/rejected": -1.2715333700180054,
|
10657 |
+
"step": 1392
|
10658 |
+
},
|
10659 |
+
{
|
10660 |
+
"epoch": 1.5896229776922528,
|
10661 |
+
"grad_norm": 56.69647177405222,
|
10662 |
+
"learning_rate": 2.2293382962762853e-08,
|
10663 |
+
"logits/chosen": -1.1312240362167358,
|
10664 |
+
"logits/rejected": -1.1108318567276,
|
10665 |
+
"logps/chosen": -149.8402557373047,
|
10666 |
+
"logps/rejected": -147.10037231445312,
|
10667 |
+
"loss": 0.4255,
|
10668 |
+
"rewards/accuracies": 0.71875,
|
10669 |
+
"rewards/chosen": -0.5710242986679077,
|
10670 |
+
"rewards/margins": 0.5461159348487854,
|
10671 |
+
"rewards/rejected": -1.117140293121338,
|
10672 |
+
"step": 1394
|
10673 |
+
},
|
10674 |
+
{
|
10675 |
+
"epoch": 1.5919036419357138,
|
10676 |
+
"grad_norm": 71.94947354276687,
|
10677 |
+
"learning_rate": 2.2054553590682268e-08,
|
10678 |
+
"logits/chosen": -1.1476179361343384,
|
10679 |
+
"logits/rejected": -1.2181971073150635,
|
10680 |
+
"logps/chosen": -152.3661346435547,
|
10681 |
+
"logps/rejected": -192.77369689941406,
|
10682 |
+
"loss": 0.4157,
|
10683 |
+
"rewards/accuracies": 0.875,
|
10684 |
+
"rewards/chosen": -0.5518044233322144,
|
10685 |
+
"rewards/margins": 1.2432973384857178,
|
10686 |
+
"rewards/rejected": -1.7951017618179321,
|
10687 |
+
"step": 1396
|
10688 |
+
},
|
10689 |
+
{
|
10690 |
+
"epoch": 1.5941843061791747,
|
10691 |
+
"grad_norm": 65.43519539314455,
|
10692 |
+
"learning_rate": 2.1816851753197018e-08,
|
10693 |
+
"logits/chosen": -1.2672624588012695,
|
10694 |
+
"logits/rejected": -1.3876826763153076,
|
10695 |
+
"logps/chosen": -198.90475463867188,
|
10696 |
+
"logps/rejected": -248.56439208984375,
|
10697 |
+
"loss": 0.479,
|
10698 |
+
"rewards/accuracies": 0.875,
|
10699 |
+
"rewards/chosen": -0.7569680213928223,
|
10700 |
+
"rewards/margins": 1.3200610876083374,
|
10701 |
+
"rewards/rejected": -2.077029228210449,
|
10702 |
+
"step": 1398
|
10703 |
+
},
|
10704 |
+
{
|
10705 |
+
"epoch": 1.5964649704226357,
|
10706 |
+
"grad_norm": 55.75209984482193,
|
10707 |
+
"learning_rate": 2.1580280888827997e-08,
|
10708 |
+
"logits/chosen": -1.3570483922958374,
|
10709 |
+
"logits/rejected": -1.39362633228302,
|
10710 |
+
"logps/chosen": -168.9151153564453,
|
10711 |
+
"logps/rejected": -199.656982421875,
|
10712 |
+
"loss": 0.4361,
|
10713 |
+
"rewards/accuracies": 0.8125,
|
10714 |
+
"rewards/chosen": -0.38282498717308044,
|
10715 |
+
"rewards/margins": 1.1389210224151611,
|
10716 |
+
"rewards/rejected": -1.521746039390564,
|
10717 |
+
"step": 1400
|
10718 |
+
},
|
10719 |
+
{
|
10720 |
+
"epoch": 1.5964649704226357,
|
10721 |
+
"eval_logits/chosen": -1.349047064781189,
|
10722 |
+
"eval_logits/rejected": -1.3316236734390259,
|
10723 |
+
"eval_logps/chosen": -131.90457153320312,
|
10724 |
+
"eval_logps/rejected": -138.97027587890625,
|
10725 |
+
"eval_loss": 0.5412697792053223,
|
10726 |
+
"eval_rewards/accuracies": 0.7200000286102295,
|
10727 |
+
"eval_rewards/chosen": -0.2006600797176361,
|
10728 |
+
"eval_rewards/margins": 0.49009186029434204,
|
10729 |
+
"eval_rewards/rejected": -0.6907519698143005,
|
10730 |
+
"eval_runtime": 20.6376,
|
10731 |
+
"eval_samples_per_second": 4.846,
|
10732 |
+
"eval_steps_per_second": 1.211,
|
10733 |
+
"step": 1400
|
10734 |
+
},
|
10735 |
+
{
|
10736 |
+
"epoch": 1.5987456346660966,
|
10737 |
+
"grad_norm": 58.426952642835886,
|
10738 |
+
"learning_rate": 2.1344844419735752e-08,
|
10739 |
+
"logits/chosen": -1.3734134435653687,
|
10740 |
+
"logits/rejected": -1.4403023719787598,
|
10741 |
+
"logps/chosen": -138.4149627685547,
|
10742 |
+
"logps/rejected": -162.69033813476562,
|
10743 |
+
"loss": 0.4393,
|
10744 |
+
"rewards/accuracies": 0.875,
|
10745 |
+
"rewards/chosen": -0.2428184598684311,
|
10746 |
+
"rewards/margins": 0.8137100338935852,
|
10747 |
+
"rewards/rejected": -1.0565285682678223,
|
10748 |
+
"step": 1402
|
10749 |
+
},
|
10750 |
+
{
|
10751 |
+
"epoch": 1.6010262989095574,
|
10752 |
+
"grad_norm": 57.12690162473469,
|
10753 |
+
"learning_rate": 2.1110545751671073e-08,
|
10754 |
+
"logits/chosen": -1.2013407945632935,
|
10755 |
+
"logits/rejected": -1.3104689121246338,
|
10756 |
+
"logps/chosen": -137.76437377929688,
|
10757 |
+
"logps/rejected": -174.31373596191406,
|
10758 |
+
"loss": 0.4367,
|
10759 |
+
"rewards/accuracies": 0.875,
|
10760 |
+
"rewards/chosen": -0.14584028720855713,
|
10761 |
+
"rewards/margins": 0.999373197555542,
|
10762 |
+
"rewards/rejected": -1.1452134847640991,
|
10763 |
+
"step": 1404
|
10764 |
+
},
|
10765 |
+
{
|
10766 |
+
"epoch": 1.6033069631530183,
|
10767 |
+
"grad_norm": 60.35122974906749,
|
10768 |
+
"learning_rate": 2.0877388273925644e-08,
|
10769 |
+
"logits/chosen": -1.227086067199707,
|
10770 |
+
"logits/rejected": -1.1707607507705688,
|
10771 |
+
"logps/chosen": -209.45407104492188,
|
10772 |
+
"logps/rejected": -257.14776611328125,
|
10773 |
+
"loss": 0.3969,
|
10774 |
+
"rewards/accuracies": 0.8125,
|
10775 |
+
"rewards/chosen": -0.581037163734436,
|
10776 |
+
"rewards/margins": 1.283540964126587,
|
10777 |
+
"rewards/rejected": -1.864578127861023,
|
10778 |
+
"step": 1406
|
10779 |
+
},
|
10780 |
+
{
|
10781 |
+
"epoch": 1.605587627396479,
|
10782 |
+
"grad_norm": 52.46325406296021,
|
10783 |
+
"learning_rate": 2.0645375359283045e-08,
|
10784 |
+
"logits/chosen": -1.3980076313018799,
|
10785 |
+
"logits/rejected": -1.4242044687271118,
|
10786 |
+
"logps/chosen": -116.77565002441406,
|
10787 |
+
"logps/rejected": -137.52145385742188,
|
10788 |
+
"loss": 0.4733,
|
10789 |
+
"rewards/accuracies": 0.8125,
|
10790 |
+
"rewards/chosen": -0.23290672898292542,
|
10791 |
+
"rewards/margins": 0.7732049226760864,
|
10792 |
+
"rewards/rejected": -1.0061116218566895,
|
10793 |
+
"step": 1408
|
10794 |
+
},
|
10795 |
+
{
|
10796 |
+
"epoch": 1.60786829163994,
|
10797 |
+
"grad_norm": 55.90140225780788,
|
10798 |
+
"learning_rate": 2.0414510363970018e-08,
|
10799 |
+
"logits/chosen": -1.1187829971313477,
|
10800 |
+
"logits/rejected": -1.1768170595169067,
|
10801 |
+
"logps/chosen": -150.73451232910156,
|
10802 |
+
"logps/rejected": -189.43748474121094,
|
10803 |
+
"loss": 0.3741,
|
10804 |
+
"rewards/accuracies": 0.84375,
|
10805 |
+
"rewards/chosen": -0.3249818682670593,
|
10806 |
+
"rewards/margins": 0.9408416748046875,
|
10807 |
+
"rewards/rejected": -1.2658236026763916,
|
10808 |
+
"step": 1410
|
10809 |
+
},
|
10810 |
+
{
|
10811 |
+
"epoch": 1.610148955883401,
|
10812 |
+
"grad_norm": 58.50925426124599,
|
10813 |
+
"learning_rate": 2.0184796627607725e-08,
|
10814 |
+
"logits/chosen": -1.1422569751739502,
|
10815 |
+
"logits/rejected": -1.1473525762557983,
|
10816 |
+
"logps/chosen": -188.11888122558594,
|
10817 |
+
"logps/rejected": -191.87713623046875,
|
10818 |
+
"loss": 0.4537,
|
10819 |
+
"rewards/accuracies": 0.78125,
|
10820 |
+
"rewards/chosen": -0.35315239429473877,
|
10821 |
+
"rewards/margins": 0.663144588470459,
|
10822 |
+
"rewards/rejected": -1.0162967443466187,
|
10823 |
+
"step": 1412
|
10824 |
+
},
|
10825 |
+
{
|
10826 |
+
"epoch": 1.612429620126862,
|
10827 |
+
"grad_norm": 64.2495432668556,
|
10828 |
+
"learning_rate": 1.9956237473163718e-08,
|
10829 |
+
"logits/chosen": -1.2085424661636353,
|
10830 |
+
"logits/rejected": -1.306652545928955,
|
10831 |
+
"logps/chosen": -97.00778198242188,
|
10832 |
+
"logps/rejected": -127.79579162597656,
|
10833 |
+
"loss": 0.4977,
|
10834 |
+
"rewards/accuracies": 0.75,
|
10835 |
+
"rewards/chosen": -0.26015713810920715,
|
10836 |
+
"rewards/margins": 0.6060620546340942,
|
10837 |
+
"rewards/rejected": -0.866219162940979,
|
10838 |
+
"step": 1414
|
10839 |
+
},
|
10840 |
+
{
|
10841 |
+
"epoch": 1.614710284370323,
|
10842 |
+
"grad_norm": 50.97231528427863,
|
10843 |
+
"learning_rate": 1.9728836206903654e-08,
|
10844 |
+
"logits/chosen": -1.4538676738739014,
|
10845 |
+
"logits/rejected": -1.437317967414856,
|
10846 |
+
"logps/chosen": -138.0614776611328,
|
10847 |
+
"logps/rejected": -137.07269287109375,
|
10848 |
+
"loss": 0.4251,
|
10849 |
+
"rewards/accuracies": 0.8125,
|
10850 |
+
"rewards/chosen": -0.09943617880344391,
|
10851 |
+
"rewards/margins": 0.7697718143463135,
|
10852 |
+
"rewards/rejected": -0.8692080974578857,
|
10853 |
+
"step": 1416
|
10854 |
+
},
|
10855 |
+
{
|
10856 |
+
"epoch": 1.616990948613784,
|
10857 |
+
"grad_norm": 51.24642546995866,
|
10858 |
+
"learning_rate": 1.9502596118343552e-08,
|
10859 |
+
"logits/chosen": -1.0932207107543945,
|
10860 |
+
"logits/rejected": -1.1693406105041504,
|
10861 |
+
"logps/chosen": -126.78893280029297,
|
10862 |
+
"logps/rejected": -140.98338317871094,
|
10863 |
+
"loss": 0.4273,
|
10864 |
+
"rewards/accuracies": 0.6875,
|
10865 |
+
"rewards/chosen": -0.1916692703962326,
|
10866 |
+
"rewards/margins": 0.7681133151054382,
|
10867 |
+
"rewards/rejected": -0.959782600402832,
|
10868 |
+
"step": 1418
|
10869 |
+
},
|
10870 |
+
{
|
10871 |
+
"epoch": 1.6192716128572446,
|
10872 |
+
"grad_norm": 61.40277169360943,
|
10873 |
+
"learning_rate": 1.9277520480202203e-08,
|
10874 |
+
"logits/chosen": -1.3312891721725464,
|
10875 |
+
"logits/rejected": -1.2821072340011597,
|
10876 |
+
"logps/chosen": -151.71487426757812,
|
10877 |
+
"logps/rejected": -167.5812225341797,
|
10878 |
+
"loss": 0.4401,
|
10879 |
+
"rewards/accuracies": 0.9375,
|
10880 |
+
"rewards/chosen": -0.4683433473110199,
|
10881 |
+
"rewards/margins": 1.1215391159057617,
|
10882 |
+
"rewards/rejected": -1.589882493019104,
|
10883 |
+
"step": 1420
|
10884 |
+
},
|
10885 |
+
{
|
10886 |
+
"epoch": 1.6215522771007056,
|
10887 |
+
"grad_norm": 54.3969390887107,
|
10888 |
+
"learning_rate": 1.9053612548353803e-08,
|
10889 |
+
"logits/chosen": -1.3286281824111938,
|
10890 |
+
"logits/rejected": -1.467494249343872,
|
10891 |
+
"logps/chosen": -185.03489685058594,
|
10892 |
+
"logps/rejected": -245.1704559326172,
|
10893 |
+
"loss": 0.3985,
|
10894 |
+
"rewards/accuracies": 0.84375,
|
10895 |
+
"rewards/chosen": -0.6573659181594849,
|
10896 |
+
"rewards/margins": 1.2262755632400513,
|
10897 |
+
"rewards/rejected": -1.8836416006088257,
|
10898 |
+
"step": 1422
|
10899 |
+
},
|
10900 |
+
{
|
10901 |
+
"epoch": 1.6238329413441663,
|
10902 |
+
"grad_norm": 54.259477393424476,
|
10903 |
+
"learning_rate": 1.8830875561780902e-08,
|
10904 |
+
"logits/chosen": -1.2293699979782104,
|
10905 |
+
"logits/rejected": -1.3463877439498901,
|
10906 |
+
"logps/chosen": -140.27874755859375,
|
10907 |
+
"logps/rejected": -198.82611083984375,
|
10908 |
+
"loss": 0.3951,
|
10909 |
+
"rewards/accuracies": 0.8125,
|
10910 |
+
"rewards/chosen": -0.30968916416168213,
|
10911 |
+
"rewards/margins": 0.9538030624389648,
|
10912 |
+
"rewards/rejected": -1.2634921073913574,
|
10913 |
+
"step": 1424
|
10914 |
+
},
|
10915 |
+
{
|
10916 |
+
"epoch": 1.6261136055876273,
|
10917 |
+
"grad_norm": 61.69027813672776,
|
10918 |
+
"learning_rate": 1.8609312742527493e-08,
|
10919 |
+
"logits/chosen": -1.2697855234146118,
|
10920 |
+
"logits/rejected": -1.2990537881851196,
|
10921 |
+
"logps/chosen": -155.84657287597656,
|
10922 |
+
"logps/rejected": -212.16445922851562,
|
10923 |
+
"loss": 0.4447,
|
10924 |
+
"rewards/accuracies": 0.78125,
|
10925 |
+
"rewards/chosen": -0.38760051131248474,
|
10926 |
+
"rewards/margins": 0.9349652528762817,
|
10927 |
+
"rewards/rejected": -1.3225656747817993,
|
10928 |
+
"step": 1426
|
10929 |
+
},
|
10930 |
+
{
|
10931 |
+
"epoch": 1.6283942698310883,
|
10932 |
+
"grad_norm": 54.55311985521747,
|
10933 |
+
"learning_rate": 1.8388927295652446e-08,
|
10934 |
+
"logits/chosen": -1.285940170288086,
|
10935 |
+
"logits/rejected": -1.2539726495742798,
|
10936 |
+
"logps/chosen": -165.970947265625,
|
10937 |
+
"logps/rejected": -213.34823608398438,
|
10938 |
+
"loss": 0.399,
|
10939 |
+
"rewards/accuracies": 0.90625,
|
10940 |
+
"rewards/chosen": -0.4254089295864105,
|
10941 |
+
"rewards/margins": 1.3802815675735474,
|
10942 |
+
"rewards/rejected": -1.8056904077529907,
|
10943 |
+
"step": 1428
|
10944 |
+
},
|
10945 |
+
{
|
10946 |
+
"epoch": 1.6306749340745492,
|
10947 |
+
"grad_norm": 76.43067475489244,
|
10948 |
+
"learning_rate": 1.81697224091831e-08,
|
10949 |
+
"logits/chosen": -1.3153434991836548,
|
10950 |
+
"logits/rejected": -1.3224899768829346,
|
10951 |
+
"logps/chosen": -164.8282470703125,
|
10952 |
+
"logps/rejected": -173.127685546875,
|
10953 |
+
"loss": 0.4394,
|
10954 |
+
"rewards/accuracies": 0.875,
|
10955 |
+
"rewards/chosen": -0.13270916044712067,
|
10956 |
+
"rewards/margins": 0.787804901599884,
|
10957 |
+
"rewards/rejected": -0.9205139875411987,
|
10958 |
+
"step": 1430
|
10959 |
+
},
|
10960 |
+
{
|
10961 |
+
"epoch": 1.6329555983180102,
|
10962 |
+
"grad_norm": 63.28082306297566,
|
10963 |
+
"learning_rate": 1.7951701254069208e-08,
|
10964 |
+
"logits/chosen": -1.2579729557037354,
|
10965 |
+
"logits/rejected": -1.3529393672943115,
|
10966 |
+
"logps/chosen": -172.70582580566406,
|
10967 |
+
"logps/rejected": -199.5102081298828,
|
10968 |
+
"loss": 0.4179,
|
10969 |
+
"rewards/accuracies": 0.875,
|
10970 |
+
"rewards/chosen": -0.26907747983932495,
|
10971 |
+
"rewards/margins": 0.9893758296966553,
|
10972 |
+
"rewards/rejected": -1.258453369140625,
|
10973 |
+
"step": 1432
|
10974 |
+
},
|
10975 |
+
{
|
10976 |
+
"epoch": 1.6352362625614711,
|
10977 |
+
"grad_norm": 66.45448871459682,
|
10978 |
+
"learning_rate": 1.773486698413701e-08,
|
10979 |
+
"logits/chosen": -1.321483850479126,
|
10980 |
+
"logits/rejected": -1.2988489866256714,
|
10981 |
+
"logps/chosen": -278.5541076660156,
|
10982 |
+
"logps/rejected": -317.18048095703125,
|
10983 |
+
"loss": 0.4615,
|
10984 |
+
"rewards/accuracies": 0.6875,
|
10985 |
+
"rewards/chosen": -0.9866227507591248,
|
10986 |
+
"rewards/margins": 1.5252459049224854,
|
10987 |
+
"rewards/rejected": -2.511868715286255,
|
10988 |
+
"step": 1434
|
10989 |
+
},
|
10990 |
+
{
|
10991 |
+
"epoch": 1.6375169268049319,
|
10992 |
+
"grad_norm": 61.23114152941888,
|
10993 |
+
"learning_rate": 1.751922273604366e-08,
|
10994 |
+
"logits/chosen": -1.2703336477279663,
|
10995 |
+
"logits/rejected": -1.3587698936462402,
|
10996 |
+
"logps/chosen": -119.15101623535156,
|
10997 |
+
"logps/rejected": -158.48219299316406,
|
10998 |
+
"loss": 0.4167,
|
10999 |
+
"rewards/accuracies": 0.875,
|
11000 |
+
"rewards/chosen": -0.3894941508769989,
|
11001 |
+
"rewards/margins": 0.8275178074836731,
|
11002 |
+
"rewards/rejected": -1.2170119285583496,
|
11003 |
+
"step": 1436
|
11004 |
+
},
|
11005 |
+
{
|
11006 |
+
"epoch": 1.6397975910483928,
|
11007 |
+
"grad_norm": 55.95939622635403,
|
11008 |
+
"learning_rate": 1.7304771629231797e-08,
|
11009 |
+
"logits/chosen": -1.3791868686676025,
|
11010 |
+
"logits/rejected": -1.32236909866333,
|
11011 |
+
"logps/chosen": -139.13572692871094,
|
11012 |
+
"logps/rejected": -141.41555786132812,
|
11013 |
+
"loss": 0.4648,
|
11014 |
+
"rewards/accuracies": 0.84375,
|
11015 |
+
"rewards/chosen": -0.30967453122138977,
|
11016 |
+
"rewards/margins": 0.7233924269676208,
|
11017 |
+
"rewards/rejected": -1.033066987991333,
|
11018 |
+
"step": 1438
|
11019 |
+
},
|
11020 |
+
{
|
11021 |
+
"epoch": 1.6420782552918538,
|
11022 |
+
"grad_norm": 65.12174124488925,
|
11023 |
+
"learning_rate": 1.709151676588446e-08,
|
11024 |
+
"logits/chosen": -1.3115848302841187,
|
11025 |
+
"logits/rejected": -1.3686813116073608,
|
11026 |
+
"logps/chosen": -187.90061950683594,
|
11027 |
+
"logps/rejected": -219.3175506591797,
|
11028 |
+
"loss": 0.4063,
|
11029 |
+
"rewards/accuracies": 0.8125,
|
11030 |
+
"rewards/chosen": -0.2774468958377838,
|
11031 |
+
"rewards/margins": 1.2934999465942383,
|
11032 |
+
"rewards/rejected": -1.5709468126296997,
|
11033 |
+
"step": 1440
|
11034 |
+
},
|
11035 |
+
{
|
11036 |
+
"epoch": 1.6443589195353145,
|
11037 |
+
"grad_norm": 63.019089490249065,
|
11038 |
+
"learning_rate": 1.687946123088021e-08,
|
11039 |
+
"logits/chosen": -1.148698091506958,
|
11040 |
+
"logits/rejected": -1.2162479162216187,
|
11041 |
+
"logps/chosen": -128.07449340820312,
|
11042 |
+
"logps/rejected": -160.94265747070312,
|
11043 |
+
"loss": 0.3965,
|
11044 |
+
"rewards/accuracies": 0.78125,
|
11045 |
+
"rewards/chosen": -0.44137704372406006,
|
11046 |
+
"rewards/margins": 0.892095685005188,
|
11047 |
+
"rewards/rejected": -1.3334728479385376,
|
11048 |
+
"step": 1442
|
11049 |
+
},
|
11050 |
+
{
|
11051 |
+
"epoch": 1.6466395837787755,
|
11052 |
+
"grad_norm": 53.81256325707842,
|
11053 |
+
"learning_rate": 1.6668608091748494e-08,
|
11054 |
+
"logits/chosen": -1.3144904375076294,
|
11055 |
+
"logits/rejected": -1.3196699619293213,
|
11056 |
+
"logps/chosen": -147.97418212890625,
|
11057 |
+
"logps/rejected": -184.45281982421875,
|
11058 |
+
"loss": 0.4082,
|
11059 |
+
"rewards/accuracies": 0.78125,
|
11060 |
+
"rewards/chosen": -0.3717328608036041,
|
11061 |
+
"rewards/margins": 1.0338945388793945,
|
11062 |
+
"rewards/rejected": -1.4056274890899658,
|
11063 |
+
"step": 1444
|
11064 |
+
},
|
11065 |
+
{
|
11066 |
+
"epoch": 1.6489202480222365,
|
11067 |
+
"grad_norm": 58.43605410338442,
|
11068 |
+
"learning_rate": 1.6458960398625288e-08,
|
11069 |
+
"logits/chosen": -1.3495270013809204,
|
11070 |
+
"logits/rejected": -1.3430432081222534,
|
11071 |
+
"logps/chosen": -221.313232421875,
|
11072 |
+
"logps/rejected": -240.23922729492188,
|
11073 |
+
"loss": 0.4216,
|
11074 |
+
"rewards/accuracies": 0.875,
|
11075 |
+
"rewards/chosen": -0.4131534695625305,
|
11076 |
+
"rewards/margins": 1.1750929355621338,
|
11077 |
+
"rewards/rejected": -1.588246464729309,
|
11078 |
+
"step": 1446
|
11079 |
+
},
|
11080 |
+
{
|
11081 |
+
"epoch": 1.6512009122656974,
|
11082 |
+
"grad_norm": 62.53542903290901,
|
11083 |
+
"learning_rate": 1.6250521184208888e-08,
|
11084 |
+
"logits/chosen": -1.2720297574996948,
|
11085 |
+
"logits/rejected": -1.2884955406188965,
|
11086 |
+
"logps/chosen": -170.361328125,
|
11087 |
+
"logps/rejected": -195.9408721923828,
|
11088 |
+
"loss": 0.4433,
|
11089 |
+
"rewards/accuracies": 0.84375,
|
11090 |
+
"rewards/chosen": -0.25558391213417053,
|
11091 |
+
"rewards/margins": 0.8461555242538452,
|
11092 |
+
"rewards/rejected": -1.1017394065856934,
|
11093 |
+
"step": 1448
|
11094 |
+
},
|
11095 |
+
{
|
11096 |
+
"epoch": 1.6534815765091584,
|
11097 |
+
"grad_norm": 59.021319623999084,
|
11098 |
+
"learning_rate": 1.60432934637162e-08,
|
11099 |
+
"logits/chosen": -1.2962281703948975,
|
11100 |
+
"logits/rejected": -1.3130199909210205,
|
11101 |
+
"logps/chosen": -220.19346618652344,
|
11102 |
+
"logps/rejected": -234.11521911621094,
|
11103 |
+
"loss": 0.3885,
|
11104 |
+
"rewards/accuracies": 0.9375,
|
11105 |
+
"rewards/chosen": -0.5436195731163025,
|
11106 |
+
"rewards/margins": 0.8701571226119995,
|
11107 |
+
"rewards/rejected": -1.4137766361236572,
|
11108 |
+
"step": 1450
|
11109 |
+
},
|
11110 |
+
{
|
11111 |
+
"epoch": 1.6557622407526194,
|
11112 |
+
"grad_norm": 65.21677604603016,
|
11113 |
+
"learning_rate": 1.5837280234839012e-08,
|
11114 |
+
"logits/chosen": -1.258408546447754,
|
11115 |
+
"logits/rejected": -1.2448734045028687,
|
11116 |
+
"logps/chosen": -189.3031005859375,
|
11117 |
+
"logps/rejected": -186.64773559570312,
|
11118 |
+
"loss": 0.4388,
|
11119 |
+
"rewards/accuracies": 0.75,
|
11120 |
+
"rewards/chosen": -0.5159119367599487,
|
11121 |
+
"rewards/margins": 0.5587047338485718,
|
11122 |
+
"rewards/rejected": -1.0746166706085205,
|
11123 |
+
"step": 1452
|
11124 |
+
},
|
11125 |
+
{
|
11126 |
+
"epoch": 1.65804290499608,
|
11127 |
+
"grad_norm": 56.877107879066756,
|
11128 |
+
"learning_rate": 1.5632484477700635e-08,
|
11129 |
+
"logits/chosen": -1.3145238161087036,
|
11130 |
+
"logits/rejected": -1.3254127502441406,
|
11131 |
+
"logps/chosen": -225.0052490234375,
|
11132 |
+
"logps/rejected": -260.56292724609375,
|
11133 |
+
"loss": 0.4098,
|
11134 |
+
"rewards/accuracies": 0.84375,
|
11135 |
+
"rewards/chosen": -0.6009418964385986,
|
11136 |
+
"rewards/margins": 1.1089880466461182,
|
11137 |
+
"rewards/rejected": -1.7099300622940063,
|
11138 |
+
"step": 1454
|
11139 |
+
},
|
11140 |
+
{
|
11141 |
+
"epoch": 1.660323569239541,
|
11142 |
+
"grad_norm": 68.97770948668654,
|
11143 |
+
"learning_rate": 1.542890915481282e-08,
|
11144 |
+
"logits/chosen": -1.2907415628433228,
|
11145 |
+
"logits/rejected": -1.2642382383346558,
|
11146 |
+
"logps/chosen": -122.03399658203125,
|
11147 |
+
"logps/rejected": -136.55223083496094,
|
11148 |
+
"loss": 0.4316,
|
11149 |
+
"rewards/accuracies": 0.90625,
|
11150 |
+
"rewards/chosen": -0.2291896939277649,
|
11151 |
+
"rewards/margins": 0.7178550362586975,
|
11152 |
+
"rewards/rejected": -0.9470447897911072,
|
11153 |
+
"step": 1456
|
11154 |
+
},
|
11155 |
+
{
|
11156 |
+
"epoch": 1.6626042334830018,
|
11157 |
+
"grad_norm": 68.21135169118118,
|
11158 |
+
"learning_rate": 1.5226557211032908e-08,
|
11159 |
+
"logits/chosen": -1.2149584293365479,
|
11160 |
+
"logits/rejected": -1.2694858312606812,
|
11161 |
+
"logps/chosen": -163.82122802734375,
|
11162 |
+
"logps/rejected": -217.37213134765625,
|
11163 |
+
"loss": 0.4581,
|
11164 |
+
"rewards/accuracies": 0.78125,
|
11165 |
+
"rewards/chosen": -0.4557928442955017,
|
11166 |
+
"rewards/margins": 0.9242541193962097,
|
11167 |
+
"rewards/rejected": -1.3800469636917114,
|
11168 |
+
"step": 1458
|
11169 |
+
},
|
11170 |
+
{
|
11171 |
+
"epoch": 1.6648848977264628,
|
11172 |
+
"grad_norm": 58.468740016047576,
|
11173 |
+
"learning_rate": 1.5025431573521207e-08,
|
11174 |
+
"logits/chosen": -1.3966403007507324,
|
11175 |
+
"logits/rejected": -1.395541787147522,
|
11176 |
+
"logps/chosen": -136.16111755371094,
|
11177 |
+
"logps/rejected": -158.5441436767578,
|
11178 |
+
"loss": 0.4215,
|
11179 |
+
"rewards/accuracies": 0.875,
|
11180 |
+
"rewards/chosen": -0.3195006549358368,
|
11181 |
+
"rewards/margins": 0.7229774594306946,
|
11182 |
+
"rewards/rejected": -1.042478084564209,
|
11183 |
+
"step": 1460
|
11184 |
+
},
|
11185 |
+
{
|
11186 |
+
"epoch": 1.6671655619699237,
|
11187 |
+
"grad_norm": 56.594315997333034,
|
11188 |
+
"learning_rate": 1.4825535151698653e-08,
|
11189 |
+
"logits/chosen": -1.3145086765289307,
|
11190 |
+
"logits/rejected": -1.3318628072738647,
|
11191 |
+
"logps/chosen": -218.93898010253906,
|
11192 |
+
"logps/rejected": -239.53721618652344,
|
11193 |
+
"loss": 0.4041,
|
11194 |
+
"rewards/accuracies": 0.8125,
|
11195 |
+
"rewards/chosen": -0.4259910583496094,
|
11196 |
+
"rewards/margins": 0.9239952564239502,
|
11197 |
+
"rewards/rejected": -1.34998619556427,
|
11198 |
+
"step": 1462
|
11199 |
+
},
|
11200 |
+
{
|
11201 |
+
"epoch": 1.6694462262133847,
|
11202 |
+
"grad_norm": 55.89445679724181,
|
11203 |
+
"learning_rate": 1.4626870837204775e-08,
|
11204 |
+
"logits/chosen": -1.379180908203125,
|
11205 |
+
"logits/rejected": -1.4072362184524536,
|
11206 |
+
"logps/chosen": -153.11817932128906,
|
11207 |
+
"logps/rejected": -182.13790893554688,
|
11208 |
+
"loss": 0.4185,
|
11209 |
+
"rewards/accuracies": 0.75,
|
11210 |
+
"rewards/chosen": -0.38719913363456726,
|
11211 |
+
"rewards/margins": 0.9092380404472351,
|
11212 |
+
"rewards/rejected": -1.2964370250701904,
|
11213 |
+
"step": 1464
|
11214 |
+
},
|
11215 |
+
{
|
11216 |
+
"epoch": 1.6717268904568456,
|
11217 |
+
"grad_norm": 64.47758334862267,
|
11218 |
+
"learning_rate": 1.4429441503855722e-08,
|
11219 |
+
"logits/chosen": -1.3387432098388672,
|
11220 |
+
"logits/rejected": -1.436204433441162,
|
11221 |
+
"logps/chosen": -214.61618041992188,
|
11222 |
+
"logps/rejected": -248.8955078125,
|
11223 |
+
"loss": 0.4463,
|
11224 |
+
"rewards/accuracies": 0.84375,
|
11225 |
+
"rewards/chosen": -0.5446640849113464,
|
11226 |
+
"rewards/margins": 1.1796414852142334,
|
11227 |
+
"rewards/rejected": -1.7243056297302246,
|
11228 |
+
"step": 1466
|
11229 |
+
},
|
11230 |
+
{
|
11231 |
+
"epoch": 1.6740075547003066,
|
11232 |
+
"grad_norm": 59.93784188124436,
|
11233 |
+
"learning_rate": 1.4233250007602871e-08,
|
11234 |
+
"logits/chosen": -1.1467586755752563,
|
11235 |
+
"logits/rejected": -1.1875630617141724,
|
11236 |
+
"logps/chosen": -198.951416015625,
|
11237 |
+
"logps/rejected": -230.94252014160156,
|
11238 |
+
"loss": 0.4599,
|
11239 |
+
"rewards/accuracies": 0.8125,
|
11240 |
+
"rewards/chosen": -0.7742232084274292,
|
11241 |
+
"rewards/margins": 1.2332063913345337,
|
11242 |
+
"rewards/rejected": -2.007429599761963,
|
11243 |
+
"step": 1468
|
11244 |
+
},
|
11245 |
+
{
|
11246 |
+
"epoch": 1.6762882189437673,
|
11247 |
+
"grad_norm": 62.25982751081324,
|
11248 |
+
"learning_rate": 1.4038299186491442e-08,
|
11249 |
+
"logits/chosen": -1.1409118175506592,
|
11250 |
+
"logits/rejected": -1.2762134075164795,
|
11251 |
+
"logps/chosen": -144.74981689453125,
|
11252 |
+
"logps/rejected": -227.7673797607422,
|
11253 |
+
"loss": 0.4217,
|
11254 |
+
"rewards/accuracies": 0.6875,
|
11255 |
+
"rewards/chosen": -0.43610525131225586,
|
11256 |
+
"rewards/margins": 1.684708833694458,
|
11257 |
+
"rewards/rejected": -2.120814085006714,
|
11258 |
+
"step": 1470
|
11259 |
+
},
|
11260 |
+
{
|
11261 |
+
"epoch": 1.6785688831872283,
|
11262 |
+
"grad_norm": 49.66084963571848,
|
11263 |
+
"learning_rate": 1.3844591860619382e-08,
|
11264 |
+
"logits/chosen": -1.3702046871185303,
|
11265 |
+
"logits/rejected": -1.4003832340240479,
|
11266 |
+
"logps/chosen": -164.3496856689453,
|
11267 |
+
"logps/rejected": -176.85682678222656,
|
11268 |
+
"loss": 0.4076,
|
11269 |
+
"rewards/accuracies": 0.71875,
|
11270 |
+
"rewards/chosen": -0.23472319543361664,
|
11271 |
+
"rewards/margins": 0.8279229402542114,
|
11272 |
+
"rewards/rejected": -1.0626461505889893,
|
11273 |
+
"step": 1472
|
11274 |
+
},
|
11275 |
+
{
|
11276 |
+
"epoch": 1.680849547430689,
|
11277 |
+
"grad_norm": 52.98373196788465,
|
11278 |
+
"learning_rate": 1.3652130832096653e-08,
|
11279 |
+
"logits/chosen": -1.1784981489181519,
|
11280 |
+
"logits/rejected": -1.2945374250411987,
|
11281 |
+
"logps/chosen": -185.53562927246094,
|
11282 |
+
"logps/rejected": -221.29660034179688,
|
11283 |
+
"loss": 0.4167,
|
11284 |
+
"rewards/accuracies": 0.9375,
|
11285 |
+
"rewards/chosen": -0.5693493485450745,
|
11286 |
+
"rewards/margins": 1.0049147605895996,
|
11287 |
+
"rewards/rejected": -1.5742641687393188,
|
11288 |
+
"step": 1474
|
11289 |
+
},
|
11290 |
+
{
|
11291 |
+
"epoch": 1.68313021167415,
|
11292 |
+
"grad_norm": 76.34629112032515,
|
11293 |
+
"learning_rate": 1.3460918885004658e-08,
|
11294 |
+
"logits/chosen": -1.2638037204742432,
|
11295 |
+
"logits/rejected": -1.3342554569244385,
|
11296 |
+
"logps/chosen": -177.4965362548828,
|
11297 |
+
"logps/rejected": -229.96853637695312,
|
11298 |
+
"loss": 0.4293,
|
11299 |
+
"rewards/accuracies": 0.8125,
|
11300 |
+
"rewards/chosen": -0.4774022698402405,
|
11301 |
+
"rewards/margins": 1.2457822561264038,
|
11302 |
+
"rewards/rejected": -1.723184585571289,
|
11303 |
+
"step": 1476
|
11304 |
+
},
|
11305 |
+
{
|
11306 |
+
"epoch": 1.685410875917611,
|
11307 |
+
"grad_norm": 66.57532589289495,
|
11308 |
+
"learning_rate": 1.3270958785355979e-08,
|
11309 |
+
"logits/chosen": -1.2002267837524414,
|
11310 |
+
"logits/rejected": -1.2736998796463013,
|
11311 |
+
"logps/chosen": -172.23741149902344,
|
11312 |
+
"logps/rejected": -227.90280151367188,
|
11313 |
+
"loss": 0.4668,
|
11314 |
+
"rewards/accuracies": 0.84375,
|
11315 |
+
"rewards/chosen": -0.5302340984344482,
|
11316 |
+
"rewards/margins": 1.142404556274414,
|
11317 |
+
"rewards/rejected": -1.6726385354995728,
|
11318 |
+
"step": 1478
|
11319 |
+
},
|
11320 |
+
{
|
11321 |
+
"epoch": 1.687691540161072,
|
11322 |
+
"grad_norm": 69.95077709646252,
|
11323 |
+
"learning_rate": 1.308225328105439e-08,
|
11324 |
+
"logits/chosen": -1.376049518585205,
|
11325 |
+
"logits/rejected": -1.4532899856567383,
|
11326 |
+
"logps/chosen": -178.05099487304688,
|
11327 |
+
"logps/rejected": -243.27200317382812,
|
11328 |
+
"loss": 0.4221,
|
11329 |
+
"rewards/accuracies": 0.875,
|
11330 |
+
"rewards/chosen": -0.3648741543292999,
|
11331 |
+
"rewards/margins": 1.095149040222168,
|
11332 |
+
"rewards/rejected": -1.460023045539856,
|
11333 |
+
"step": 1480
|
11334 |
+
},
|
11335 |
+
{
|
11336 |
+
"epoch": 1.6899722044045329,
|
11337 |
+
"grad_norm": 64.49691609932415,
|
11338 |
+
"learning_rate": 1.2894805101854989e-08,
|
11339 |
+
"logits/chosen": -1.3072996139526367,
|
11340 |
+
"logits/rejected": -1.2989863157272339,
|
11341 |
+
"logps/chosen": -166.4363555908203,
|
11342 |
+
"logps/rejected": -176.7548828125,
|
11343 |
+
"loss": 0.4753,
|
11344 |
+
"rewards/accuracies": 0.78125,
|
11345 |
+
"rewards/chosen": -0.39158201217651367,
|
11346 |
+
"rewards/margins": 0.8482180833816528,
|
11347 |
+
"rewards/rejected": -1.239800214767456,
|
11348 |
+
"step": 1482
|
11349 |
+
},
|
11350 |
+
{
|
11351 |
+
"epoch": 1.6922528686479938,
|
11352 |
+
"grad_norm": 59.42081736231207,
|
11353 |
+
"learning_rate": 1.270861695932489e-08,
|
11354 |
+
"logits/chosen": -1.3978495597839355,
|
11355 |
+
"logits/rejected": -1.4046682119369507,
|
11356 |
+
"logps/chosen": -214.11187744140625,
|
11357 |
+
"logps/rejected": -258.4549865722656,
|
11358 |
+
"loss": 0.4494,
|
11359 |
+
"rewards/accuracies": 0.625,
|
11360 |
+
"rewards/chosen": -0.5993155241012573,
|
11361 |
+
"rewards/margins": 0.9518192410469055,
|
11362 |
+
"rewards/rejected": -1.5511348247528076,
|
11363 |
+
"step": 1484
|
11364 |
+
},
|
11365 |
+
{
|
11366 |
+
"epoch": 1.6945335328914546,
|
11367 |
+
"grad_norm": 56.004460149491955,
|
11368 |
+
"learning_rate": 1.2523691546803872e-08,
|
11369 |
+
"logits/chosen": -1.1605501174926758,
|
11370 |
+
"logits/rejected": -1.2564440965652466,
|
11371 |
+
"logps/chosen": -139.624267578125,
|
11372 |
+
"logps/rejected": -178.62393188476562,
|
11373 |
+
"loss": 0.4557,
|
11374 |
+
"rewards/accuracies": 0.71875,
|
11375 |
+
"rewards/chosen": -0.27934566140174866,
|
11376 |
+
"rewards/margins": 0.7967109680175781,
|
11377 |
+
"rewards/rejected": -1.0760565996170044,
|
11378 |
+
"step": 1486
|
11379 |
+
},
|
11380 |
+
{
|
11381 |
+
"epoch": 1.6968141971349155,
|
11382 |
+
"grad_norm": 53.58208037132539,
|
11383 |
+
"learning_rate": 1.234003153936548e-08,
|
11384 |
+
"logits/chosen": -1.1957886219024658,
|
11385 |
+
"logits/rejected": -1.3672311305999756,
|
11386 |
+
"logps/chosen": -163.6760711669922,
|
11387 |
+
"logps/rejected": -232.0875244140625,
|
11388 |
+
"loss": 0.4126,
|
11389 |
+
"rewards/accuracies": 0.84375,
|
11390 |
+
"rewards/chosen": -0.378174364566803,
|
11391 |
+
"rewards/margins": 1.1063618659973145,
|
11392 |
+
"rewards/rejected": -1.4845364093780518,
|
11393 |
+
"step": 1488
|
11394 |
+
},
|
11395 |
+
{
|
11396 |
+
"epoch": 1.6990948613783763,
|
11397 |
+
"grad_norm": 75.62123712030618,
|
11398 |
+
"learning_rate": 1.2157639593778268e-08,
|
11399 |
+
"logits/chosen": -1.249180555343628,
|
11400 |
+
"logits/rejected": -1.257821798324585,
|
11401 |
+
"logps/chosen": -143.6417236328125,
|
11402 |
+
"logps/rejected": -205.5145721435547,
|
11403 |
+
"loss": 0.4895,
|
11404 |
+
"rewards/accuracies": 0.78125,
|
11405 |
+
"rewards/chosen": -0.3994945287704468,
|
11406 |
+
"rewards/margins": 1.2723394632339478,
|
11407 |
+
"rewards/rejected": -1.671833872795105,
|
11408 |
+
"step": 1490
|
11409 |
+
},
|
11410 |
+
{
|
11411 |
+
"epoch": 1.7013755256218372,
|
11412 |
+
"grad_norm": 64.99974592800548,
|
11413 |
+
"learning_rate": 1.1976518348467424e-08,
|
11414 |
+
"logits/chosen": -1.2328625917434692,
|
11415 |
+
"logits/rejected": -1.2472069263458252,
|
11416 |
+
"logps/chosen": -199.8415069580078,
|
11417 |
+
"logps/rejected": -222.4342803955078,
|
11418 |
+
"loss": 0.4444,
|
11419 |
+
"rewards/accuracies": 0.84375,
|
11420 |
+
"rewards/chosen": -0.544438898563385,
|
11421 |
+
"rewards/margins": 1.3196830749511719,
|
11422 |
+
"rewards/rejected": -1.8641220331192017,
|
11423 |
+
"step": 1492
|
11424 |
+
},
|
11425 |
+
{
|
11426 |
+
"epoch": 1.7036561898652982,
|
11427 |
+
"grad_norm": 66.84615124590252,
|
11428 |
+
"learning_rate": 1.1796670423476574e-08,
|
11429 |
+
"logits/chosen": -1.364052414894104,
|
11430 |
+
"logits/rejected": -1.3560256958007812,
|
11431 |
+
"logps/chosen": -133.53671264648438,
|
11432 |
+
"logps/rejected": -152.27175903320312,
|
11433 |
+
"loss": 0.4364,
|
11434 |
+
"rewards/accuracies": 0.75,
|
11435 |
+
"rewards/chosen": -0.3934406042098999,
|
11436 |
+
"rewards/margins": 0.749383270740509,
|
11437 |
+
"rewards/rejected": -1.1428238153457642,
|
11438 |
+
"step": 1494
|
11439 |
+
},
|
11440 |
+
{
|
11441 |
+
"epoch": 1.7059368541087592,
|
11442 |
+
"grad_norm": 63.13360469846886,
|
11443 |
+
"learning_rate": 1.1618098420429879e-08,
|
11444 |
+
"logits/chosen": -1.2544901371002197,
|
11445 |
+
"logits/rejected": -1.303771734237671,
|
11446 |
+
"logps/chosen": -166.77374267578125,
|
11447 |
+
"logps/rejected": -190.30552673339844,
|
11448 |
+
"loss": 0.4538,
|
11449 |
+
"rewards/accuracies": 0.84375,
|
11450 |
+
"rewards/chosen": -0.4510793685913086,
|
11451 |
+
"rewards/margins": 0.7863295674324036,
|
11452 |
+
"rewards/rejected": -1.2374088764190674,
|
11453 |
+
"step": 1496
|
11454 |
+
},
|
11455 |
+
{
|
11456 |
+
"epoch": 1.7082175183522201,
|
11457 |
+
"grad_norm": 50.3590196948944,
|
11458 |
+
"learning_rate": 1.1440804922494441e-08,
|
11459 |
+
"logits/chosen": -1.352832317352295,
|
11460 |
+
"logits/rejected": -1.357743263244629,
|
11461 |
+
"logps/chosen": -193.49298095703125,
|
11462 |
+
"logps/rejected": -207.95184326171875,
|
11463 |
+
"loss": 0.3787,
|
11464 |
+
"rewards/accuracies": 0.875,
|
11465 |
+
"rewards/chosen": -0.36873170733451843,
|
11466 |
+
"rewards/margins": 1.0819979906082153,
|
11467 |
+
"rewards/rejected": -1.4507297277450562,
|
11468 |
+
"step": 1498
|
11469 |
+
},
|
11470 |
+
{
|
11471 |
+
"epoch": 1.710498182595681,
|
11472 |
+
"grad_norm": 59.01395721554923,
|
11473 |
+
"learning_rate": 1.1264792494342856e-08,
|
11474 |
+
"logits/chosen": -1.2877607345581055,
|
11475 |
+
"logits/rejected": -1.3114897012710571,
|
11476 |
+
"logps/chosen": -158.8022003173828,
|
11477 |
+
"logps/rejected": -197.77500915527344,
|
11478 |
+
"loss": 0.4406,
|
11479 |
+
"rewards/accuracies": 0.875,
|
11480 |
+
"rewards/chosen": -0.48249971866607666,
|
11481 |
+
"rewards/margins": 0.8965498208999634,
|
11482 |
+
"rewards/rejected": -1.37904953956604,
|
11483 |
+
"step": 1500
|
11484 |
+
},
|
11485 |
+
{
|
11486 |
+
"epoch": 1.710498182595681,
|
11487 |
+
"eval_logits/chosen": -1.3420703411102295,
|
11488 |
+
"eval_logits/rejected": -1.3242188692092896,
|
11489 |
+
"eval_logps/chosen": -132.36380004882812,
|
11490 |
+
"eval_logps/rejected": -138.97621154785156,
|
11491 |
+
"eval_loss": 0.5477466583251953,
|
11492 |
+
"eval_rewards/accuracies": 0.7200000286102295,
|
11493 |
+
"eval_rewards/chosen": -0.24658337235450745,
|
11494 |
+
"eval_rewards/margins": 0.44476309418678284,
|
11495 |
+
"eval_rewards/rejected": -0.6913464069366455,
|
11496 |
+
"eval_runtime": 21.5135,
|
11497 |
+
"eval_samples_per_second": 4.648,
|
11498 |
+
"eval_steps_per_second": 1.162,
|
11499 |
+
"step": 1500
|
11500 |
}
|
11501 |
],
|
11502 |
"logging_steps": 2,
|