Training in progress, step 1500, checkpoint
Browse files- last-checkpoint/global_step1500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1500/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1500/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1500/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1500/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00004.safetensors +1 -1
- last-checkpoint/model-00002-of-00004.safetensors +1 -1
- last-checkpoint/model-00003-of-00004.safetensors +1 -1
- last-checkpoint/model-00004-of-00004.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +2300 -2
last-checkpoint/global_step1500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a5116dee8628efc144342ca4a6de9fe3583889a8ccce615c6b0a81dbeaed3c7
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cd980325951d8231000ff67edace7a12644e719c501ac636a0ce98206dbb63c
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c527bb7cf51aca43a29bba25edc1d4ae9e7da745bea4785d0690e02b42d02dc4
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f0838a35df82986324cb75bb54a537d38f605c4a6e5e657ffc60e9f19ed81d2
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1500/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfc8c9c9a7d2fcfa2e8fa40312006739b2c850b9a5ecfacd34f5a60173282093
|
3 |
+
size 150693
|
last-checkpoint/global_step1500/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:259ffd7944316a19d3cd4a130f207a063579c77fad5d447554aeff12156085d6
|
3 |
+
size 150693
|
last-checkpoint/global_step1500/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d29c1fe957781c3a62402feef79dfb38cf56b8fddb352092f6ffd32c6211e320
|
3 |
+
size 150693
|
last-checkpoint/global_step1500/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:921a755d22277c2d84c9363b6dc0c6e459c1e6fbaddb89bc814ee9c5db4f54fb
|
3 |
+
size 150693
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step1500
|
last-checkpoint/model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4976698672
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99de85720481c98cc093f3faf5805a4ff05d5df419d49b8575ed63ce236d5815
|
3 |
size 4976698672
|
last-checkpoint/model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999802720
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:368f761161ebad7292a8dbdeca4656fb602262d1f2495446f32f49896062f7dc
|
3 |
size 4999802720
|
last-checkpoint/model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4915916176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:306982a5a2f0fc8003fdb3eebf34d6850d83379bebc04fbe40d7a6bb9f8b6a5c
|
3 |
size 4915916176
|
last-checkpoint/model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1168138808
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa2c5d55d66df705380544c1b8076cf199a0a6e6da3583e847a01a69fbf8edb4
|
3 |
size 1168138808
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4f003069486a57c6ac033f30cf4c4213eb6b7d659bab68a5a50fdb8da7c4118
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a016ef89b4392d083b2c15a7cf06a39bc61a759f648cf6dc03f1c32b89a526aa
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b56fe0893036dc052d18d90feba4328b90ea71561942150b07406ac3d7a700e
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0c203d12c2c308dab785ed672c9ca27fb6a2f72acd1e1552d1516c7b0006013
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b0eed8fec4102664205a804b0fbc28ba65f44e3fb811cdaf695f0e9321c6fe0b
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -9199,6 +9199,2304 @@
|
|
9199 |
"eval_samples_per_second": 3.581,
|
9200 |
"eval_steps_per_second": 0.895,
|
9201 |
"step": 1200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9202 |
}
|
9203 |
],
|
9204 |
"logging_steps": 2,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.7372421281216068,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 1500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
9199 |
"eval_samples_per_second": 3.581,
|
9200 |
"eval_steps_per_second": 0.895,
|
9201 |
"step": 1200
|
9202 |
+
},
|
9203 |
+
{
|
9204 |
+
"epoch": 1.3921100253347811,
|
9205 |
+
"grad_norm": 65.19042296707491,
|
9206 |
+
"learning_rate": 4.70189974343236e-08,
|
9207 |
+
"logits/chosen": -1.2034111022949219,
|
9208 |
+
"logits/rejected": -1.2350322008132935,
|
9209 |
+
"logps/chosen": -121.24800872802734,
|
9210 |
+
"logps/rejected": -157.0181427001953,
|
9211 |
+
"loss": 0.389,
|
9212 |
+
"rewards/accuracies": 0.875,
|
9213 |
+
"rewards/chosen": -0.05601517856121063,
|
9214 |
+
"rewards/margins": 1.3547430038452148,
|
9215 |
+
"rewards/rejected": -1.4107582569122314,
|
9216 |
+
"step": 1202
|
9217 |
+
},
|
9218 |
+
{
|
9219 |
+
"epoch": 1.3944263481722765,
|
9220 |
+
"grad_norm": 53.022218131457315,
|
9221 |
+
"learning_rate": 4.669166489076283e-08,
|
9222 |
+
"logits/chosen": -1.1960185766220093,
|
9223 |
+
"logits/rejected": -1.2297847270965576,
|
9224 |
+
"logps/chosen": -147.0485382080078,
|
9225 |
+
"logps/rejected": -177.79165649414062,
|
9226 |
+
"loss": 0.3619,
|
9227 |
+
"rewards/accuracies": 0.84375,
|
9228 |
+
"rewards/chosen": -0.17010337114334106,
|
9229 |
+
"rewards/margins": 1.6398181915283203,
|
9230 |
+
"rewards/rejected": -1.8099215030670166,
|
9231 |
+
"step": 1204
|
9232 |
+
},
|
9233 |
+
{
|
9234 |
+
"epoch": 1.396742671009772,
|
9235 |
+
"grad_norm": 51.968933285305766,
|
9236 |
+
"learning_rate": 4.636512834750479e-08,
|
9237 |
+
"logits/chosen": -1.1823878288269043,
|
9238 |
+
"logits/rejected": -1.1940593719482422,
|
9239 |
+
"logps/chosen": -165.0301055908203,
|
9240 |
+
"logps/rejected": -192.39288330078125,
|
9241 |
+
"loss": 0.3919,
|
9242 |
+
"rewards/accuracies": 0.8125,
|
9243 |
+
"rewards/chosen": -0.2851025462150574,
|
9244 |
+
"rewards/margins": 1.3472487926483154,
|
9245 |
+
"rewards/rejected": -1.6323513984680176,
|
9246 |
+
"step": 1206
|
9247 |
+
},
|
9248 |
+
{
|
9249 |
+
"epoch": 1.3990589938472675,
|
9250 |
+
"grad_norm": 70.67490625711265,
|
9251 |
+
"learning_rate": 4.6039392680394705e-08,
|
9252 |
+
"logits/chosen": -1.1791980266571045,
|
9253 |
+
"logits/rejected": -1.2245995998382568,
|
9254 |
+
"logps/chosen": -144.21392822265625,
|
9255 |
+
"logps/rejected": -177.3154754638672,
|
9256 |
+
"loss": 0.4364,
|
9257 |
+
"rewards/accuracies": 0.78125,
|
9258 |
+
"rewards/chosen": -0.42649248242378235,
|
9259 |
+
"rewards/margins": 1.543526291847229,
|
9260 |
+
"rewards/rejected": -1.9700188636779785,
|
9261 |
+
"step": 1208
|
9262 |
+
},
|
9263 |
+
{
|
9264 |
+
"epoch": 1.401375316684763,
|
9265 |
+
"grad_norm": 73.92072443542028,
|
9266 |
+
"learning_rate": 4.5714462753319025e-08,
|
9267 |
+
"logits/chosen": -1.1179996728897095,
|
9268 |
+
"logits/rejected": -1.1189749240875244,
|
9269 |
+
"logps/chosen": -176.80593872070312,
|
9270 |
+
"logps/rejected": -192.028564453125,
|
9271 |
+
"loss": 0.3727,
|
9272 |
+
"rewards/accuracies": 0.8125,
|
9273 |
+
"rewards/chosen": -1.0178598165512085,
|
9274 |
+
"rewards/margins": 1.1851011514663696,
|
9275 |
+
"rewards/rejected": -2.202960968017578,
|
9276 |
+
"step": 1210
|
9277 |
+
},
|
9278 |
+
{
|
9279 |
+
"epoch": 1.4036916395222585,
|
9280 |
+
"grad_norm": 77.10308260730022,
|
9281 |
+
"learning_rate": 4.539034341813285e-08,
|
9282 |
+
"logits/chosen": -1.1620545387268066,
|
9283 |
+
"logits/rejected": -1.1280796527862549,
|
9284 |
+
"logps/chosen": -148.05978393554688,
|
9285 |
+
"logps/rejected": -170.8760986328125,
|
9286 |
+
"loss": 0.4417,
|
9287 |
+
"rewards/accuracies": 0.84375,
|
9288 |
+
"rewards/chosen": -0.6205931901931763,
|
9289 |
+
"rewards/margins": 0.9719365835189819,
|
9290 |
+
"rewards/rejected": -1.5925298929214478,
|
9291 |
+
"step": 1212
|
9292 |
+
},
|
9293 |
+
{
|
9294 |
+
"epoch": 1.406007962359754,
|
9295 |
+
"grad_norm": 50.96217046302329,
|
9296 |
+
"learning_rate": 4.50670395145876e-08,
|
9297 |
+
"logits/chosen": -1.2354716062545776,
|
9298 |
+
"logits/rejected": -1.2715418338775635,
|
9299 |
+
"logps/chosen": -197.6702423095703,
|
9300 |
+
"logps/rejected": -234.24781799316406,
|
9301 |
+
"loss": 0.416,
|
9302 |
+
"rewards/accuracies": 0.8125,
|
9303 |
+
"rewards/chosen": -0.38070574402809143,
|
9304 |
+
"rewards/margins": 1.9455530643463135,
|
9305 |
+
"rewards/rejected": -2.326258897781372,
|
9306 |
+
"step": 1214
|
9307 |
+
},
|
9308 |
+
{
|
9309 |
+
"epoch": 1.4083242851972493,
|
9310 |
+
"grad_norm": 60.92763813478579,
|
9311 |
+
"learning_rate": 4.474455587025869e-08,
|
9312 |
+
"logits/chosen": -1.2651827335357666,
|
9313 |
+
"logits/rejected": -1.2328460216522217,
|
9314 |
+
"logps/chosen": -184.45912170410156,
|
9315 |
+
"logps/rejected": -197.0801239013672,
|
9316 |
+
"loss": 0.3824,
|
9317 |
+
"rewards/accuracies": 0.875,
|
9318 |
+
"rewards/chosen": -0.584815502166748,
|
9319 |
+
"rewards/margins": 1.4052482843399048,
|
9320 |
+
"rewards/rejected": -1.9900637865066528,
|
9321 |
+
"step": 1216
|
9322 |
+
},
|
9323 |
+
{
|
9324 |
+
"epoch": 1.410640608034745,
|
9325 |
+
"grad_norm": 63.32401043366279,
|
9326 |
+
"learning_rate": 4.4422897300473315e-08,
|
9327 |
+
"logits/chosen": -1.3017557859420776,
|
9328 |
+
"logits/rejected": -1.2864493131637573,
|
9329 |
+
"logps/chosen": -124.99530792236328,
|
9330 |
+
"logps/rejected": -148.06082153320312,
|
9331 |
+
"loss": 0.3893,
|
9332 |
+
"rewards/accuracies": 0.875,
|
9333 |
+
"rewards/chosen": -0.2559690475463867,
|
9334 |
+
"rewards/margins": 1.1134750843048096,
|
9335 |
+
"rewards/rejected": -1.3694441318511963,
|
9336 |
+
"step": 1218
|
9337 |
+
},
|
9338 |
+
{
|
9339 |
+
"epoch": 1.4129569308722403,
|
9340 |
+
"grad_norm": 53.98313466834894,
|
9341 |
+
"learning_rate": 4.4102068608238685e-08,
|
9342 |
+
"logits/chosen": -1.230201244354248,
|
9343 |
+
"logits/rejected": -1.2914016246795654,
|
9344 |
+
"logps/chosen": -171.6476287841797,
|
9345 |
+
"logps/rejected": -189.03538513183594,
|
9346 |
+
"loss": 0.4396,
|
9347 |
+
"rewards/accuracies": 0.90625,
|
9348 |
+
"rewards/chosen": -0.6881774067878723,
|
9349 |
+
"rewards/margins": 0.9222534894943237,
|
9350 |
+
"rewards/rejected": -1.6104308366775513,
|
9351 |
+
"step": 1220
|
9352 |
+
},
|
9353 |
+
{
|
9354 |
+
"epoch": 1.415273253709736,
|
9355 |
+
"grad_norm": 62.04575224759104,
|
9356 |
+
"learning_rate": 4.3782074584170346e-08,
|
9357 |
+
"logits/chosen": -1.214889645576477,
|
9358 |
+
"logits/rejected": -1.1926887035369873,
|
9359 |
+
"logps/chosen": -92.45348358154297,
|
9360 |
+
"logps/rejected": -105.89907836914062,
|
9361 |
+
"loss": 0.4592,
|
9362 |
+
"rewards/accuracies": 0.71875,
|
9363 |
+
"rewards/chosen": -0.3113905191421509,
|
9364 |
+
"rewards/margins": 0.9316319823265076,
|
9365 |
+
"rewards/rejected": -1.2430225610733032,
|
9366 |
+
"step": 1222
|
9367 |
+
},
|
9368 |
+
{
|
9369 |
+
"epoch": 1.4175895765472313,
|
9370 |
+
"grad_norm": 71.40412638311601,
|
9371 |
+
"learning_rate": 4.34629200064205e-08,
|
9372 |
+
"logits/chosen": -1.1961030960083008,
|
9373 |
+
"logits/rejected": -1.2155812978744507,
|
9374 |
+
"logps/chosen": -170.02508544921875,
|
9375 |
+
"logps/rejected": -210.41592407226562,
|
9376 |
+
"loss": 0.4078,
|
9377 |
+
"rewards/accuracies": 0.84375,
|
9378 |
+
"rewards/chosen": -0.856181263923645,
|
9379 |
+
"rewards/margins": 1.6567095518112183,
|
9380 |
+
"rewards/rejected": -2.5128908157348633,
|
9381 |
+
"step": 1224
|
9382 |
+
},
|
9383 |
+
{
|
9384 |
+
"epoch": 1.4199058993847267,
|
9385 |
+
"grad_norm": 57.3723784957176,
|
9386 |
+
"learning_rate": 4.314460964060672e-08,
|
9387 |
+
"logits/chosen": -1.1995205879211426,
|
9388 |
+
"logits/rejected": -1.306661605834961,
|
9389 |
+
"logps/chosen": -158.38504028320312,
|
9390 |
+
"logps/rejected": -204.92669677734375,
|
9391 |
+
"loss": 0.4169,
|
9392 |
+
"rewards/accuracies": 0.75,
|
9393 |
+
"rewards/chosen": -0.4841935634613037,
|
9394 |
+
"rewards/margins": 1.3874504566192627,
|
9395 |
+
"rewards/rejected": -1.8716439008712769,
|
9396 |
+
"step": 1226
|
9397 |
+
},
|
9398 |
+
{
|
9399 |
+
"epoch": 1.4222222222222223,
|
9400 |
+
"grad_norm": 51.26821420581517,
|
9401 |
+
"learning_rate": 4.2827148239740875e-08,
|
9402 |
+
"logits/chosen": -1.1911481618881226,
|
9403 |
+
"logits/rejected": -1.2252691984176636,
|
9404 |
+
"logps/chosen": -149.15687561035156,
|
9405 |
+
"logps/rejected": -168.63299560546875,
|
9406 |
+
"loss": 0.3545,
|
9407 |
+
"rewards/accuracies": 0.84375,
|
9408 |
+
"rewards/chosen": 0.06090724095702171,
|
9409 |
+
"rewards/margins": 1.1758480072021484,
|
9410 |
+
"rewards/rejected": -1.114940881729126,
|
9411 |
+
"step": 1228
|
9412 |
+
},
|
9413 |
+
{
|
9414 |
+
"epoch": 1.4245385450597177,
|
9415 |
+
"grad_norm": 59.18176805705484,
|
9416 |
+
"learning_rate": 4.251054054415808e-08,
|
9417 |
+
"logits/chosen": -1.2053039073944092,
|
9418 |
+
"logits/rejected": -1.26601243019104,
|
9419 |
+
"logps/chosen": -124.14604187011719,
|
9420 |
+
"logps/rejected": -133.4225616455078,
|
9421 |
+
"loss": 0.5098,
|
9422 |
+
"rewards/accuracies": 0.78125,
|
9423 |
+
"rewards/chosen": -0.6462215781211853,
|
9424 |
+
"rewards/margins": 0.5801703333854675,
|
9425 |
+
"rewards/rejected": -1.2263920307159424,
|
9426 |
+
"step": 1230
|
9427 |
+
},
|
9428 |
+
{
|
9429 |
+
"epoch": 1.4268548678972133,
|
9430 |
+
"grad_norm": 59.54196160924442,
|
9431 |
+
"learning_rate": 4.219479128144583e-08,
|
9432 |
+
"logits/chosen": -1.1135673522949219,
|
9433 |
+
"logits/rejected": -1.1362190246582031,
|
9434 |
+
"logps/chosen": -168.93687438964844,
|
9435 |
+
"logps/rejected": -226.7947235107422,
|
9436 |
+
"loss": 0.4097,
|
9437 |
+
"rewards/accuracies": 0.8125,
|
9438 |
+
"rewards/chosen": -0.9602434039115906,
|
9439 |
+
"rewards/margins": 2.3180015087127686,
|
9440 |
+
"rewards/rejected": -3.278245210647583,
|
9441 |
+
"step": 1232
|
9442 |
+
},
|
9443 |
+
{
|
9444 |
+
"epoch": 1.4291711907347087,
|
9445 |
+
"grad_norm": 56.10855119361768,
|
9446 |
+
"learning_rate": 4.187990516637361e-08,
|
9447 |
+
"logits/chosen": -1.3297936916351318,
|
9448 |
+
"logits/rejected": -1.3411719799041748,
|
9449 |
+
"logps/chosen": -160.88560485839844,
|
9450 |
+
"logps/rejected": -200.78402709960938,
|
9451 |
+
"loss": 0.4441,
|
9452 |
+
"rewards/accuracies": 0.6875,
|
9453 |
+
"rewards/chosen": -0.3891071081161499,
|
9454 |
+
"rewards/margins": 1.3050543069839478,
|
9455 |
+
"rewards/rejected": -1.6941611766815186,
|
9456 |
+
"step": 1234
|
9457 |
+
},
|
9458 |
+
{
|
9459 |
+
"epoch": 1.431487513572204,
|
9460 |
+
"grad_norm": 63.476091563634505,
|
9461 |
+
"learning_rate": 4.156588690082229e-08,
|
9462 |
+
"logits/chosen": -1.2897419929504395,
|
9463 |
+
"logits/rejected": -1.2645704746246338,
|
9464 |
+
"logps/chosen": -194.49856567382812,
|
9465 |
+
"logps/rejected": -206.52557373046875,
|
9466 |
+
"loss": 0.4213,
|
9467 |
+
"rewards/accuracies": 0.875,
|
9468 |
+
"rewards/chosen": -0.4005884528160095,
|
9469 |
+
"rewards/margins": 1.0420472621917725,
|
9470 |
+
"rewards/rejected": -1.4426357746124268,
|
9471 |
+
"step": 1236
|
9472 |
+
},
|
9473 |
+
{
|
9474 |
+
"epoch": 1.4338038364096997,
|
9475 |
+
"grad_norm": 64.48058226856965,
|
9476 |
+
"learning_rate": 4.125274117371401e-08,
|
9477 |
+
"logits/chosen": -1.2089612483978271,
|
9478 |
+
"logits/rejected": -1.1553493738174438,
|
9479 |
+
"logps/chosen": -168.6042938232422,
|
9480 |
+
"logps/rejected": -183.2893524169922,
|
9481 |
+
"loss": 0.4148,
|
9482 |
+
"rewards/accuracies": 0.75,
|
9483 |
+
"rewards/chosen": -0.5347051024436951,
|
9484 |
+
"rewards/margins": 1.0337334871292114,
|
9485 |
+
"rewards/rejected": -1.5684385299682617,
|
9486 |
+
"step": 1238
|
9487 |
+
},
|
9488 |
+
{
|
9489 |
+
"epoch": 1.436120159247195,
|
9490 |
+
"grad_norm": 66.24493235516152,
|
9491 |
+
"learning_rate": 4.094047266094225e-08,
|
9492 |
+
"logits/chosen": -1.1589419841766357,
|
9493 |
+
"logits/rejected": -1.2252132892608643,
|
9494 |
+
"logps/chosen": -121.9848403930664,
|
9495 |
+
"logps/rejected": -142.36962890625,
|
9496 |
+
"loss": 0.4173,
|
9497 |
+
"rewards/accuracies": 0.8125,
|
9498 |
+
"rewards/chosen": -0.47534143924713135,
|
9499 |
+
"rewards/margins": 0.8780463933944702,
|
9500 |
+
"rewards/rejected": -1.353387713432312,
|
9501 |
+
"step": 1240
|
9502 |
+
},
|
9503 |
+
{
|
9504 |
+
"epoch": 1.4384364820846907,
|
9505 |
+
"grad_norm": 67.05029588369739,
|
9506 |
+
"learning_rate": 4.062908602530186e-08,
|
9507 |
+
"logits/chosen": -1.2876590490341187,
|
9508 |
+
"logits/rejected": -1.2815279960632324,
|
9509 |
+
"logps/chosen": -125.93046569824219,
|
9510 |
+
"logps/rejected": -150.712890625,
|
9511 |
+
"loss": 0.4631,
|
9512 |
+
"rewards/accuracies": 0.78125,
|
9513 |
+
"rewards/chosen": -0.10989043861627579,
|
9514 |
+
"rewards/margins": 1.1261409521102905,
|
9515 |
+
"rewards/rejected": -1.2360315322875977,
|
9516 |
+
"step": 1242
|
9517 |
+
},
|
9518 |
+
{
|
9519 |
+
"epoch": 1.440752804922186,
|
9520 |
+
"grad_norm": 72.90739630648197,
|
9521 |
+
"learning_rate": 4.031858591641948e-08,
|
9522 |
+
"logits/chosen": -1.2130520343780518,
|
9523 |
+
"logits/rejected": -1.2716223001480103,
|
9524 |
+
"logps/chosen": -173.3944549560547,
|
9525 |
+
"logps/rejected": -214.772705078125,
|
9526 |
+
"loss": 0.4677,
|
9527 |
+
"rewards/accuracies": 0.90625,
|
9528 |
+
"rewards/chosen": -0.10449859499931335,
|
9529 |
+
"rewards/margins": 1.398302435874939,
|
9530 |
+
"rewards/rejected": -1.5028009414672852,
|
9531 |
+
"step": 1244
|
9532 |
+
},
|
9533 |
+
{
|
9534 |
+
"epoch": 1.4430691277596814,
|
9535 |
+
"grad_norm": 53.298120215279184,
|
9536 |
+
"learning_rate": 4.000897697068417e-08,
|
9537 |
+
"logits/chosen": -1.2028003931045532,
|
9538 |
+
"logits/rejected": -1.2138316631317139,
|
9539 |
+
"logps/chosen": -153.28302001953125,
|
9540 |
+
"logps/rejected": -213.52073669433594,
|
9541 |
+
"loss": 0.4146,
|
9542 |
+
"rewards/accuracies": 0.8125,
|
9543 |
+
"rewards/chosen": -0.801752507686615,
|
9544 |
+
"rewards/margins": 2.9270172119140625,
|
9545 |
+
"rewards/rejected": -3.7287697792053223,
|
9546 |
+
"step": 1246
|
9547 |
+
},
|
9548 |
+
{
|
9549 |
+
"epoch": 1.445385450597177,
|
9550 |
+
"grad_norm": 65.76302946420111,
|
9551 |
+
"learning_rate": 3.970026381117813e-08,
|
9552 |
+
"logits/chosen": -1.2451378107070923,
|
9553 |
+
"logits/rejected": -1.2875595092773438,
|
9554 |
+
"logps/chosen": -148.5320587158203,
|
9555 |
+
"logps/rejected": -175.6725616455078,
|
9556 |
+
"loss": 0.4043,
|
9557 |
+
"rewards/accuracies": 0.84375,
|
9558 |
+
"rewards/chosen": -0.14613214135169983,
|
9559 |
+
"rewards/margins": 1.1353800296783447,
|
9560 |
+
"rewards/rejected": -1.2815121412277222,
|
9561 |
+
"step": 1248
|
9562 |
+
},
|
9563 |
+
{
|
9564 |
+
"epoch": 1.4477017734346724,
|
9565 |
+
"grad_norm": 60.49195421908293,
|
9566 |
+
"learning_rate": 3.93924510476076e-08,
|
9567 |
+
"logits/chosen": -1.1492172479629517,
|
9568 |
+
"logits/rejected": -1.1965818405151367,
|
9569 |
+
"logps/chosen": -182.40252685546875,
|
9570 |
+
"logps/rejected": -212.2118682861328,
|
9571 |
+
"loss": 0.3858,
|
9572 |
+
"rewards/accuracies": 0.90625,
|
9573 |
+
"rewards/chosen": -0.37888991832733154,
|
9574 |
+
"rewards/margins": 1.5161809921264648,
|
9575 |
+
"rewards/rejected": -1.8950707912445068,
|
9576 |
+
"step": 1250
|
9577 |
+
},
|
9578 |
+
{
|
9579 |
+
"epoch": 1.450018096272168,
|
9580 |
+
"grad_norm": 58.233537771407626,
|
9581 |
+
"learning_rate": 3.9085543276234246e-08,
|
9582 |
+
"logits/chosen": -1.165425419807434,
|
9583 |
+
"logits/rejected": -1.2350255250930786,
|
9584 |
+
"logps/chosen": -173.421630859375,
|
9585 |
+
"logps/rejected": -217.11502075195312,
|
9586 |
+
"loss": 0.395,
|
9587 |
+
"rewards/accuracies": 0.875,
|
9588 |
+
"rewards/chosen": -0.4715797007083893,
|
9589 |
+
"rewards/margins": 1.3812612295150757,
|
9590 |
+
"rewards/rejected": -1.8528410196304321,
|
9591 |
+
"step": 1252
|
9592 |
+
},
|
9593 |
+
{
|
9594 |
+
"epoch": 1.4523344191096634,
|
9595 |
+
"grad_norm": 56.04571021565187,
|
9596 |
+
"learning_rate": 3.8779545079806244e-08,
|
9597 |
+
"logits/chosen": -1.2306987047195435,
|
9598 |
+
"logits/rejected": -1.2661701440811157,
|
9599 |
+
"logps/chosen": -158.64395141601562,
|
9600 |
+
"logps/rejected": -163.93408203125,
|
9601 |
+
"loss": 0.4244,
|
9602 |
+
"rewards/accuracies": 0.78125,
|
9603 |
+
"rewards/chosen": -0.8128874897956848,
|
9604 |
+
"rewards/margins": 1.2007758617401123,
|
9605 |
+
"rewards/rejected": -2.0136635303497314,
|
9606 |
+
"step": 1254
|
9607 |
+
},
|
9608 |
+
{
|
9609 |
+
"epoch": 1.4546507419471588,
|
9610 |
+
"grad_norm": 56.87831566114625,
|
9611 |
+
"learning_rate": 3.847446102749009e-08,
|
9612 |
+
"logits/chosen": -1.2190110683441162,
|
9613 |
+
"logits/rejected": -1.2493897676467896,
|
9614 |
+
"logps/chosen": -165.40530395507812,
|
9615 |
+
"logps/rejected": -196.738037109375,
|
9616 |
+
"loss": 0.4215,
|
9617 |
+
"rewards/accuracies": 0.875,
|
9618 |
+
"rewards/chosen": -0.03989005833864212,
|
9619 |
+
"rewards/margins": 1.3989439010620117,
|
9620 |
+
"rewards/rejected": -1.4388341903686523,
|
9621 |
+
"step": 1256
|
9622 |
+
},
|
9623 |
+
{
|
9624 |
+
"epoch": 1.4569670647846544,
|
9625 |
+
"grad_norm": 50.40656011526863,
|
9626 |
+
"learning_rate": 3.817029567480228e-08,
|
9627 |
+
"logits/chosen": -1.1830198764801025,
|
9628 |
+
"logits/rejected": -1.1742397546768188,
|
9629 |
+
"logps/chosen": -136.02699279785156,
|
9630 |
+
"logps/rejected": -155.1636199951172,
|
9631 |
+
"loss": 0.4128,
|
9632 |
+
"rewards/accuracies": 0.90625,
|
9633 |
+
"rewards/chosen": -0.21887874603271484,
|
9634 |
+
"rewards/margins": 1.521628737449646,
|
9635 |
+
"rewards/rejected": -1.7405076026916504,
|
9636 |
+
"step": 1258
|
9637 |
+
},
|
9638 |
+
{
|
9639 |
+
"epoch": 1.4592833876221498,
|
9640 |
+
"grad_norm": 57.01905107655371,
|
9641 |
+
"learning_rate": 3.7867053563541195e-08,
|
9642 |
+
"logits/chosen": -0.9882857799530029,
|
9643 |
+
"logits/rejected": -0.9833186864852905,
|
9644 |
+
"logps/chosen": -144.3942413330078,
|
9645 |
+
"logps/rejected": -162.635498046875,
|
9646 |
+
"loss": 0.3927,
|
9647 |
+
"rewards/accuracies": 0.9375,
|
9648 |
+
"rewards/chosen": -0.3512459993362427,
|
9649 |
+
"rewards/margins": 1.1930773258209229,
|
9650 |
+
"rewards/rejected": -1.544323205947876,
|
9651 |
+
"step": 1260
|
9652 |
+
},
|
9653 |
+
{
|
9654 |
+
"epoch": 1.4615997104596454,
|
9655 |
+
"grad_norm": 59.931801696827705,
|
9656 |
+
"learning_rate": 3.756473922171941e-08,
|
9657 |
+
"logits/chosen": -1.2637214660644531,
|
9658 |
+
"logits/rejected": -1.3037135601043701,
|
9659 |
+
"logps/chosen": -177.24203491210938,
|
9660 |
+
"logps/rejected": -203.71690368652344,
|
9661 |
+
"loss": 0.4226,
|
9662 |
+
"rewards/accuracies": 0.8125,
|
9663 |
+
"rewards/chosen": -0.2562026083469391,
|
9664 |
+
"rewards/margins": 1.4607383012771606,
|
9665 |
+
"rewards/rejected": -1.716940999031067,
|
9666 |
+
"step": 1262
|
9667 |
+
},
|
9668 |
+
{
|
9669 |
+
"epoch": 1.4639160332971408,
|
9670 |
+
"grad_norm": 61.29727113734488,
|
9671 |
+
"learning_rate": 3.726335716349611e-08,
|
9672 |
+
"logits/chosen": -1.0998203754425049,
|
9673 |
+
"logits/rejected": -1.1926376819610596,
|
9674 |
+
"logps/chosen": -140.4119415283203,
|
9675 |
+
"logps/rejected": -204.82289123535156,
|
9676 |
+
"loss": 0.3865,
|
9677 |
+
"rewards/accuracies": 0.875,
|
9678 |
+
"rewards/chosen": -0.2265455424785614,
|
9679 |
+
"rewards/margins": 1.8351831436157227,
|
9680 |
+
"rewards/rejected": -2.0617284774780273,
|
9681 |
+
"step": 1264
|
9682 |
+
},
|
9683 |
+
{
|
9684 |
+
"epoch": 1.4662323561346362,
|
9685 |
+
"grad_norm": 52.33658884441956,
|
9686 |
+
"learning_rate": 3.696291188910954e-08,
|
9687 |
+
"logits/chosen": -1.334247350692749,
|
9688 |
+
"logits/rejected": -1.2990857362747192,
|
9689 |
+
"logps/chosen": -211.4185791015625,
|
9690 |
+
"logps/rejected": -239.36732482910156,
|
9691 |
+
"loss": 0.4207,
|
9692 |
+
"rewards/accuracies": 0.8125,
|
9693 |
+
"rewards/chosen": -0.521153450012207,
|
9694 |
+
"rewards/margins": 1.7276406288146973,
|
9695 |
+
"rewards/rejected": -2.2487940788269043,
|
9696 |
+
"step": 1266
|
9697 |
+
},
|
9698 |
+
{
|
9699 |
+
"epoch": 1.4685486789721318,
|
9700 |
+
"grad_norm": 55.99376810708936,
|
9701 |
+
"learning_rate": 3.666340788480986e-08,
|
9702 |
+
"logits/chosen": -1.2197870016098022,
|
9703 |
+
"logits/rejected": -1.2413604259490967,
|
9704 |
+
"logps/chosen": -152.36419677734375,
|
9705 |
+
"logps/rejected": -199.79225158691406,
|
9706 |
+
"loss": 0.4336,
|
9707 |
+
"rewards/accuracies": 0.78125,
|
9708 |
+
"rewards/chosen": -0.41305410861968994,
|
9709 |
+
"rewards/margins": 1.4322149753570557,
|
9710 |
+
"rewards/rejected": -1.845268964767456,
|
9711 |
+
"step": 1268
|
9712 |
+
},
|
9713 |
+
{
|
9714 |
+
"epoch": 1.4708650018096272,
|
9715 |
+
"grad_norm": 48.41879744298975,
|
9716 |
+
"learning_rate": 3.636484962279226e-08,
|
9717 |
+
"logits/chosen": -1.319197416305542,
|
9718 |
+
"logits/rejected": -1.4153599739074707,
|
9719 |
+
"logps/chosen": -138.763427734375,
|
9720 |
+
"logps/rejected": -166.7461395263672,
|
9721 |
+
"loss": 0.3822,
|
9722 |
+
"rewards/accuracies": 0.875,
|
9723 |
+
"rewards/chosen": 0.17914807796478271,
|
9724 |
+
"rewards/margins": 1.2840946912765503,
|
9725 |
+
"rewards/rejected": -1.1049466133117676,
|
9726 |
+
"step": 1270
|
9727 |
+
},
|
9728 |
+
{
|
9729 |
+
"epoch": 1.4731813246471228,
|
9730 |
+
"grad_norm": 61.194603768661054,
|
9731 |
+
"learning_rate": 3.6067241561130114e-08,
|
9732 |
+
"logits/chosen": -1.2439590692520142,
|
9733 |
+
"logits/rejected": -1.183296799659729,
|
9734 |
+
"logps/chosen": -141.58709716796875,
|
9735 |
+
"logps/rejected": -141.97996520996094,
|
9736 |
+
"loss": 0.409,
|
9737 |
+
"rewards/accuracies": 0.8125,
|
9738 |
+
"rewards/chosen": 0.017182359471917152,
|
9739 |
+
"rewards/margins": 0.84832763671875,
|
9740 |
+
"rewards/rejected": -0.8311452269554138,
|
9741 |
+
"step": 1272
|
9742 |
+
},
|
9743 |
+
{
|
9744 |
+
"epoch": 1.4754976474846182,
|
9745 |
+
"grad_norm": 77.92386005527986,
|
9746 |
+
"learning_rate": 3.5770588143708315e-08,
|
9747 |
+
"logits/chosen": -1.2311725616455078,
|
9748 |
+
"logits/rejected": -1.2159764766693115,
|
9749 |
+
"logps/chosen": -155.29443359375,
|
9750 |
+
"logps/rejected": -178.33035278320312,
|
9751 |
+
"loss": 0.4345,
|
9752 |
+
"rewards/accuracies": 0.90625,
|
9753 |
+
"rewards/chosen": 0.0964183509349823,
|
9754 |
+
"rewards/margins": 1.3752447366714478,
|
9755 |
+
"rewards/rejected": -1.278826355934143,
|
9756 |
+
"step": 1274
|
9757 |
+
},
|
9758 |
+
{
|
9759 |
+
"epoch": 1.4778139703221136,
|
9760 |
+
"grad_norm": 59.69529021775264,
|
9761 |
+
"learning_rate": 3.5474893800157e-08,
|
9762 |
+
"logits/chosen": -1.2245632410049438,
|
9763 |
+
"logits/rejected": -1.216321349143982,
|
9764 |
+
"logps/chosen": -146.5887451171875,
|
9765 |
+
"logps/rejected": -148.22378540039062,
|
9766 |
+
"loss": 0.4262,
|
9767 |
+
"rewards/accuracies": 0.78125,
|
9768 |
+
"rewards/chosen": -0.34651532769203186,
|
9769 |
+
"rewards/margins": 0.8327122926712036,
|
9770 |
+
"rewards/rejected": -1.179227590560913,
|
9771 |
+
"step": 1276
|
9772 |
+
},
|
9773 |
+
{
|
9774 |
+
"epoch": 1.4801302931596092,
|
9775 |
+
"grad_norm": 75.38031016102305,
|
9776 |
+
"learning_rate": 3.5180162945785554e-08,
|
9777 |
+
"logits/chosen": -1.2859201431274414,
|
9778 |
+
"logits/rejected": -1.3334230184555054,
|
9779 |
+
"logps/chosen": -122.11463928222656,
|
9780 |
+
"logps/rejected": -150.13040161132812,
|
9781 |
+
"loss": 0.4738,
|
9782 |
+
"rewards/accuracies": 0.875,
|
9783 |
+
"rewards/chosen": -0.1191844493150711,
|
9784 |
+
"rewards/margins": 1.2066011428833008,
|
9785 |
+
"rewards/rejected": -1.3257856369018555,
|
9786 |
+
"step": 1278
|
9787 |
+
},
|
9788 |
+
{
|
9789 |
+
"epoch": 1.4824466159971046,
|
9790 |
+
"grad_norm": 70.9685656111324,
|
9791 |
+
"learning_rate": 3.488639998151633e-08,
|
9792 |
+
"logits/chosen": -1.1641169786453247,
|
9793 |
+
"logits/rejected": -1.2118412256240845,
|
9794 |
+
"logps/chosen": -177.6825714111328,
|
9795 |
+
"logps/rejected": -220.609619140625,
|
9796 |
+
"loss": 0.452,
|
9797 |
+
"rewards/accuracies": 0.8125,
|
9798 |
+
"rewards/chosen": -0.5036740303039551,
|
9799 |
+
"rewards/margins": 2.158677101135254,
|
9800 |
+
"rewards/rejected": -2.662351131439209,
|
9801 |
+
"step": 1280
|
9802 |
+
},
|
9803 |
+
{
|
9804 |
+
"epoch": 1.4847629388346002,
|
9805 |
+
"grad_norm": 56.844036643554055,
|
9806 |
+
"learning_rate": 3.45936092938193e-08,
|
9807 |
+
"logits/chosen": -1.2509068250656128,
|
9808 |
+
"logits/rejected": -1.3152053356170654,
|
9809 |
+
"logps/chosen": -156.49288940429688,
|
9810 |
+
"logps/rejected": -175.0799102783203,
|
9811 |
+
"loss": 0.4025,
|
9812 |
+
"rewards/accuracies": 0.8125,
|
9813 |
+
"rewards/chosen": -0.10208474099636078,
|
9814 |
+
"rewards/margins": 1.4737757444381714,
|
9815 |
+
"rewards/rejected": -1.5758603811264038,
|
9816 |
+
"step": 1282
|
9817 |
+
},
|
9818 |
+
{
|
9819 |
+
"epoch": 1.4870792616720956,
|
9820 |
+
"grad_norm": 63.04162868208357,
|
9821 |
+
"learning_rate": 3.4301795254646396e-08,
|
9822 |
+
"logits/chosen": -1.1289265155792236,
|
9823 |
+
"logits/rejected": -1.1296483278274536,
|
9824 |
+
"logps/chosen": -92.31997680664062,
|
9825 |
+
"logps/rejected": -125.47240447998047,
|
9826 |
+
"loss": 0.4319,
|
9827 |
+
"rewards/accuracies": 0.9375,
|
9828 |
+
"rewards/chosen": -0.2308368980884552,
|
9829 |
+
"rewards/margins": 1.562235951423645,
|
9830 |
+
"rewards/rejected": -1.7930728197097778,
|
9831 |
+
"step": 1284
|
9832 |
+
},
|
9833 |
+
{
|
9834 |
+
"epoch": 1.489395584509591,
|
9835 |
+
"grad_norm": 53.7512285130868,
|
9836 |
+
"learning_rate": 3.4010962221366125e-08,
|
9837 |
+
"logits/chosen": -1.2739626169204712,
|
9838 |
+
"logits/rejected": -1.3384690284729004,
|
9839 |
+
"logps/chosen": -133.05880737304688,
|
9840 |
+
"logps/rejected": -184.07568359375,
|
9841 |
+
"loss": 0.4018,
|
9842 |
+
"rewards/accuracies": 0.875,
|
9843 |
+
"rewards/chosen": -0.1107388436794281,
|
9844 |
+
"rewards/margins": 1.6195769309997559,
|
9845 |
+
"rewards/rejected": -1.7303158044815063,
|
9846 |
+
"step": 1286
|
9847 |
+
},
|
9848 |
+
{
|
9849 |
+
"epoch": 1.4917119073470864,
|
9850 |
+
"grad_norm": 69.01008947683725,
|
9851 |
+
"learning_rate": 3.3721114536698635e-08,
|
9852 |
+
"logits/chosen": -1.3090903759002686,
|
9853 |
+
"logits/rejected": -1.3190556764602661,
|
9854 |
+
"logps/chosen": -157.00119018554688,
|
9855 |
+
"logps/rejected": -156.64285278320312,
|
9856 |
+
"loss": 0.435,
|
9857 |
+
"rewards/accuracies": 0.84375,
|
9858 |
+
"rewards/chosen": -0.31143975257873535,
|
9859 |
+
"rewards/margins": 0.8066626787185669,
|
9860 |
+
"rewards/rejected": -1.1181025505065918,
|
9861 |
+
"step": 1288
|
9862 |
+
},
|
9863 |
+
{
|
9864 |
+
"epoch": 1.494028230184582,
|
9865 |
+
"grad_norm": 50.25709967553361,
|
9866 |
+
"learning_rate": 3.343225652865095e-08,
|
9867 |
+
"logits/chosen": -1.1199434995651245,
|
9868 |
+
"logits/rejected": -1.2268508672714233,
|
9869 |
+
"logps/chosen": -122.3958740234375,
|
9870 |
+
"logps/rejected": -160.63177490234375,
|
9871 |
+
"loss": 0.3879,
|
9872 |
+
"rewards/accuracies": 0.84375,
|
9873 |
+
"rewards/chosen": -0.30211785435676575,
|
9874 |
+
"rewards/margins": 1.6221270561218262,
|
9875 |
+
"rewards/rejected": -1.9242448806762695,
|
9876 |
+
"step": 1290
|
9877 |
+
},
|
9878 |
+
{
|
9879 |
+
"epoch": 1.4963445530220776,
|
9880 |
+
"grad_norm": 52.88445623952638,
|
9881 |
+
"learning_rate": 3.3144392510452125e-08,
|
9882 |
+
"logits/chosen": -1.2343542575836182,
|
9883 |
+
"logits/rejected": -1.3168511390686035,
|
9884 |
+
"logps/chosen": -92.0683822631836,
|
9885 |
+
"logps/rejected": -115.30619812011719,
|
9886 |
+
"loss": 0.4227,
|
9887 |
+
"rewards/accuracies": 0.8125,
|
9888 |
+
"rewards/chosen": -0.022456973791122437,
|
9889 |
+
"rewards/margins": 1.0297763347625732,
|
9890 |
+
"rewards/rejected": -1.052233338356018,
|
9891 |
+
"step": 1292
|
9892 |
+
},
|
9893 |
+
{
|
9894 |
+
"epoch": 1.498660875859573,
|
9895 |
+
"grad_norm": 58.861501596082704,
|
9896 |
+
"learning_rate": 3.285752678048892e-08,
|
9897 |
+
"logits/chosen": -1.1001089811325073,
|
9898 |
+
"logits/rejected": -1.200326919555664,
|
9899 |
+
"logps/chosen": -122.45906829833984,
|
9900 |
+
"logps/rejected": -142.00970458984375,
|
9901 |
+
"loss": 0.3886,
|
9902 |
+
"rewards/accuracies": 0.75,
|
9903 |
+
"rewards/chosen": -0.28310656547546387,
|
9904 |
+
"rewards/margins": 0.9431065320968628,
|
9905 |
+
"rewards/rejected": -1.226212978363037,
|
9906 |
+
"step": 1294
|
9907 |
+
},
|
9908 |
+
{
|
9909 |
+
"epoch": 1.5009771986970684,
|
9910 |
+
"grad_norm": 56.84546595956147,
|
9911 |
+
"learning_rate": 3.2571663622241875e-08,
|
9912 |
+
"logits/chosen": -1.2022103071212769,
|
9913 |
+
"logits/rejected": -1.2175830602645874,
|
9914 |
+
"logps/chosen": -152.65174865722656,
|
9915 |
+
"logps/rejected": -191.4637908935547,
|
9916 |
+
"loss": 0.4328,
|
9917 |
+
"rewards/accuracies": 0.84375,
|
9918 |
+
"rewards/chosen": -0.23669162392616272,
|
9919 |
+
"rewards/margins": 1.503944993019104,
|
9920 |
+
"rewards/rejected": -1.7406367063522339,
|
9921 |
+
"step": 1296
|
9922 |
+
},
|
9923 |
+
{
|
9924 |
+
"epoch": 1.5032935215345637,
|
9925 |
+
"grad_norm": 75.28276112082713,
|
9926 |
+
"learning_rate": 3.2286807304220874e-08,
|
9927 |
+
"logits/chosen": -1.2572470903396606,
|
9928 |
+
"logits/rejected": -1.3082858324050903,
|
9929 |
+
"logps/chosen": -178.1489715576172,
|
9930 |
+
"logps/rejected": -204.2378692626953,
|
9931 |
+
"loss": 0.3727,
|
9932 |
+
"rewards/accuracies": 0.875,
|
9933 |
+
"rewards/chosen": -0.29775533080101013,
|
9934 |
+
"rewards/margins": 1.6656044721603394,
|
9935 |
+
"rewards/rejected": -1.9633598327636719,
|
9936 |
+
"step": 1298
|
9937 |
+
},
|
9938 |
+
{
|
9939 |
+
"epoch": 1.5056098443720594,
|
9940 |
+
"grad_norm": 58.03906016771635,
|
9941 |
+
"learning_rate": 3.200296207990174e-08,
|
9942 |
+
"logits/chosen": -1.234593391418457,
|
9943 |
+
"logits/rejected": -1.1559195518493652,
|
9944 |
+
"logps/chosen": -146.87179565429688,
|
9945 |
+
"logps/rejected": -168.06884765625,
|
9946 |
+
"loss": 0.4228,
|
9947 |
+
"rewards/accuracies": 0.75,
|
9948 |
+
"rewards/chosen": -0.2712811231613159,
|
9949 |
+
"rewards/margins": 1.2952864170074463,
|
9950 |
+
"rewards/rejected": -1.5665674209594727,
|
9951 |
+
"step": 1300
|
9952 |
+
},
|
9953 |
+
{
|
9954 |
+
"epoch": 1.5056098443720594,
|
9955 |
+
"eval_logits/chosen": -1.2275954484939575,
|
9956 |
+
"eval_logits/rejected": -1.222235918045044,
|
9957 |
+
"eval_logps/chosen": -142.21490478515625,
|
9958 |
+
"eval_logps/rejected": -147.02261352539062,
|
9959 |
+
"eval_loss": 0.5889570713043213,
|
9960 |
+
"eval_rewards/accuracies": 0.7599999904632568,
|
9961 |
+
"eval_rewards/chosen": -0.6583734154701233,
|
9962 |
+
"eval_rewards/margins": 0.6485320329666138,
|
9963 |
+
"eval_rewards/rejected": -1.3069055080413818,
|
9964 |
+
"eval_runtime": 28.0973,
|
9965 |
+
"eval_samples_per_second": 3.559,
|
9966 |
+
"eval_steps_per_second": 0.89,
|
9967 |
+
"step": 1300
|
9968 |
+
},
|
9969 |
+
{
|
9970 |
+
"epoch": 1.507926167209555,
|
9971 |
+
"grad_norm": 53.8054068622811,
|
9972 |
+
"learning_rate": 3.172013218766273e-08,
|
9973 |
+
"logits/chosen": -1.1949838399887085,
|
9974 |
+
"logits/rejected": -1.1276732683181763,
|
9975 |
+
"logps/chosen": -116.6605453491211,
|
9976 |
+
"logps/rejected": -129.42701721191406,
|
9977 |
+
"loss": 0.4054,
|
9978 |
+
"rewards/accuracies": 0.84375,
|
9979 |
+
"rewards/chosen": -0.26029157638549805,
|
9980 |
+
"rewards/margins": 0.894656777381897,
|
9981 |
+
"rewards/rejected": -1.1549484729766846,
|
9982 |
+
"step": 1302
|
9983 |
+
},
|
9984 |
+
{
|
9985 |
+
"epoch": 1.5102424900470504,
|
9986 |
+
"grad_norm": 53.44469670494882,
|
9987 |
+
"learning_rate": 3.143832185072103e-08,
|
9988 |
+
"logits/chosen": -1.330610752105713,
|
9989 |
+
"logits/rejected": -1.3352289199829102,
|
9990 |
+
"logps/chosen": -129.06997680664062,
|
9991 |
+
"logps/rejected": -138.2757568359375,
|
9992 |
+
"loss": 0.4228,
|
9993 |
+
"rewards/accuracies": 0.8125,
|
9994 |
+
"rewards/chosen": -0.19665758311748505,
|
9995 |
+
"rewards/margins": 0.8899365067481995,
|
9996 |
+
"rewards/rejected": -1.0865941047668457,
|
9997 |
+
"step": 1304
|
9998 |
+
},
|
9999 |
+
{
|
10000 |
+
"epoch": 1.5125588128845457,
|
10001 |
+
"grad_norm": 52.12869653110471,
|
10002 |
+
"learning_rate": 3.115753527706986e-08,
|
10003 |
+
"logits/chosen": -1.2492622137069702,
|
10004 |
+
"logits/rejected": -1.2376924753189087,
|
10005 |
+
"logps/chosen": -176.64134216308594,
|
10006 |
+
"logps/rejected": -197.2509307861328,
|
10007 |
+
"loss": 0.3503,
|
10008 |
+
"rewards/accuracies": 0.90625,
|
10009 |
+
"rewards/chosen": -0.33254703879356384,
|
10010 |
+
"rewards/margins": 1.2227458953857422,
|
10011 |
+
"rewards/rejected": -1.5552929639816284,
|
10012 |
+
"step": 1306
|
10013 |
+
},
|
10014 |
+
{
|
10015 |
+
"epoch": 1.5148751357220411,
|
10016 |
+
"grad_norm": 76.85376628040396,
|
10017 |
+
"learning_rate": 3.087777665941565e-08,
|
10018 |
+
"logits/chosen": -1.0722814798355103,
|
10019 |
+
"logits/rejected": -1.1189197301864624,
|
10020 |
+
"logps/chosen": -142.5889892578125,
|
10021 |
+
"logps/rejected": -179.30929565429688,
|
10022 |
+
"loss": 0.425,
|
10023 |
+
"rewards/accuracies": 0.9375,
|
10024 |
+
"rewards/chosen": -0.607629120349884,
|
10025 |
+
"rewards/margins": 1.3540990352630615,
|
10026 |
+
"rewards/rejected": -1.9617282152175903,
|
10027 |
+
"step": 1308
|
10028 |
+
},
|
10029 |
+
{
|
10030 |
+
"epoch": 1.5171914585595367,
|
10031 |
+
"grad_norm": 53.40350152738603,
|
10032 |
+
"learning_rate": 3.059905017511536e-08,
|
10033 |
+
"logits/chosen": -1.289185881614685,
|
10034 |
+
"logits/rejected": -1.3002314567565918,
|
10035 |
+
"logps/chosen": -155.68594360351562,
|
10036 |
+
"logps/rejected": -181.0437774658203,
|
10037 |
+
"loss": 0.4321,
|
10038 |
+
"rewards/accuracies": 0.78125,
|
10039 |
+
"rewards/chosen": -0.1934920847415924,
|
10040 |
+
"rewards/margins": 1.0485948324203491,
|
10041 |
+
"rewards/rejected": -1.2420868873596191,
|
10042 |
+
"step": 1310
|
10043 |
+
},
|
10044 |
+
{
|
10045 |
+
"epoch": 1.5195077813970324,
|
10046 |
+
"grad_norm": 62.10414133443873,
|
10047 |
+
"learning_rate": 3.032135998611409e-08,
|
10048 |
+
"logits/chosen": -1.2625949382781982,
|
10049 |
+
"logits/rejected": -1.2948625087738037,
|
10050 |
+
"logps/chosen": -120.27640533447266,
|
10051 |
+
"logps/rejected": -124.78179931640625,
|
10052 |
+
"loss": 0.4706,
|
10053 |
+
"rewards/accuracies": 0.75,
|
10054 |
+
"rewards/chosen": -0.41021931171417236,
|
10055 |
+
"rewards/margins": 0.7083292603492737,
|
10056 |
+
"rewards/rejected": -1.1185486316680908,
|
10057 |
+
"step": 1312
|
10058 |
+
},
|
10059 |
+
{
|
10060 |
+
"epoch": 1.5218241042345277,
|
10061 |
+
"grad_norm": 53.355625531453235,
|
10062 |
+
"learning_rate": 3.004471023888307e-08,
|
10063 |
+
"logits/chosen": -1.202606201171875,
|
10064 |
+
"logits/rejected": -1.2765260934829712,
|
10065 |
+
"logps/chosen": -154.8223876953125,
|
10066 |
+
"logps/rejected": -174.3974609375,
|
10067 |
+
"loss": 0.3724,
|
10068 |
+
"rewards/accuracies": 0.90625,
|
10069 |
+
"rewards/chosen": -0.02067536488175392,
|
10070 |
+
"rewards/margins": 1.3669226169586182,
|
10071 |
+
"rewards/rejected": -1.3875980377197266,
|
10072 |
+
"step": 1314
|
10073 |
+
},
|
10074 |
+
{
|
10075 |
+
"epoch": 1.5241404270720231,
|
10076 |
+
"grad_norm": 59.47198871147131,
|
10077 |
+
"learning_rate": 2.9769105064357537e-08,
|
10078 |
+
"logits/chosen": -1.3064639568328857,
|
10079 |
+
"logits/rejected": -1.2530848979949951,
|
10080 |
+
"logps/chosen": -183.5660858154297,
|
10081 |
+
"logps/rejected": -209.7610626220703,
|
10082 |
+
"loss": 0.4216,
|
10083 |
+
"rewards/accuracies": 0.84375,
|
10084 |
+
"rewards/chosen": -0.21822071075439453,
|
10085 |
+
"rewards/margins": 1.0935778617858887,
|
10086 |
+
"rewards/rejected": -1.3117986917495728,
|
10087 |
+
"step": 1316
|
10088 |
+
},
|
10089 |
+
{
|
10090 |
+
"epoch": 1.5264567499095185,
|
10091 |
+
"grad_norm": 65.91690062559164,
|
10092 |
+
"learning_rate": 2.949454857787519e-08,
|
10093 |
+
"logits/chosen": -1.238956093788147,
|
10094 |
+
"logits/rejected": -1.218425989151001,
|
10095 |
+
"logps/chosen": -214.0928955078125,
|
10096 |
+
"logps/rejected": -242.11489868164062,
|
10097 |
+
"loss": 0.431,
|
10098 |
+
"rewards/accuracies": 0.78125,
|
10099 |
+
"rewards/chosen": -0.3267192244529724,
|
10100 |
+
"rewards/margins": 2.120006799697876,
|
10101 |
+
"rewards/rejected": -2.446725606918335,
|
10102 |
+
"step": 1318
|
10103 |
+
},
|
10104 |
+
{
|
10105 |
+
"epoch": 1.5287730727470141,
|
10106 |
+
"grad_norm": 79.84542515208742,
|
10107 |
+
"learning_rate": 2.9221044879114775e-08,
|
10108 |
+
"logits/chosen": -1.251328468322754,
|
10109 |
+
"logits/rejected": -1.3467867374420166,
|
10110 |
+
"logps/chosen": -177.4748077392578,
|
10111 |
+
"logps/rejected": -217.42910766601562,
|
10112 |
+
"loss": 0.4328,
|
10113 |
+
"rewards/accuracies": 0.90625,
|
10114 |
+
"rewards/chosen": -0.36167412996292114,
|
10115 |
+
"rewards/margins": 1.5050268173217773,
|
10116 |
+
"rewards/rejected": -1.8667008876800537,
|
10117 |
+
"step": 1320
|
10118 |
+
},
|
10119 |
+
{
|
10120 |
+
"epoch": 1.5310893955845097,
|
10121 |
+
"grad_norm": 49.694028003635175,
|
10122 |
+
"learning_rate": 2.8948598052034777e-08,
|
10123 |
+
"logits/chosen": -1.3321678638458252,
|
10124 |
+
"logits/rejected": -1.336784839630127,
|
10125 |
+
"logps/chosen": -151.92041015625,
|
10126 |
+
"logps/rejected": -168.43971252441406,
|
10127 |
+
"loss": 0.4527,
|
10128 |
+
"rewards/accuracies": 0.8125,
|
10129 |
+
"rewards/chosen": -0.07541098445653915,
|
10130 |
+
"rewards/margins": 1.295732021331787,
|
10131 |
+
"rewards/rejected": -1.371143102645874,
|
10132 |
+
"step": 1322
|
10133 |
+
},
|
10134 |
+
{
|
10135 |
+
"epoch": 1.5334057184220051,
|
10136 |
+
"grad_norm": 53.56450173273162,
|
10137 |
+
"learning_rate": 2.867721216481246e-08,
|
10138 |
+
"logits/chosen": -1.278252363204956,
|
10139 |
+
"logits/rejected": -1.380322813987732,
|
10140 |
+
"logps/chosen": -117.78822326660156,
|
10141 |
+
"logps/rejected": -142.52127075195312,
|
10142 |
+
"loss": 0.4433,
|
10143 |
+
"rewards/accuracies": 0.8125,
|
10144 |
+
"rewards/chosen": -0.0004923827946186066,
|
10145 |
+
"rewards/margins": 0.94923996925354,
|
10146 |
+
"rewards/rejected": -0.94973224401474,
|
10147 |
+
"step": 1324
|
10148 |
+
},
|
10149 |
+
{
|
10150 |
+
"epoch": 1.5357220412595005,
|
10151 |
+
"grad_norm": 61.396961223172646,
|
10152 |
+
"learning_rate": 2.8406891269783073e-08,
|
10153 |
+
"logits/chosen": -1.1902800798416138,
|
10154 |
+
"logits/rejected": -1.3021633625030518,
|
10155 |
+
"logps/chosen": -158.06736755371094,
|
10156 |
+
"logps/rejected": -229.27200317382812,
|
10157 |
+
"loss": 0.4586,
|
10158 |
+
"rewards/accuracies": 0.8125,
|
10159 |
+
"rewards/chosen": -0.1848677396774292,
|
10160 |
+
"rewards/margins": 1.0514723062515259,
|
10161 |
+
"rewards/rejected": -1.236340045928955,
|
10162 |
+
"step": 1326
|
10163 |
+
},
|
10164 |
+
{
|
10165 |
+
"epoch": 1.538038364096996,
|
10166 |
+
"grad_norm": 60.56277160405794,
|
10167 |
+
"learning_rate": 2.813763940337952e-08,
|
10168 |
+
"logits/chosen": -1.2507346868515015,
|
10169 |
+
"logits/rejected": -1.2003321647644043,
|
10170 |
+
"logps/chosen": -127.8934097290039,
|
10171 |
+
"logps/rejected": -149.8028564453125,
|
10172 |
+
"loss": 0.4295,
|
10173 |
+
"rewards/accuracies": 0.78125,
|
10174 |
+
"rewards/chosen": -0.22713351249694824,
|
10175 |
+
"rewards/margins": 1.3372572660446167,
|
10176 |
+
"rewards/rejected": -1.5643908977508545,
|
10177 |
+
"step": 1328
|
10178 |
+
},
|
10179 |
+
{
|
10180 |
+
"epoch": 1.5403546869344915,
|
10181 |
+
"grad_norm": 49.96943093434472,
|
10182 |
+
"learning_rate": 2.7869460586071868e-08,
|
10183 |
+
"logits/chosen": -1.290654182434082,
|
10184 |
+
"logits/rejected": -1.226252794265747,
|
10185 |
+
"logps/chosen": -154.7710418701172,
|
10186 |
+
"logps/rejected": -160.32904052734375,
|
10187 |
+
"loss": 0.4118,
|
10188 |
+
"rewards/accuracies": 0.96875,
|
10189 |
+
"rewards/chosen": 0.027480699121952057,
|
10190 |
+
"rewards/margins": 0.9481416940689087,
|
10191 |
+
"rewards/rejected": -0.9206609129905701,
|
10192 |
+
"step": 1330
|
10193 |
+
},
|
10194 |
+
{
|
10195 |
+
"epoch": 1.5426710097719871,
|
10196 |
+
"grad_norm": 59.55494034761672,
|
10197 |
+
"learning_rate": 2.7602358822307413e-08,
|
10198 |
+
"logits/chosen": -1.2469313144683838,
|
10199 |
+
"logits/rejected": -1.2422947883605957,
|
10200 |
+
"logps/chosen": -134.33392333984375,
|
10201 |
+
"logps/rejected": -147.20509338378906,
|
10202 |
+
"loss": 0.4157,
|
10203 |
+
"rewards/accuracies": 0.875,
|
10204 |
+
"rewards/chosen": -0.2177230417728424,
|
10205 |
+
"rewards/margins": 1.3250346183776855,
|
10206 |
+
"rewards/rejected": -1.5427578687667847,
|
10207 |
+
"step": 1332
|
10208 |
+
},
|
10209 |
+
{
|
10210 |
+
"epoch": 1.5449873326094825,
|
10211 |
+
"grad_norm": 50.53246486906999,
|
10212 |
+
"learning_rate": 2.733633810045094e-08,
|
10213 |
+
"logits/chosen": -1.2038668394088745,
|
10214 |
+
"logits/rejected": -1.2241549491882324,
|
10215 |
+
"logps/chosen": -134.41952514648438,
|
10216 |
+
"logps/rejected": -164.3434600830078,
|
10217 |
+
"loss": 0.3798,
|
10218 |
+
"rewards/accuracies": 0.90625,
|
10219 |
+
"rewards/chosen": -0.30086490511894226,
|
10220 |
+
"rewards/margins": 1.5149168968200684,
|
10221 |
+
"rewards/rejected": -1.8157817125320435,
|
10222 |
+
"step": 1334
|
10223 |
+
},
|
10224 |
+
{
|
10225 |
+
"epoch": 1.547303655446978,
|
10226 |
+
"grad_norm": 64.60352169707365,
|
10227 |
+
"learning_rate": 2.7071402392725096e-08,
|
10228 |
+
"logits/chosen": -1.223114013671875,
|
10229 |
+
"logits/rejected": -1.2082433700561523,
|
10230 |
+
"logps/chosen": -165.63157653808594,
|
10231 |
+
"logps/rejected": -190.38706970214844,
|
10232 |
+
"loss": 0.4127,
|
10233 |
+
"rewards/accuracies": 0.875,
|
10234 |
+
"rewards/chosen": -0.4103352725505829,
|
10235 |
+
"rewards/margins": 1.6495842933654785,
|
10236 |
+
"rewards/rejected": -2.0599193572998047,
|
10237 |
+
"step": 1336
|
10238 |
+
},
|
10239 |
+
{
|
10240 |
+
"epoch": 1.5496199782844733,
|
10241 |
+
"grad_norm": 49.10542820839143,
|
10242 |
+
"learning_rate": 2.6807555655151025e-08,
|
10243 |
+
"logits/chosen": -1.2764735221862793,
|
10244 |
+
"logits/rejected": -1.3327206373214722,
|
10245 |
+
"logps/chosen": -134.23565673828125,
|
10246 |
+
"logps/rejected": -163.20474243164062,
|
10247 |
+
"loss": 0.3816,
|
10248 |
+
"rewards/accuracies": 0.78125,
|
10249 |
+
"rewards/chosen": -0.16112415492534637,
|
10250 |
+
"rewards/margins": 1.4172645807266235,
|
10251 |
+
"rewards/rejected": -1.5783886909484863,
|
10252 |
+
"step": 1338
|
10253 |
+
},
|
10254 |
+
{
|
10255 |
+
"epoch": 1.551936301121969,
|
10256 |
+
"grad_norm": 50.20237542030147,
|
10257 |
+
"learning_rate": 2.6544801827489482e-08,
|
10258 |
+
"logits/chosen": -1.2196974754333496,
|
10259 |
+
"logits/rejected": -1.1965646743774414,
|
10260 |
+
"logps/chosen": -129.3689422607422,
|
10261 |
+
"logps/rejected": -142.672119140625,
|
10262 |
+
"loss": 0.367,
|
10263 |
+
"rewards/accuracies": 0.84375,
|
10264 |
+
"rewards/chosen": -0.257286012172699,
|
10265 |
+
"rewards/margins": 1.1139978170394897,
|
10266 |
+
"rewards/rejected": -1.3712838888168335,
|
10267 |
+
"step": 1340
|
10268 |
+
},
|
10269 |
+
{
|
10270 |
+
"epoch": 1.5542526239594645,
|
10271 |
+
"grad_norm": 59.30212350967455,
|
10272 |
+
"learning_rate": 2.6283144833181782e-08,
|
10273 |
+
"logits/chosen": -1.1602783203125,
|
10274 |
+
"logits/rejected": -1.1242191791534424,
|
10275 |
+
"logps/chosen": -120.19819641113281,
|
10276 |
+
"logps/rejected": -154.9192352294922,
|
10277 |
+
"loss": 0.44,
|
10278 |
+
"rewards/accuracies": 0.875,
|
10279 |
+
"rewards/chosen": -0.2868664264678955,
|
10280 |
+
"rewards/margins": 1.309116244316101,
|
10281 |
+
"rewards/rejected": -1.5959827899932861,
|
10282 |
+
"step": 1342
|
10283 |
+
},
|
10284 |
+
{
|
10285 |
+
"epoch": 1.55656894679696,
|
10286 |
+
"grad_norm": 55.629592680909475,
|
10287 |
+
"learning_rate": 2.6022588579291327e-08,
|
10288 |
+
"logits/chosen": -1.1773267984390259,
|
10289 |
+
"logits/rejected": -1.1578972339630127,
|
10290 |
+
"logps/chosen": -120.31849670410156,
|
10291 |
+
"logps/rejected": -141.1858367919922,
|
10292 |
+
"loss": 0.4489,
|
10293 |
+
"rewards/accuracies": 0.84375,
|
10294 |
+
"rewards/chosen": -0.2234707921743393,
|
10295 |
+
"rewards/margins": 0.9735670685768127,
|
10296 |
+
"rewards/rejected": -1.1970378160476685,
|
10297 |
+
"step": 1344
|
10298 |
+
},
|
10299 |
+
{
|
10300 |
+
"epoch": 1.5588852696344553,
|
10301 |
+
"grad_norm": 58.52065186564036,
|
10302 |
+
"learning_rate": 2.5763136956445342e-08,
|
10303 |
+
"logits/chosen": -1.13753342628479,
|
10304 |
+
"logits/rejected": -1.1701133251190186,
|
10305 |
+
"logps/chosen": -139.2621612548828,
|
10306 |
+
"logps/rejected": -194.79722595214844,
|
10307 |
+
"loss": 0.4361,
|
10308 |
+
"rewards/accuracies": 0.75,
|
10309 |
+
"rewards/chosen": -0.3876231908798218,
|
10310 |
+
"rewards/margins": 1.8029227256774902,
|
10311 |
+
"rewards/rejected": -2.1905460357666016,
|
10312 |
+
"step": 1346
|
10313 |
+
},
|
10314 |
+
{
|
10315 |
+
"epoch": 1.5612015924719507,
|
10316 |
+
"grad_norm": 53.922591259946095,
|
10317 |
+
"learning_rate": 2.5504793838776582e-08,
|
10318 |
+
"logits/chosen": -1.2545125484466553,
|
10319 |
+
"logits/rejected": -1.330094814300537,
|
10320 |
+
"logps/chosen": -150.107421875,
|
10321 |
+
"logps/rejected": -178.18336486816406,
|
10322 |
+
"loss": 0.3999,
|
10323 |
+
"rewards/accuracies": 0.8125,
|
10324 |
+
"rewards/chosen": -0.33850181102752686,
|
10325 |
+
"rewards/margins": 1.4819968938827515,
|
10326 |
+
"rewards/rejected": -1.8204987049102783,
|
10327 |
+
"step": 1348
|
10328 |
+
},
|
10329 |
+
{
|
10330 |
+
"epoch": 1.5635179153094463,
|
10331 |
+
"grad_norm": 50.688082246559354,
|
10332 |
+
"learning_rate": 2.5247563083865697e-08,
|
10333 |
+
"logits/chosen": -1.2270935773849487,
|
10334 |
+
"logits/rejected": -1.2281591892242432,
|
10335 |
+
"logps/chosen": -151.96878051757812,
|
10336 |
+
"logps/rejected": -163.12692260742188,
|
10337 |
+
"loss": 0.3785,
|
10338 |
+
"rewards/accuracies": 0.78125,
|
10339 |
+
"rewards/chosen": -0.06753317266702652,
|
10340 |
+
"rewards/margins": 0.8644936084747314,
|
10341 |
+
"rewards/rejected": -0.9320268630981445,
|
10342 |
+
"step": 1350
|
10343 |
+
},
|
10344 |
+
{
|
10345 |
+
"epoch": 1.565834238146942,
|
10346 |
+
"grad_norm": 48.380975770041516,
|
10347 |
+
"learning_rate": 2.4991448532683525e-08,
|
10348 |
+
"logits/chosen": -1.08161461353302,
|
10349 |
+
"logits/rejected": -1.0994572639465332,
|
10350 |
+
"logps/chosen": -72.54817962646484,
|
10351 |
+
"logps/rejected": -86.65010833740234,
|
10352 |
+
"loss": 0.4118,
|
10353 |
+
"rewards/accuracies": 0.8125,
|
10354 |
+
"rewards/chosen": 0.03128306567668915,
|
10355 |
+
"rewards/margins": 1.0805275440216064,
|
10356 |
+
"rewards/rejected": -1.0492445230484009,
|
10357 |
+
"step": 1352
|
10358 |
+
},
|
10359 |
+
{
|
10360 |
+
"epoch": 1.5681505609844373,
|
10361 |
+
"grad_norm": 56.228286429005685,
|
10362 |
+
"learning_rate": 2.4736454009533657e-08,
|
10363 |
+
"logits/chosen": -1.110878586769104,
|
10364 |
+
"logits/rejected": -1.1308159828186035,
|
10365 |
+
"logps/chosen": -91.99187469482422,
|
10366 |
+
"logps/rejected": -103.77259826660156,
|
10367 |
+
"loss": 0.4426,
|
10368 |
+
"rewards/accuracies": 0.71875,
|
10369 |
+
"rewards/chosen": -0.12472671270370483,
|
10370 |
+
"rewards/margins": 0.7570998668670654,
|
10371 |
+
"rewards/rejected": -0.8818265199661255,
|
10372 |
+
"step": 1354
|
10373 |
+
},
|
10374 |
+
{
|
10375 |
+
"epoch": 1.5704668838219327,
|
10376 |
+
"grad_norm": 45.1944314609051,
|
10377 |
+
"learning_rate": 2.4482583321995476e-08,
|
10378 |
+
"logits/chosen": -1.1095668077468872,
|
10379 |
+
"logits/rejected": -1.0675170421600342,
|
10380 |
+
"logps/chosen": -136.01248168945312,
|
10381 |
+
"logps/rejected": -142.30926513671875,
|
10382 |
+
"loss": 0.4013,
|
10383 |
+
"rewards/accuracies": 0.78125,
|
10384 |
+
"rewards/chosen": -0.3576383888721466,
|
10385 |
+
"rewards/margins": 1.211026668548584,
|
10386 |
+
"rewards/rejected": -1.5686650276184082,
|
10387 |
+
"step": 1356
|
10388 |
+
},
|
10389 |
+
{
|
10390 |
+
"epoch": 1.572783206659428,
|
10391 |
+
"grad_norm": 69.47752257662052,
|
10392 |
+
"learning_rate": 2.4229840260867286e-08,
|
10393 |
+
"logits/chosen": -1.1537913084030151,
|
10394 |
+
"logits/rejected": -1.1009234189987183,
|
10395 |
+
"logps/chosen": -154.23377990722656,
|
10396 |
+
"logps/rejected": -169.52256774902344,
|
10397 |
+
"loss": 0.4161,
|
10398 |
+
"rewards/accuracies": 0.84375,
|
10399 |
+
"rewards/chosen": -0.404860258102417,
|
10400 |
+
"rewards/margins": 1.2964788675308228,
|
10401 |
+
"rewards/rejected": -1.7013392448425293,
|
10402 |
+
"step": 1358
|
10403 |
+
},
|
10404 |
+
{
|
10405 |
+
"epoch": 1.5750995294969237,
|
10406 |
+
"grad_norm": 57.67713141201682,
|
10407 |
+
"learning_rate": 2.3978228600109563e-08,
|
10408 |
+
"logits/chosen": -1.2760729789733887,
|
10409 |
+
"logits/rejected": -1.3226953744888306,
|
10410 |
+
"logps/chosen": -157.44998168945312,
|
10411 |
+
"logps/rejected": -199.5008087158203,
|
10412 |
+
"loss": 0.4445,
|
10413 |
+
"rewards/accuracies": 0.84375,
|
10414 |
+
"rewards/chosen": -0.27148348093032837,
|
10415 |
+
"rewards/margins": 1.2586216926574707,
|
10416 |
+
"rewards/rejected": -1.5301051139831543,
|
10417 |
+
"step": 1360
|
10418 |
+
},
|
10419 |
+
{
|
10420 |
+
"epoch": 1.577415852334419,
|
10421 |
+
"grad_norm": 63.00178299040625,
|
10422 |
+
"learning_rate": 2.372775209678881e-08,
|
10423 |
+
"logits/chosen": -1.2917158603668213,
|
10424 |
+
"logits/rejected": -1.2442728281021118,
|
10425 |
+
"logps/chosen": -135.3437957763672,
|
10426 |
+
"logps/rejected": -150.4008026123047,
|
10427 |
+
"loss": 0.4729,
|
10428 |
+
"rewards/accuracies": 0.75,
|
10429 |
+
"rewards/chosen": -0.21689726412296295,
|
10430 |
+
"rewards/margins": 0.914188802242279,
|
10431 |
+
"rewards/rejected": -1.1310861110687256,
|
10432 |
+
"step": 1362
|
10433 |
+
},
|
10434 |
+
{
|
10435 |
+
"epoch": 1.5797321751719147,
|
10436 |
+
"grad_norm": 54.9545294199578,
|
10437 |
+
"learning_rate": 2.347841449102136e-08,
|
10438 |
+
"logits/chosen": -1.16354501247406,
|
10439 |
+
"logits/rejected": -1.2029650211334229,
|
10440 |
+
"logps/chosen": -127.01541137695312,
|
10441 |
+
"logps/rejected": -160.94837951660156,
|
10442 |
+
"loss": 0.4415,
|
10443 |
+
"rewards/accuracies": 0.84375,
|
10444 |
+
"rewards/chosen": -0.2984340488910675,
|
10445 |
+
"rewards/margins": 1.5368034839630127,
|
10446 |
+
"rewards/rejected": -1.8352375030517578,
|
10447 |
+
"step": 1364
|
10448 |
+
},
|
10449 |
+
{
|
10450 |
+
"epoch": 1.58204849800941,
|
10451 |
+
"grad_norm": 59.15647045553974,
|
10452 |
+
"learning_rate": 2.3230219505917424e-08,
|
10453 |
+
"logits/chosen": -1.1245296001434326,
|
10454 |
+
"logits/rejected": -1.1090185642242432,
|
10455 |
+
"logps/chosen": -88.6761245727539,
|
10456 |
+
"logps/rejected": -99.77947235107422,
|
10457 |
+
"loss": 0.4674,
|
10458 |
+
"rewards/accuracies": 0.8125,
|
10459 |
+
"rewards/chosen": -0.2688758969306946,
|
10460 |
+
"rewards/margins": 1.0267162322998047,
|
10461 |
+
"rewards/rejected": -1.295592188835144,
|
10462 |
+
"step": 1366
|
10463 |
+
},
|
10464 |
+
{
|
10465 |
+
"epoch": 1.5843648208469054,
|
10466 |
+
"grad_norm": 95.242652077602,
|
10467 |
+
"learning_rate": 2.2983170847525635e-08,
|
10468 |
+
"logits/chosen": -1.2544560432434082,
|
10469 |
+
"logits/rejected": -1.334200382232666,
|
10470 |
+
"logps/chosen": -132.92308044433594,
|
10471 |
+
"logps/rejected": -153.16336059570312,
|
10472 |
+
"loss": 0.4863,
|
10473 |
+
"rewards/accuracies": 0.78125,
|
10474 |
+
"rewards/chosen": -0.2278953194618225,
|
10475 |
+
"rewards/margins": 0.9455510973930359,
|
10476 |
+
"rewards/rejected": -1.1734462976455688,
|
10477 |
+
"step": 1368
|
10478 |
+
},
|
10479 |
+
{
|
10480 |
+
"epoch": 1.586681143684401,
|
10481 |
+
"grad_norm": 81.42436059360928,
|
10482 |
+
"learning_rate": 2.2737272204777737e-08,
|
10483 |
+
"logits/chosen": -1.0738383531570435,
|
10484 |
+
"logits/rejected": -1.143731951713562,
|
10485 |
+
"logps/chosen": -123.22402954101562,
|
10486 |
+
"logps/rejected": -167.40036010742188,
|
10487 |
+
"loss": 0.4226,
|
10488 |
+
"rewards/accuracies": 0.9375,
|
10489 |
+
"rewards/chosen": -0.27396196126937866,
|
10490 |
+
"rewards/margins": 1.5770013332366943,
|
10491 |
+
"rewards/rejected": -1.8509632349014282,
|
10492 |
+
"step": 1370
|
10493 |
+
},
|
10494 |
+
{
|
10495 |
+
"epoch": 1.5889974665218964,
|
10496 |
+
"grad_norm": 55.49979161745584,
|
10497 |
+
"learning_rate": 2.249252724943336e-08,
|
10498 |
+
"logits/chosen": -1.2299567461013794,
|
10499 |
+
"logits/rejected": -1.2440650463104248,
|
10500 |
+
"logps/chosen": -123.21504974365234,
|
10501 |
+
"logps/rejected": -143.9616241455078,
|
10502 |
+
"loss": 0.4755,
|
10503 |
+
"rewards/accuracies": 0.75,
|
10504 |
+
"rewards/chosen": -0.491690993309021,
|
10505 |
+
"rewards/margins": 0.894086480140686,
|
10506 |
+
"rewards/rejected": -1.385777473449707,
|
10507 |
+
"step": 1372
|
10508 |
+
},
|
10509 |
+
{
|
10510 |
+
"epoch": 1.591313789359392,
|
10511 |
+
"grad_norm": 54.18985696863257,
|
10512 |
+
"learning_rate": 2.2248939636025264e-08,
|
10513 |
+
"logits/chosen": -1.203713297843933,
|
10514 |
+
"logits/rejected": -1.2125518321990967,
|
10515 |
+
"logps/chosen": -176.7722930908203,
|
10516 |
+
"logps/rejected": -228.38929748535156,
|
10517 |
+
"loss": 0.408,
|
10518 |
+
"rewards/accuracies": 0.875,
|
10519 |
+
"rewards/chosen": -0.5514904260635376,
|
10520 |
+
"rewards/margins": 3.2685635089874268,
|
10521 |
+
"rewards/rejected": -3.820053815841675,
|
10522 |
+
"step": 1374
|
10523 |
+
},
|
10524 |
+
{
|
10525 |
+
"epoch": 1.5936301121968874,
|
10526 |
+
"grad_norm": 54.36252764320043,
|
10527 |
+
"learning_rate": 2.200651300180483e-08,
|
10528 |
+
"logits/chosen": -1.2815027236938477,
|
10529 |
+
"logits/rejected": -1.318671703338623,
|
10530 |
+
"logps/chosen": -130.49078369140625,
|
10531 |
+
"logps/rejected": -142.1308135986328,
|
10532 |
+
"loss": 0.3952,
|
10533 |
+
"rewards/accuracies": 0.90625,
|
10534 |
+
"rewards/chosen": -0.029716283082962036,
|
10535 |
+
"rewards/margins": 0.9699075222015381,
|
10536 |
+
"rewards/rejected": -0.999623715877533,
|
10537 |
+
"step": 1376
|
10538 |
+
},
|
10539 |
+
{
|
10540 |
+
"epoch": 1.5959464350343828,
|
10541 |
+
"grad_norm": 96.21574776555832,
|
10542 |
+
"learning_rate": 2.1765250966687687e-08,
|
10543 |
+
"logits/chosen": -1.3336846828460693,
|
10544 |
+
"logits/rejected": -1.3228704929351807,
|
10545 |
+
"logps/chosen": -176.54737854003906,
|
10546 |
+
"logps/rejected": -209.42909240722656,
|
10547 |
+
"loss": 0.4583,
|
10548 |
+
"rewards/accuracies": 0.71875,
|
10549 |
+
"rewards/chosen": -0.7003276348114014,
|
10550 |
+
"rewards/margins": 1.229347586631775,
|
10551 |
+
"rewards/rejected": -1.9296752214431763,
|
10552 |
+
"step": 1378
|
10553 |
+
},
|
10554 |
+
{
|
10555 |
+
"epoch": 1.5982627578718784,
|
10556 |
+
"grad_norm": 71.93512521949452,
|
10557 |
+
"learning_rate": 2.1525157133199633e-08,
|
10558 |
+
"logits/chosen": -1.08268404006958,
|
10559 |
+
"logits/rejected": -1.1782095432281494,
|
10560 |
+
"logps/chosen": -122.80490112304688,
|
10561 |
+
"logps/rejected": -159.9678955078125,
|
10562 |
+
"loss": 0.4574,
|
10563 |
+
"rewards/accuracies": 0.8125,
|
10564 |
+
"rewards/chosen": -0.40625882148742676,
|
10565 |
+
"rewards/margins": 1.3748208284378052,
|
10566 |
+
"rewards/rejected": -1.7810795307159424,
|
10567 |
+
"step": 1380
|
10568 |
+
},
|
10569 |
+
{
|
10570 |
+
"epoch": 1.6005790807093738,
|
10571 |
+
"grad_norm": 70.14564165621101,
|
10572 |
+
"learning_rate": 2.1286235086422843e-08,
|
10573 |
+
"logits/chosen": -1.3746612071990967,
|
10574 |
+
"logits/rejected": -1.3713853359222412,
|
10575 |
+
"logps/chosen": -178.19586181640625,
|
10576 |
+
"logps/rejected": -212.018310546875,
|
10577 |
+
"loss": 0.3689,
|
10578 |
+
"rewards/accuracies": 0.96875,
|
10579 |
+
"rewards/chosen": -0.2453938126564026,
|
10580 |
+
"rewards/margins": 1.7820340394973755,
|
10581 |
+
"rewards/rejected": -2.027428150177002,
|
10582 |
+
"step": 1382
|
10583 |
+
},
|
10584 |
+
{
|
10585 |
+
"epoch": 1.6028954035468694,
|
10586 |
+
"grad_norm": 53.127835579709945,
|
10587 |
+
"learning_rate": 2.1048488393942455e-08,
|
10588 |
+
"logits/chosen": -1.17882239818573,
|
10589 |
+
"logits/rejected": -1.1624205112457275,
|
10590 |
+
"logps/chosen": -111.90765380859375,
|
10591 |
+
"logps/rejected": -135.91148376464844,
|
10592 |
+
"loss": 0.475,
|
10593 |
+
"rewards/accuracies": 0.78125,
|
10594 |
+
"rewards/chosen": -0.25234535336494446,
|
10595 |
+
"rewards/margins": 1.1534082889556885,
|
10596 |
+
"rewards/rejected": -1.4057536125183105,
|
10597 |
+
"step": 1384
|
10598 |
+
},
|
10599 |
+
{
|
10600 |
+
"epoch": 1.6052117263843648,
|
10601 |
+
"grad_norm": 59.635014558757035,
|
10602 |
+
"learning_rate": 2.0811920605793122e-08,
|
10603 |
+
"logits/chosen": -1.1906554698944092,
|
10604 |
+
"logits/rejected": -1.2808858156204224,
|
10605 |
+
"logps/chosen": -129.28529357910156,
|
10606 |
+
"logps/rejected": -157.04090881347656,
|
10607 |
+
"loss": 0.3961,
|
10608 |
+
"rewards/accuracies": 0.75,
|
10609 |
+
"rewards/chosen": 0.06318804621696472,
|
10610 |
+
"rewards/margins": 1.2911432981491089,
|
10611 |
+
"rewards/rejected": -1.2279552221298218,
|
10612 |
+
"step": 1386
|
10613 |
+
},
|
10614 |
+
{
|
10615 |
+
"epoch": 1.6075280492218602,
|
10616 |
+
"grad_norm": 57.41476924308661,
|
10617 |
+
"learning_rate": 2.0576535254406157e-08,
|
10618 |
+
"logits/chosen": -1.1566798686981201,
|
10619 |
+
"logits/rejected": -1.1978453397750854,
|
10620 |
+
"logps/chosen": -153.1495819091797,
|
10621 |
+
"logps/rejected": -175.12283325195312,
|
10622 |
+
"loss": 0.3893,
|
10623 |
+
"rewards/accuracies": 0.8125,
|
10624 |
+
"rewards/chosen": -0.4101862907409668,
|
10625 |
+
"rewards/margins": 1.2702761888504028,
|
10626 |
+
"rewards/rejected": -1.6804625988006592,
|
10627 |
+
"step": 1388
|
10628 |
+
},
|
10629 |
+
{
|
10630 |
+
"epoch": 1.6098443720593558,
|
10631 |
+
"grad_norm": 46.94707676426246,
|
10632 |
+
"learning_rate": 2.0342335854556736e-08,
|
10633 |
+
"logits/chosen": -1.3085883855819702,
|
10634 |
+
"logits/rejected": -1.2242193222045898,
|
10635 |
+
"logps/chosen": -173.11146545410156,
|
10636 |
+
"logps/rejected": -201.7664031982422,
|
10637 |
+
"loss": 0.3882,
|
10638 |
+
"rewards/accuracies": 0.90625,
|
10639 |
+
"rewards/chosen": -0.6821246147155762,
|
10640 |
+
"rewards/margins": 1.9655760526657104,
|
10641 |
+
"rewards/rejected": -2.6477010250091553,
|
10642 |
+
"step": 1390
|
10643 |
+
},
|
10644 |
+
{
|
10645 |
+
"epoch": 1.6121606948968512,
|
10646 |
+
"grad_norm": 51.39756905271222,
|
10647 |
+
"learning_rate": 2.0109325903311324e-08,
|
10648 |
+
"logits/chosen": -1.1785892248153687,
|
10649 |
+
"logits/rejected": -1.2349551916122437,
|
10650 |
+
"logps/chosen": -173.98049926757812,
|
10651 |
+
"logps/rejected": -225.72409057617188,
|
10652 |
+
"loss": 0.378,
|
10653 |
+
"rewards/accuracies": 0.875,
|
10654 |
+
"rewards/chosen": -0.16729062795639038,
|
10655 |
+
"rewards/margins": 1.8970539569854736,
|
10656 |
+
"rewards/rejected": -2.064344644546509,
|
10657 |
+
"step": 1392
|
10658 |
+
},
|
10659 |
+
{
|
10660 |
+
"epoch": 1.6144770177343468,
|
10661 |
+
"grad_norm": 75.45520614404798,
|
10662 |
+
"learning_rate": 1.9877508879975557e-08,
|
10663 |
+
"logits/chosen": -1.2368243932724,
|
10664 |
+
"logits/rejected": -1.1864349842071533,
|
10665 |
+
"logps/chosen": -157.86318969726562,
|
10666 |
+
"logps/rejected": -164.7779083251953,
|
10667 |
+
"loss": 0.4102,
|
10668 |
+
"rewards/accuracies": 0.875,
|
10669 |
+
"rewards/chosen": -0.416544646024704,
|
10670 |
+
"rewards/margins": 1.2882412672042847,
|
10671 |
+
"rewards/rejected": -1.7047858238220215,
|
10672 |
+
"step": 1394
|
10673 |
+
},
|
10674 |
+
{
|
10675 |
+
"epoch": 1.6167933405718422,
|
10676 |
+
"grad_norm": 54.69368738030066,
|
10677 |
+
"learning_rate": 1.9646888246042337e-08,
|
10678 |
+
"logits/chosen": -1.2882027626037598,
|
10679 |
+
"logits/rejected": -1.2957350015640259,
|
10680 |
+
"logps/chosen": -168.46693420410156,
|
10681 |
+
"logps/rejected": -184.96092224121094,
|
10682 |
+
"loss": 0.3796,
|
10683 |
+
"rewards/accuracies": 0.71875,
|
10684 |
+
"rewards/chosen": -0.5366595983505249,
|
10685 |
+
"rewards/margins": 1.4451313018798828,
|
10686 |
+
"rewards/rejected": -1.9817910194396973,
|
10687 |
+
"step": 1396
|
10688 |
+
},
|
10689 |
+
{
|
10690 |
+
"epoch": 1.6191096634093376,
|
10691 |
+
"grad_norm": 47.07150629441627,
|
10692 |
+
"learning_rate": 1.941746744513999e-08,
|
10693 |
+
"logits/chosen": -1.2016966342926025,
|
10694 |
+
"logits/rejected": -1.2090229988098145,
|
10695 |
+
"logps/chosen": -119.58673095703125,
|
10696 |
+
"logps/rejected": -141.86593627929688,
|
10697 |
+
"loss": 0.3931,
|
10698 |
+
"rewards/accuracies": 0.875,
|
10699 |
+
"rewards/chosen": -0.3934524655342102,
|
10700 |
+
"rewards/margins": 1.4040158987045288,
|
10701 |
+
"rewards/rejected": -1.7974684238433838,
|
10702 |
+
"step": 1398
|
10703 |
+
},
|
10704 |
+
{
|
10705 |
+
"epoch": 1.6214259862468332,
|
10706 |
+
"grad_norm": 62.032269869178876,
|
10707 |
+
"learning_rate": 1.918924990298091e-08,
|
10708 |
+
"logits/chosen": -1.2298343181610107,
|
10709 |
+
"logits/rejected": -1.233276128768921,
|
10710 |
+
"logps/chosen": -160.66769409179688,
|
10711 |
+
"logps/rejected": -165.26895141601562,
|
10712 |
+
"loss": 0.4199,
|
10713 |
+
"rewards/accuracies": 0.84375,
|
10714 |
+
"rewards/chosen": -0.6389051079750061,
|
10715 |
+
"rewards/margins": 0.8895078897476196,
|
10716 |
+
"rewards/rejected": -1.52841317653656,
|
10717 |
+
"step": 1400
|
10718 |
+
},
|
10719 |
+
{
|
10720 |
+
"epoch": 1.6214259862468332,
|
10721 |
+
"eval_logits/chosen": -1.2133427858352661,
|
10722 |
+
"eval_logits/rejected": -1.2084039449691772,
|
10723 |
+
"eval_logps/chosen": -144.74742126464844,
|
10724 |
+
"eval_logps/rejected": -149.5865478515625,
|
10725 |
+
"eval_loss": 0.6033037304878235,
|
10726 |
+
"eval_rewards/accuracies": 0.7200000286102295,
|
10727 |
+
"eval_rewards/chosen": -0.9116251468658447,
|
10728 |
+
"eval_rewards/margins": 0.6516737341880798,
|
10729 |
+
"eval_rewards/rejected": -1.5632988214492798,
|
10730 |
+
"eval_runtime": 24.3113,
|
10731 |
+
"eval_samples_per_second": 4.113,
|
10732 |
+
"eval_steps_per_second": 1.028,
|
10733 |
+
"step": 1400
|
10734 |
+
},
|
10735 |
+
{
|
10736 |
+
"epoch": 1.6237423090843286,
|
10737 |
+
"grad_norm": 86.9760098456171,
|
10738 |
+
"learning_rate": 1.8962239027310577e-08,
|
10739 |
+
"logits/chosen": -1.1873736381530762,
|
10740 |
+
"logits/rejected": -1.2712754011154175,
|
10741 |
+
"logps/chosen": -150.010498046875,
|
10742 |
+
"logps/rejected": -177.68890380859375,
|
10743 |
+
"loss": 0.4684,
|
10744 |
+
"rewards/accuracies": 0.78125,
|
10745 |
+
"rewards/chosen": -0.3790132403373718,
|
10746 |
+
"rewards/margins": 1.0307780504226685,
|
10747 |
+
"rewards/rejected": -1.409791350364685,
|
10748 |
+
"step": 1402
|
10749 |
+
},
|
10750 |
+
{
|
10751 |
+
"epoch": 1.6260586319218242,
|
10752 |
+
"grad_norm": 54.339526567430624,
|
10753 |
+
"learning_rate": 1.8736438207856377e-08,
|
10754 |
+
"logits/chosen": -1.346308708190918,
|
10755 |
+
"logits/rejected": -1.3275481462478638,
|
10756 |
+
"logps/chosen": -172.08343505859375,
|
10757 |
+
"logps/rejected": -188.81179809570312,
|
10758 |
+
"loss": 0.4124,
|
10759 |
+
"rewards/accuracies": 0.90625,
|
10760 |
+
"rewards/chosen": -0.4836081564426422,
|
10761 |
+
"rewards/margins": 1.5920302867889404,
|
10762 |
+
"rewards/rejected": -2.07563853263855,
|
10763 |
+
"step": 1404
|
10764 |
+
},
|
10765 |
+
{
|
10766 |
+
"epoch": 1.6283749547593196,
|
10767 |
+
"grad_norm": 50.44666894199177,
|
10768 |
+
"learning_rate": 1.851185081627714e-08,
|
10769 |
+
"logits/chosen": -1.313905954360962,
|
10770 |
+
"logits/rejected": -1.3108646869659424,
|
10771 |
+
"logps/chosen": -150.0095977783203,
|
10772 |
+
"logps/rejected": -163.9129180908203,
|
10773 |
+
"loss": 0.4744,
|
10774 |
+
"rewards/accuracies": 0.78125,
|
10775 |
+
"rewards/chosen": -0.6312530040740967,
|
10776 |
+
"rewards/margins": 0.8648273348808289,
|
10777 |
+
"rewards/rejected": -1.4960802793502808,
|
10778 |
+
"step": 1406
|
10779 |
+
},
|
10780 |
+
{
|
10781 |
+
"epoch": 1.630691277596815,
|
10782 |
+
"grad_norm": 63.4120821056028,
|
10783 |
+
"learning_rate": 1.8288480206112877e-08,
|
10784 |
+
"logits/chosen": -1.2368450164794922,
|
10785 |
+
"logits/rejected": -1.222330927848816,
|
10786 |
+
"logps/chosen": -114.20048522949219,
|
10787 |
+
"logps/rejected": -153.55780029296875,
|
10788 |
+
"loss": 0.4453,
|
10789 |
+
"rewards/accuracies": 0.75,
|
10790 |
+
"rewards/chosen": -0.13375352323055267,
|
10791 |
+
"rewards/margins": 1.81991446018219,
|
10792 |
+
"rewards/rejected": -1.9536678791046143,
|
10793 |
+
"step": 1408
|
10794 |
+
},
|
10795 |
+
{
|
10796 |
+
"epoch": 1.6330076004343104,
|
10797 |
+
"grad_norm": 57.27120326939087,
|
10798 |
+
"learning_rate": 1.806632971273454e-08,
|
10799 |
+
"logits/chosen": -1.3121931552886963,
|
10800 |
+
"logits/rejected": -1.3301172256469727,
|
10801 |
+
"logps/chosen": -140.49411010742188,
|
10802 |
+
"logps/rejected": -147.28729248046875,
|
10803 |
+
"loss": 0.4135,
|
10804 |
+
"rewards/accuracies": 0.84375,
|
10805 |
+
"rewards/chosen": -0.3088449537754059,
|
10806 |
+
"rewards/margins": 0.9551953673362732,
|
10807 |
+
"rewards/rejected": -1.264040470123291,
|
10808 |
+
"step": 1410
|
10809 |
+
},
|
10810 |
+
{
|
10811 |
+
"epoch": 1.635323923271806,
|
10812 |
+
"grad_norm": 58.835101317691525,
|
10813 |
+
"learning_rate": 1.7845402653294262e-08,
|
10814 |
+
"logits/chosen": -1.1533010005950928,
|
10815 |
+
"logits/rejected": -1.2099212408065796,
|
10816 |
+
"logps/chosen": -161.35955810546875,
|
10817 |
+
"logps/rejected": -182.40274047851562,
|
10818 |
+
"loss": 0.3941,
|
10819 |
+
"rewards/accuracies": 0.96875,
|
10820 |
+
"rewards/chosen": -0.4202643036842346,
|
10821 |
+
"rewards/margins": 1.3078231811523438,
|
10822 |
+
"rewards/rejected": -1.7280876636505127,
|
10823 |
+
"step": 1412
|
10824 |
+
},
|
10825 |
+
{
|
10826 |
+
"epoch": 1.6376402461093016,
|
10827 |
+
"grad_norm": 62.35716207894577,
|
10828 |
+
"learning_rate": 1.762570232667595e-08,
|
10829 |
+
"logits/chosen": -1.164352297782898,
|
10830 |
+
"logits/rejected": -1.2668792009353638,
|
10831 |
+
"logps/chosen": -121.197021484375,
|
10832 |
+
"logps/rejected": -175.3108367919922,
|
10833 |
+
"loss": 0.4048,
|
10834 |
+
"rewards/accuracies": 0.78125,
|
10835 |
+
"rewards/chosen": -0.2763165235519409,
|
10836 |
+
"rewards/margins": 1.5692354440689087,
|
10837 |
+
"rewards/rejected": -1.8455519676208496,
|
10838 |
+
"step": 1414
|
10839 |
+
},
|
10840 |
+
{
|
10841 |
+
"epoch": 1.639956568946797,
|
10842 |
+
"grad_norm": 72.03828023987269,
|
10843 |
+
"learning_rate": 1.7407232013445893e-08,
|
10844 |
+
"logits/chosen": -1.120769739151001,
|
10845 |
+
"logits/rejected": -1.203442096710205,
|
10846 |
+
"logps/chosen": -147.95274353027344,
|
10847 |
+
"logps/rejected": -192.7701873779297,
|
10848 |
+
"loss": 0.4188,
|
10849 |
+
"rewards/accuracies": 0.9375,
|
10850 |
+
"rewards/chosen": -0.37782442569732666,
|
10851 |
+
"rewards/margins": 1.473639965057373,
|
10852 |
+
"rewards/rejected": -1.8514643907546997,
|
10853 |
+
"step": 1416
|
10854 |
+
},
|
10855 |
+
{
|
10856 |
+
"epoch": 1.6422728917842924,
|
10857 |
+
"grad_norm": 66.4702023164338,
|
10858 |
+
"learning_rate": 1.7189994975803758e-08,
|
10859 |
+
"logits/chosen": -1.1487022638320923,
|
10860 |
+
"logits/rejected": -1.2365858554840088,
|
10861 |
+
"logps/chosen": -117.31167602539062,
|
10862 |
+
"logps/rejected": -146.18536376953125,
|
10863 |
+
"loss": 0.4393,
|
10864 |
+
"rewards/accuracies": 0.78125,
|
10865 |
+
"rewards/chosen": -0.3540950417518616,
|
10866 |
+
"rewards/margins": 0.7364134192466736,
|
10867 |
+
"rewards/rejected": -1.0905084609985352,
|
10868 |
+
"step": 1418
|
10869 |
+
},
|
10870 |
+
{
|
10871 |
+
"epoch": 1.6445892146217878,
|
10872 |
+
"grad_norm": 78.71556855680205,
|
10873 |
+
"learning_rate": 1.6973994457534023e-08,
|
10874 |
+
"logits/chosen": -1.2775179147720337,
|
10875 |
+
"logits/rejected": -1.3115909099578857,
|
10876 |
+
"logps/chosen": -164.73760986328125,
|
10877 |
+
"logps/rejected": -190.9739227294922,
|
10878 |
+
"loss": 0.506,
|
10879 |
+
"rewards/accuracies": 0.8125,
|
10880 |
+
"rewards/chosen": -0.6548792123794556,
|
10881 |
+
"rewards/margins": 1.3077069520950317,
|
10882 |
+
"rewards/rejected": -1.9625860452651978,
|
10883 |
+
"step": 1420
|
10884 |
+
},
|
10885 |
+
{
|
10886 |
+
"epoch": 1.6469055374592834,
|
10887 |
+
"grad_norm": 62.03650354576015,
|
10888 |
+
"learning_rate": 1.6759233683957396e-08,
|
10889 |
+
"logits/chosen": -1.280670166015625,
|
10890 |
+
"logits/rejected": -1.270959734916687,
|
10891 |
+
"logps/chosen": -174.10838317871094,
|
10892 |
+
"logps/rejected": -190.41744995117188,
|
10893 |
+
"loss": 0.417,
|
10894 |
+
"rewards/accuracies": 0.90625,
|
10895 |
+
"rewards/chosen": -0.3348293602466583,
|
10896 |
+
"rewards/margins": 1.4488041400909424,
|
10897 |
+
"rewards/rejected": -1.7836335897445679,
|
10898 |
+
"step": 1422
|
10899 |
+
},
|
10900 |
+
{
|
10901 |
+
"epoch": 1.649221860296779,
|
10902 |
+
"grad_norm": 57.86020225721166,
|
10903 |
+
"learning_rate": 1.6545715861882702e-08,
|
10904 |
+
"logits/chosen": -1.101415753364563,
|
10905 |
+
"logits/rejected": -1.1186178922653198,
|
10906 |
+
"logps/chosen": -142.97349548339844,
|
10907 |
+
"logps/rejected": -186.55322265625,
|
10908 |
+
"loss": 0.4078,
|
10909 |
+
"rewards/accuracies": 0.84375,
|
10910 |
+
"rewards/chosen": -0.5462976098060608,
|
10911 |
+
"rewards/margins": 2.0124125480651855,
|
10912 |
+
"rewards/rejected": -2.5587100982666016,
|
10913 |
+
"step": 1424
|
10914 |
+
},
|
10915 |
+
{
|
10916 |
+
"epoch": 1.6515381831342744,
|
10917 |
+
"grad_norm": 48.695250546687646,
|
10918 |
+
"learning_rate": 1.6333444179559074e-08,
|
10919 |
+
"logits/chosen": -1.2034971714019775,
|
10920 |
+
"logits/rejected": -1.2503241300582886,
|
10921 |
+
"logps/chosen": -169.5352020263672,
|
10922 |
+
"logps/rejected": -214.4050750732422,
|
10923 |
+
"loss": 0.4054,
|
10924 |
+
"rewards/accuracies": 0.875,
|
10925 |
+
"rewards/chosen": -0.5561625361442566,
|
10926 |
+
"rewards/margins": 2.379465341567993,
|
10927 |
+
"rewards/rejected": -2.9356279373168945,
|
10928 |
+
"step": 1426
|
10929 |
+
},
|
10930 |
+
{
|
10931 |
+
"epoch": 1.6538545059717698,
|
10932 |
+
"grad_norm": 58.54466931855239,
|
10933 |
+
"learning_rate": 1.6122421806628207e-08,
|
10934 |
+
"logits/chosen": -1.2484573125839233,
|
10935 |
+
"logits/rejected": -1.298370599746704,
|
10936 |
+
"logps/chosen": -224.2542266845703,
|
10937 |
+
"logps/rejected": -239.31918334960938,
|
10938 |
+
"loss": 0.3847,
|
10939 |
+
"rewards/accuracies": 0.84375,
|
10940 |
+
"rewards/chosen": -0.5975647568702698,
|
10941 |
+
"rewards/margins": 2.2055599689483643,
|
10942 |
+
"rewards/rejected": -2.8031251430511475,
|
10943 |
+
"step": 1428
|
10944 |
+
},
|
10945 |
+
{
|
10946 |
+
"epoch": 1.6561708288092651,
|
10947 |
+
"grad_norm": 59.92256675039104,
|
10948 |
+
"learning_rate": 1.5912651894077167e-08,
|
10949 |
+
"logits/chosen": -1.2150397300720215,
|
10950 |
+
"logits/rejected": -1.1968854665756226,
|
10951 |
+
"logps/chosen": -150.77381896972656,
|
10952 |
+
"logps/rejected": -189.05838012695312,
|
10953 |
+
"loss": 0.4165,
|
10954 |
+
"rewards/accuracies": 0.9375,
|
10955 |
+
"rewards/chosen": -0.49205654859542847,
|
10956 |
+
"rewards/margins": 2.0640861988067627,
|
10957 |
+
"rewards/rejected": -2.556142807006836,
|
10958 |
+
"step": 1430
|
10959 |
+
},
|
10960 |
+
{
|
10961 |
+
"epoch": 1.6584871516467607,
|
10962 |
+
"grad_norm": 59.42932647345947,
|
10963 |
+
"learning_rate": 1.57041375741912e-08,
|
10964 |
+
"logits/chosen": -1.196043610572815,
|
10965 |
+
"logits/rejected": -1.2252073287963867,
|
10966 |
+
"logps/chosen": -145.3686065673828,
|
10967 |
+
"logps/rejected": -144.02952575683594,
|
10968 |
+
"loss": 0.389,
|
10969 |
+
"rewards/accuracies": 0.8125,
|
10970 |
+
"rewards/chosen": -0.924052894115448,
|
10971 |
+
"rewards/margins": 0.9492592215538025,
|
10972 |
+
"rewards/rejected": -1.8733121156692505,
|
10973 |
+
"step": 1432
|
10974 |
+
},
|
10975 |
+
{
|
10976 |
+
"epoch": 1.6608034744842564,
|
10977 |
+
"grad_norm": 59.90616287675461,
|
10978 |
+
"learning_rate": 1.5496881960507124e-08,
|
10979 |
+
"logits/chosen": -1.1822700500488281,
|
10980 |
+
"logits/rejected": -1.1750373840332031,
|
10981 |
+
"logps/chosen": -134.07861328125,
|
10982 |
+
"logps/rejected": -201.8026123046875,
|
10983 |
+
"loss": 0.382,
|
10984 |
+
"rewards/accuracies": 0.875,
|
10985 |
+
"rewards/chosen": -0.24189743399620056,
|
10986 |
+
"rewards/margins": 2.8620426654815674,
|
10987 |
+
"rewards/rejected": -3.103940010070801,
|
10988 |
+
"step": 1434
|
10989 |
+
},
|
10990 |
+
{
|
10991 |
+
"epoch": 1.6631197973217517,
|
10992 |
+
"grad_norm": 57.85512148826699,
|
10993 |
+
"learning_rate": 1.529088814776668e-08,
|
10994 |
+
"logits/chosen": -1.2793397903442383,
|
10995 |
+
"logits/rejected": -1.2699894905090332,
|
10996 |
+
"logps/chosen": -182.90478515625,
|
10997 |
+
"logps/rejected": -223.45468139648438,
|
10998 |
+
"loss": 0.3792,
|
10999 |
+
"rewards/accuracies": 0.78125,
|
11000 |
+
"rewards/chosen": -0.6292093992233276,
|
11001 |
+
"rewards/margins": 1.7043105363845825,
|
11002 |
+
"rewards/rejected": -2.33351993560791,
|
11003 |
+
"step": 1436
|
11004 |
+
},
|
11005 |
+
{
|
11006 |
+
"epoch": 1.6654361201592471,
|
11007 |
+
"grad_norm": 80.74607876180737,
|
11008 |
+
"learning_rate": 1.508615921187044e-08,
|
11009 |
+
"logits/chosen": -1.049709677696228,
|
11010 |
+
"logits/rejected": -1.1322901248931885,
|
11011 |
+
"logps/chosen": -109.7282943725586,
|
11012 |
+
"logps/rejected": -159.00778198242188,
|
11013 |
+
"loss": 0.3868,
|
11014 |
+
"rewards/accuracies": 0.90625,
|
11015 |
+
"rewards/chosen": -0.34554052352905273,
|
11016 |
+
"rewards/margins": 1.7658944129943848,
|
11017 |
+
"rewards/rejected": -2.1114349365234375,
|
11018 |
+
"step": 1438
|
11019 |
+
},
|
11020 |
+
{
|
11021 |
+
"epoch": 1.6677524429967425,
|
11022 |
+
"grad_norm": 66.72584214795384,
|
11023 |
+
"learning_rate": 1.4882698209831779e-08,
|
11024 |
+
"logits/chosen": -1.1424648761749268,
|
11025 |
+
"logits/rejected": -1.1974608898162842,
|
11026 |
+
"logps/chosen": -97.99867248535156,
|
11027 |
+
"logps/rejected": -134.96340942382812,
|
11028 |
+
"loss": 0.4193,
|
11029 |
+
"rewards/accuracies": 0.71875,
|
11030 |
+
"rewards/chosen": -0.35685765743255615,
|
11031 |
+
"rewards/margins": 1.5133432149887085,
|
11032 |
+
"rewards/rejected": -1.8702008724212646,
|
11033 |
+
"step": 1440
|
11034 |
+
},
|
11035 |
+
{
|
11036 |
+
"epoch": 1.6700687658342381,
|
11037 |
+
"grad_norm": 78.22654470786401,
|
11038 |
+
"learning_rate": 1.4680508179731343e-08,
|
11039 |
+
"logits/chosen": -1.2903565168380737,
|
11040 |
+
"logits/rejected": -1.3427797555923462,
|
11041 |
+
"logps/chosen": -164.58184814453125,
|
11042 |
+
"logps/rejected": -196.1396484375,
|
11043 |
+
"loss": 0.4565,
|
11044 |
+
"rewards/accuracies": 0.8125,
|
11045 |
+
"rewards/chosen": -0.2896607220172882,
|
11046 |
+
"rewards/margins": 1.235891580581665,
|
11047 |
+
"rewards/rejected": -1.5255522727966309,
|
11048 |
+
"step": 1442
|
11049 |
+
},
|
11050 |
+
{
|
11051 |
+
"epoch": 1.6723850886717337,
|
11052 |
+
"grad_norm": 64.64622163037477,
|
11053 |
+
"learning_rate": 1.4479592140671548e-08,
|
11054 |
+
"logits/chosen": -1.3046828508377075,
|
11055 |
+
"logits/rejected": -1.2628744840621948,
|
11056 |
+
"logps/chosen": -194.83673095703125,
|
11057 |
+
"logps/rejected": -228.6671905517578,
|
11058 |
+
"loss": 0.4481,
|
11059 |
+
"rewards/accuracies": 0.78125,
|
11060 |
+
"rewards/chosen": -0.3258281648159027,
|
11061 |
+
"rewards/margins": 1.5874714851379395,
|
11062 |
+
"rewards/rejected": -1.913299560546875,
|
11063 |
+
"step": 1444
|
11064 |
+
},
|
11065 |
+
{
|
11066 |
+
"epoch": 1.6747014115092291,
|
11067 |
+
"grad_norm": 54.707446026319836,
|
11068 |
+
"learning_rate": 1.4279953092731633e-08,
|
11069 |
+
"logits/chosen": -1.3320201635360718,
|
11070 |
+
"logits/rejected": -1.3739019632339478,
|
11071 |
+
"logps/chosen": -184.18093872070312,
|
11072 |
+
"logps/rejected": -212.85275268554688,
|
11073 |
+
"loss": 0.3619,
|
11074 |
+
"rewards/accuracies": 0.90625,
|
11075 |
+
"rewards/chosen": -0.21321099996566772,
|
11076 |
+
"rewards/margins": 1.5299046039581299,
|
11077 |
+
"rewards/rejected": -1.7431155443191528,
|
11078 |
+
"step": 1446
|
11079 |
+
},
|
11080 |
+
{
|
11081 |
+
"epoch": 1.6770177343467245,
|
11082 |
+
"grad_norm": 82.01457178127649,
|
11083 |
+
"learning_rate": 1.4081594016922772e-08,
|
11084 |
+
"logits/chosen": -1.3037813901901245,
|
11085 |
+
"logits/rejected": -1.2591630220413208,
|
11086 |
+
"logps/chosen": -190.23208618164062,
|
11087 |
+
"logps/rejected": -201.10235595703125,
|
11088 |
+
"loss": 0.4678,
|
11089 |
+
"rewards/accuracies": 0.6875,
|
11090 |
+
"rewards/chosen": -0.4632962942123413,
|
11091 |
+
"rewards/margins": 0.94648677110672,
|
11092 |
+
"rewards/rejected": -1.409783124923706,
|
11093 |
+
"step": 1448
|
11094 |
+
},
|
11095 |
+
{
|
11096 |
+
"epoch": 1.67933405718422,
|
11097 |
+
"grad_norm": 55.804214360937905,
|
11098 |
+
"learning_rate": 1.3884517875143542e-08,
|
11099 |
+
"logits/chosen": -1.2149088382720947,
|
11100 |
+
"logits/rejected": -1.2506321668624878,
|
11101 |
+
"logps/chosen": -134.70565795898438,
|
11102 |
+
"logps/rejected": -170.83775329589844,
|
11103 |
+
"loss": 0.4304,
|
11104 |
+
"rewards/accuracies": 0.84375,
|
11105 |
+
"rewards/chosen": -0.13964088261127472,
|
11106 |
+
"rewards/margins": 1.4484151601791382,
|
11107 |
+
"rewards/rejected": -1.588055968284607,
|
11108 |
+
"step": 1450
|
11109 |
+
},
|
11110 |
+
{
|
11111 |
+
"epoch": 1.6816503800217155,
|
11112 |
+
"grad_norm": 45.37776187937181,
|
11113 |
+
"learning_rate": 1.3688727610135841e-08,
|
11114 |
+
"logits/chosen": -1.182690978050232,
|
11115 |
+
"logits/rejected": -1.128598928451538,
|
11116 |
+
"logps/chosen": -166.2299041748047,
|
11117 |
+
"logps/rejected": -189.45960998535156,
|
11118 |
+
"loss": 0.3818,
|
11119 |
+
"rewards/accuracies": 0.90625,
|
11120 |
+
"rewards/chosen": -0.45779427886009216,
|
11121 |
+
"rewards/margins": 1.7794036865234375,
|
11122 |
+
"rewards/rejected": -2.2371981143951416,
|
11123 |
+
"step": 1452
|
11124 |
+
},
|
11125 |
+
{
|
11126 |
+
"epoch": 1.6839667028592111,
|
11127 |
+
"grad_norm": 52.071114969595605,
|
11128 |
+
"learning_rate": 1.3494226145440767e-08,
|
11129 |
+
"logits/chosen": -1.129225492477417,
|
11130 |
+
"logits/rejected": -1.1527059078216553,
|
11131 |
+
"logps/chosen": -131.0524139404297,
|
11132 |
+
"logps/rejected": -152.54283142089844,
|
11133 |
+
"loss": 0.3594,
|
11134 |
+
"rewards/accuracies": 0.84375,
|
11135 |
+
"rewards/chosen": -0.4113074541091919,
|
11136 |
+
"rewards/margins": 1.4839246273040771,
|
11137 |
+
"rewards/rejected": -1.8952319622039795,
|
11138 |
+
"step": 1454
|
11139 |
+
},
|
11140 |
+
{
|
11141 |
+
"epoch": 1.6862830256967065,
|
11142 |
+
"grad_norm": 47.42528149711889,
|
11143 |
+
"learning_rate": 1.3301016385355091e-08,
|
11144 |
+
"logits/chosen": -1.2042808532714844,
|
11145 |
+
"logits/rejected": -1.194934368133545,
|
11146 |
+
"logps/chosen": -156.91098022460938,
|
11147 |
+
"logps/rejected": -188.40354919433594,
|
11148 |
+
"loss": 0.3751,
|
11149 |
+
"rewards/accuracies": 0.84375,
|
11150 |
+
"rewards/chosen": -0.7726644277572632,
|
11151 |
+
"rewards/margins": 1.8851245641708374,
|
11152 |
+
"rewards/rejected": -2.6577892303466797,
|
11153 |
+
"step": 1456
|
11154 |
+
},
|
11155 |
+
{
|
11156 |
+
"epoch": 1.688599348534202,
|
11157 |
+
"grad_norm": 61.34540856220831,
|
11158 |
+
"learning_rate": 1.3109101214887864e-08,
|
11159 |
+
"logits/chosen": -1.2197272777557373,
|
11160 |
+
"logits/rejected": -1.1938438415527344,
|
11161 |
+
"logps/chosen": -133.5988311767578,
|
11162 |
+
"logps/rejected": -192.86639404296875,
|
11163 |
+
"loss": 0.41,
|
11164 |
+
"rewards/accuracies": 0.875,
|
11165 |
+
"rewards/chosen": -0.4758758246898651,
|
11166 |
+
"rewards/margins": 3.0053532123565674,
|
11167 |
+
"rewards/rejected": -3.4812285900115967,
|
11168 |
+
"step": 1458
|
11169 |
+
},
|
11170 |
+
{
|
11171 |
+
"epoch": 1.6909156713716973,
|
11172 |
+
"grad_norm": 54.8216979898567,
|
11173 |
+
"learning_rate": 1.2918483499717237e-08,
|
11174 |
+
"logits/chosen": -1.1394593715667725,
|
11175 |
+
"logits/rejected": -1.1835401058197021,
|
11176 |
+
"logps/chosen": -122.25736999511719,
|
11177 |
+
"logps/rejected": -133.27230834960938,
|
11178 |
+
"loss": 0.3597,
|
11179 |
+
"rewards/accuracies": 0.90625,
|
11180 |
+
"rewards/chosen": -0.4267653822898865,
|
11181 |
+
"rewards/margins": 1.0661195516586304,
|
11182 |
+
"rewards/rejected": -1.4928849935531616,
|
11183 |
+
"step": 1460
|
11184 |
+
},
|
11185 |
+
{
|
11186 |
+
"epoch": 1.693231994209193,
|
11187 |
+
"grad_norm": 62.86307508634661,
|
11188 |
+
"learning_rate": 1.2729166086147803e-08,
|
11189 |
+
"logits/chosen": -1.2270219326019287,
|
11190 |
+
"logits/rejected": -1.2750426530838013,
|
11191 |
+
"logps/chosen": -128.8585968017578,
|
11192 |
+
"logps/rejected": -152.87161254882812,
|
11193 |
+
"loss": 0.4384,
|
11194 |
+
"rewards/accuracies": 0.84375,
|
11195 |
+
"rewards/chosen": -0.016907572746276855,
|
11196 |
+
"rewards/margins": 1.5406163930892944,
|
11197 |
+
"rewards/rejected": -1.5575240850448608,
|
11198 |
+
"step": 1462
|
11199 |
+
},
|
11200 |
+
{
|
11201 |
+
"epoch": 1.6955483170466885,
|
11202 |
+
"grad_norm": 87.68154889445682,
|
11203 |
+
"learning_rate": 1.2541151801068072e-08,
|
11204 |
+
"logits/chosen": -1.1000306606292725,
|
11205 |
+
"logits/rejected": -1.1639959812164307,
|
11206 |
+
"logps/chosen": -173.75588989257812,
|
11207 |
+
"logps/rejected": -201.65650939941406,
|
11208 |
+
"loss": 0.4866,
|
11209 |
+
"rewards/accuracies": 0.78125,
|
11210 |
+
"rewards/chosen": -1.0096687078475952,
|
11211 |
+
"rewards/margins": 1.2543641328811646,
|
11212 |
+
"rewards/rejected": -2.2640328407287598,
|
11213 |
+
"step": 1464
|
11214 |
+
},
|
11215 |
+
{
|
11216 |
+
"epoch": 1.697864639884184,
|
11217 |
+
"grad_norm": 52.24912842261691,
|
11218 |
+
"learning_rate": 1.2354443451908202e-08,
|
11219 |
+
"logits/chosen": -1.2294087409973145,
|
11220 |
+
"logits/rejected": -1.2400881052017212,
|
11221 |
+
"logps/chosen": -153.24412536621094,
|
11222 |
+
"logps/rejected": -166.8737030029297,
|
11223 |
+
"loss": 0.3901,
|
11224 |
+
"rewards/accuracies": 0.78125,
|
11225 |
+
"rewards/chosen": -0.4041752815246582,
|
11226 |
+
"rewards/margins": 1.1100343465805054,
|
11227 |
+
"rewards/rejected": -1.5142096281051636,
|
11228 |
+
"step": 1466
|
11229 |
+
},
|
11230 |
+
{
|
11231 |
+
"epoch": 1.7001809627216793,
|
11232 |
+
"grad_norm": 59.60290932317476,
|
11233 |
+
"learning_rate": 1.2169043826598057e-08,
|
11234 |
+
"logits/chosen": -1.1557788848876953,
|
11235 |
+
"logits/rejected": -1.2175214290618896,
|
11236 |
+
"logps/chosen": -141.86160278320312,
|
11237 |
+
"logps/rejected": -166.4970245361328,
|
11238 |
+
"loss": 0.4357,
|
11239 |
+
"rewards/accuracies": 0.8125,
|
11240 |
+
"rewards/chosen": -0.4277328848838806,
|
11241 |
+
"rewards/margins": 1.5622481107711792,
|
11242 |
+
"rewards/rejected": -1.989980936050415,
|
11243 |
+
"step": 1468
|
11244 |
+
},
|
11245 |
+
{
|
11246 |
+
"epoch": 1.7024972855591747,
|
11247 |
+
"grad_norm": 56.1236013000123,
|
11248 |
+
"learning_rate": 1.1984955693525788e-08,
|
11249 |
+
"logits/chosen": -1.224461317062378,
|
11250 |
+
"logits/rejected": -1.1498862504959106,
|
11251 |
+
"logps/chosen": -118.77024841308594,
|
11252 |
+
"logps/rejected": -134.83253479003906,
|
11253 |
+
"loss": 0.4148,
|
11254 |
+
"rewards/accuracies": 0.9375,
|
11255 |
+
"rewards/chosen": -0.11248160153627396,
|
11256 |
+
"rewards/margins": 1.6419700384140015,
|
11257 |
+
"rewards/rejected": -1.7544519901275635,
|
11258 |
+
"step": 1470
|
11259 |
+
},
|
11260 |
+
{
|
11261 |
+
"epoch": 1.7048136083966703,
|
11262 |
+
"grad_norm": 74.87499789321072,
|
11263 |
+
"learning_rate": 1.180218180149617e-08,
|
11264 |
+
"logits/chosen": -1.3031408786773682,
|
11265 |
+
"logits/rejected": -1.2794002294540405,
|
11266 |
+
"logps/chosen": -166.36143493652344,
|
11267 |
+
"logps/rejected": -182.16787719726562,
|
11268 |
+
"loss": 0.4709,
|
11269 |
+
"rewards/accuracies": 0.75,
|
11270 |
+
"rewards/chosen": -0.30539318919181824,
|
11271 |
+
"rewards/margins": 1.1033700704574585,
|
11272 |
+
"rewards/rejected": -1.4087631702423096,
|
11273 |
+
"step": 1472
|
11274 |
+
},
|
11275 |
+
{
|
11276 |
+
"epoch": 1.707129931234166,
|
11277 |
+
"grad_norm": 55.679405729529364,
|
11278 |
+
"learning_rate": 1.1620724879689791e-08,
|
11279 |
+
"logits/chosen": -1.2027887105941772,
|
11280 |
+
"logits/rejected": -1.2204790115356445,
|
11281 |
+
"logps/chosen": -131.13787841796875,
|
11282 |
+
"logps/rejected": -150.597900390625,
|
11283 |
+
"loss": 0.3978,
|
11284 |
+
"rewards/accuracies": 0.71875,
|
11285 |
+
"rewards/chosen": -0.2811795473098755,
|
11286 |
+
"rewards/margins": 0.9655717015266418,
|
11287 |
+
"rewards/rejected": -1.2467511892318726,
|
11288 |
+
"step": 1474
|
11289 |
+
},
|
11290 |
+
{
|
11291 |
+
"epoch": 1.7094462540716613,
|
11292 |
+
"grad_norm": 59.35657673584767,
|
11293 |
+
"learning_rate": 1.1440587637622256e-08,
|
11294 |
+
"logits/chosen": -1.2350002527236938,
|
11295 |
+
"logits/rejected": -1.3663004636764526,
|
11296 |
+
"logps/chosen": -94.2022705078125,
|
11297 |
+
"logps/rejected": -113.4736099243164,
|
11298 |
+
"loss": 0.4273,
|
11299 |
+
"rewards/accuracies": 0.8125,
|
11300 |
+
"rewards/chosen": -0.37293869256973267,
|
11301 |
+
"rewards/margins": 0.5190171599388123,
|
11302 |
+
"rewards/rejected": -0.8919559121131897,
|
11303 |
+
"step": 1476
|
11304 |
+
},
|
11305 |
+
{
|
11306 |
+
"epoch": 1.7117625769091567,
|
11307 |
+
"grad_norm": 58.75339969893179,
|
11308 |
+
"learning_rate": 1.1261772765103682e-08,
|
11309 |
+
"logits/chosen": -1.2917990684509277,
|
11310 |
+
"logits/rejected": -1.3643466234207153,
|
11311 |
+
"logps/chosen": -115.38711547851562,
|
11312 |
+
"logps/rejected": -147.71717834472656,
|
11313 |
+
"loss": 0.4022,
|
11314 |
+
"rewards/accuracies": 0.875,
|
11315 |
+
"rewards/chosen": -0.19546058773994446,
|
11316 |
+
"rewards/margins": 1.1290830373764038,
|
11317 |
+
"rewards/rejected": -1.3245434761047363,
|
11318 |
+
"step": 1478
|
11319 |
+
},
|
11320 |
+
{
|
11321 |
+
"epoch": 1.714078899746652,
|
11322 |
+
"grad_norm": 65.37238691562818,
|
11323 |
+
"learning_rate": 1.108428293219854e-08,
|
11324 |
+
"logits/chosen": -1.2615394592285156,
|
11325 |
+
"logits/rejected": -1.2974615097045898,
|
11326 |
+
"logps/chosen": -134.23512268066406,
|
11327 |
+
"logps/rejected": -218.10336303710938,
|
11328 |
+
"loss": 0.414,
|
11329 |
+
"rewards/accuracies": 0.90625,
|
11330 |
+
"rewards/chosen": -0.6633899807929993,
|
11331 |
+
"rewards/margins": 2.063319206237793,
|
11332 |
+
"rewards/rejected": -2.7267091274261475,
|
11333 |
+
"step": 1480
|
11334 |
+
},
|
11335 |
+
{
|
11336 |
+
"epoch": 1.7163952225841477,
|
11337 |
+
"grad_norm": 46.08904659289323,
|
11338 |
+
"learning_rate": 1.0908120789185837e-08,
|
11339 |
+
"logits/chosen": -1.1299887895584106,
|
11340 |
+
"logits/rejected": -1.0999367237091064,
|
11341 |
+
"logps/chosen": -147.2964324951172,
|
11342 |
+
"logps/rejected": -166.0971221923828,
|
11343 |
+
"loss": 0.3567,
|
11344 |
+
"rewards/accuracies": 0.84375,
|
11345 |
+
"rewards/chosen": -0.3920423686504364,
|
11346 |
+
"rewards/margins": 1.2661586999893188,
|
11347 |
+
"rewards/rejected": -1.658200979232788,
|
11348 |
+
"step": 1482
|
11349 |
+
},
|
11350 |
+
{
|
11351 |
+
"epoch": 1.7187115454216433,
|
11352 |
+
"grad_norm": 62.032458754200285,
|
11353 |
+
"learning_rate": 1.0733288966519516e-08,
|
11354 |
+
"logits/chosen": -1.258570909500122,
|
11355 |
+
"logits/rejected": -1.253230333328247,
|
11356 |
+
"logps/chosen": -97.19083404541016,
|
11357 |
+
"logps/rejected": -109.28732299804688,
|
11358 |
+
"loss": 0.4554,
|
11359 |
+
"rewards/accuracies": 0.75,
|
11360 |
+
"rewards/chosen": 0.02817530930042267,
|
11361 |
+
"rewards/margins": 0.9206870794296265,
|
11362 |
+
"rewards/rejected": -0.8925117254257202,
|
11363 |
+
"step": 1484
|
11364 |
+
},
|
11365 |
+
{
|
11366 |
+
"epoch": 1.7210278682591387,
|
11367 |
+
"grad_norm": 75.38001959898925,
|
11368 |
+
"learning_rate": 1.0559790074789132e-08,
|
11369 |
+
"logits/chosen": -1.2418440580368042,
|
11370 |
+
"logits/rejected": -1.3091630935668945,
|
11371 |
+
"logps/chosen": -140.07681274414062,
|
11372 |
+
"logps/rejected": -164.11033630371094,
|
11373 |
+
"loss": 0.4497,
|
11374 |
+
"rewards/accuracies": 0.84375,
|
11375 |
+
"rewards/chosen": -0.10434143245220184,
|
11376 |
+
"rewards/margins": 1.5909518003463745,
|
11377 |
+
"rewards/rejected": -1.6952931880950928,
|
11378 |
+
"step": 1486
|
11379 |
+
},
|
11380 |
+
{
|
11381 |
+
"epoch": 1.723344191096634,
|
11382 |
+
"grad_norm": 60.00004527174649,
|
11383 |
+
"learning_rate": 1.0387626704680896e-08,
|
11384 |
+
"logits/chosen": -1.1610029935836792,
|
11385 |
+
"logits/rejected": -1.1764705181121826,
|
11386 |
+
"logps/chosen": -153.56924438476562,
|
11387 |
+
"logps/rejected": -186.0706329345703,
|
11388 |
+
"loss": 0.4105,
|
11389 |
+
"rewards/accuracies": 0.875,
|
11390 |
+
"rewards/chosen": -0.09610500931739807,
|
11391 |
+
"rewards/margins": 1.3976409435272217,
|
11392 |
+
"rewards/rejected": -1.4937461614608765,
|
11393 |
+
"step": 1488
|
11394 |
+
},
|
11395 |
+
{
|
11396 |
+
"epoch": 1.7256605139341294,
|
11397 |
+
"grad_norm": 54.45806934963145,
|
11398 |
+
"learning_rate": 1.0216801426939093e-08,
|
11399 |
+
"logits/chosen": -1.230536699295044,
|
11400 |
+
"logits/rejected": -1.2105457782745361,
|
11401 |
+
"logps/chosen": -138.13816833496094,
|
11402 |
+
"logps/rejected": -160.49887084960938,
|
11403 |
+
"loss": 0.451,
|
11404 |
+
"rewards/accuracies": 0.8125,
|
11405 |
+
"rewards/chosen": -0.30948516726493835,
|
11406 |
+
"rewards/margins": 1.360965371131897,
|
11407 |
+
"rewards/rejected": -1.6704505681991577,
|
11408 |
+
"step": 1490
|
11409 |
+
},
|
11410 |
+
{
|
11411 |
+
"epoch": 1.727976836771625,
|
11412 |
+
"grad_norm": 82.82636273947331,
|
11413 |
+
"learning_rate": 1.0047316792327498e-08,
|
11414 |
+
"logits/chosen": -1.2326574325561523,
|
11415 |
+
"logits/rejected": -1.3276634216308594,
|
11416 |
+
"logps/chosen": -173.7947235107422,
|
11417 |
+
"logps/rejected": -209.5412139892578,
|
11418 |
+
"loss": 0.3827,
|
11419 |
+
"rewards/accuracies": 0.875,
|
11420 |
+
"rewards/chosen": -0.604800820350647,
|
11421 |
+
"rewards/margins": 1.53623366355896,
|
11422 |
+
"rewards/rejected": -2.1410343647003174,
|
11423 |
+
"step": 1492
|
11424 |
+
},
|
11425 |
+
{
|
11426 |
+
"epoch": 1.7302931596091207,
|
11427 |
+
"grad_norm": 69.61022757488912,
|
11428 |
+
"learning_rate": 9.879175331591472e-09,
|
11429 |
+
"logits/chosen": -1.1758224964141846,
|
11430 |
+
"logits/rejected": -1.201228380203247,
|
11431 |
+
"logps/chosen": -147.6432342529297,
|
11432 |
+
"logps/rejected": -187.960693359375,
|
11433 |
+
"loss": 0.3883,
|
11434 |
+
"rewards/accuracies": 0.78125,
|
11435 |
+
"rewards/chosen": -0.6493701934814453,
|
11436 |
+
"rewards/margins": 1.607360601425171,
|
11437 |
+
"rewards/rejected": -2.256730794906616,
|
11438 |
+
"step": 1494
|
11439 |
+
},
|
11440 |
+
{
|
11441 |
+
"epoch": 1.732609482446616,
|
11442 |
+
"grad_norm": 46.06958168770852,
|
11443 |
+
"learning_rate": 9.712379555420092e-09,
|
11444 |
+
"logits/chosen": -1.1795316934585571,
|
11445 |
+
"logits/rejected": -1.285264492034912,
|
11446 |
+
"logps/chosen": -119.38116455078125,
|
11447 |
+
"logps/rejected": -151.4436798095703,
|
11448 |
+
"loss": 0.403,
|
11449 |
+
"rewards/accuracies": 0.8125,
|
11450 |
+
"rewards/chosen": -0.10696488618850708,
|
11451 |
+
"rewards/margins": 1.2390841245651245,
|
11452 |
+
"rewards/rejected": -1.3460490703582764,
|
11453 |
+
"step": 1496
|
11454 |
+
},
|
11455 |
+
{
|
11456 |
+
"epoch": 1.7349258052841114,
|
11457 |
+
"grad_norm": 57.95589326263925,
|
11458 |
+
"learning_rate": 9.546931954408621e-09,
|
11459 |
+
"logits/chosen": -1.1415185928344727,
|
11460 |
+
"logits/rejected": -1.1725091934204102,
|
11461 |
+
"logps/chosen": -115.87825775146484,
|
11462 |
+
"logps/rejected": -149.04075622558594,
|
11463 |
+
"loss": 0.5099,
|
11464 |
+
"rewards/accuracies": 0.875,
|
11465 |
+
"rewards/chosen": -0.27941668033599854,
|
11466 |
+
"rewards/margins": 1.3693090677261353,
|
11467 |
+
"rewards/rejected": -1.6487256288528442,
|
11468 |
+
"step": 1498
|
11469 |
+
},
|
11470 |
+
{
|
11471 |
+
"epoch": 1.7372421281216068,
|
11472 |
+
"grad_norm": 55.43462235715989,
|
11473 |
+
"learning_rate": 9.382834999021372e-09,
|
11474 |
+
"logits/chosen": -1.2606816291809082,
|
11475 |
+
"logits/rejected": -1.2870656251907349,
|
11476 |
+
"logps/chosen": -125.43834686279297,
|
11477 |
+
"logps/rejected": -157.20016479492188,
|
11478 |
+
"loss": 0.4188,
|
11479 |
+
"rewards/accuracies": 0.75,
|
11480 |
+
"rewards/chosen": -0.31777307391166687,
|
11481 |
+
"rewards/margins": 0.9490638375282288,
|
11482 |
+
"rewards/rejected": -1.2668367624282837,
|
11483 |
+
"step": 1500
|
11484 |
+
},
|
11485 |
+
{
|
11486 |
+
"epoch": 1.7372421281216068,
|
11487 |
+
"eval_logits/chosen": -1.2174957990646362,
|
11488 |
+
"eval_logits/rejected": -1.2125576734542847,
|
11489 |
+
"eval_logps/chosen": -143.9083251953125,
|
11490 |
+
"eval_logps/rejected": -148.98919677734375,
|
11491 |
+
"eval_loss": 0.5947905778884888,
|
11492 |
+
"eval_rewards/accuracies": 0.7200000286102295,
|
11493 |
+
"eval_rewards/chosen": -0.8277125358581543,
|
11494 |
+
"eval_rewards/margins": 0.675851047039032,
|
11495 |
+
"eval_rewards/rejected": -1.5035635232925415,
|
11496 |
+
"eval_runtime": 23.0512,
|
11497 |
+
"eval_samples_per_second": 4.338,
|
11498 |
+
"eval_steps_per_second": 1.085,
|
11499 |
+
"step": 1500
|
11500 |
}
|
11501 |
],
|
11502 |
"logging_steps": 2,
|