Training in progress, step 1500, checkpoint
Browse files- last-checkpoint/global_step1500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1500/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1500/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1500/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1500/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00004.safetensors +1 -1
- last-checkpoint/model-00002-of-00004.safetensors +1 -1
- last-checkpoint/model-00003-of-00004.safetensors +1 -1
- last-checkpoint/model-00004-of-00004.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +2300 -2
last-checkpoint/global_step1500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e8b57e22ea9a31f314b682e8603b72abb5c7c1059d93c7b9649175fc2b99d90
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b9482d352dc81f94eeed704b766a97dfdcebdcdccef2cc7af14042e1308dcfc
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed95171f1cb12f5fa7959a64791d8596d5411aa3997ae72be474532ec9531b98
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2997615d0998626cf7a9b99bd9ef7c501b60db21a78a20a69bd0fb9bed800c4
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1500/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfc8c9c9a7d2fcfa2e8fa40312006739b2c850b9a5ecfacd34f5a60173282093
|
3 |
+
size 150693
|
last-checkpoint/global_step1500/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:259ffd7944316a19d3cd4a130f207a063579c77fad5d447554aeff12156085d6
|
3 |
+
size 150693
|
last-checkpoint/global_step1500/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d29c1fe957781c3a62402feef79dfb38cf56b8fddb352092f6ffd32c6211e320
|
3 |
+
size 150693
|
last-checkpoint/global_step1500/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:921a755d22277c2d84c9363b6dc0c6e459c1e6fbaddb89bc814ee9c5db4f54fb
|
3 |
+
size 150693
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step1500
|
last-checkpoint/model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4976698672
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c232699cc7833ad1c4bc01be99aaf9576f52b50426c0dd90e77d56fe57003bd8
|
3 |
size 4976698672
|
last-checkpoint/model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999802720
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb26630d8f5249ea3099ea703a2b2c80769fc677b8ad9fdfaa85dcfc32419b8a
|
3 |
size 4999802720
|
last-checkpoint/model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4915916176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:749813aeff58fe35f2eb5e71bddacb082d1bff06e7b90c4c6dda2ce1fe2792ae
|
3 |
size 4915916176
|
last-checkpoint/model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1168138808
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:efe356faa4bc4c418b49ecbc85a6dd22d1f226c0c0fc5ed29f9e7b49217d392d
|
3 |
size 1168138808
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4f003069486a57c6ac033f30cf4c4213eb6b7d659bab68a5a50fdb8da7c4118
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a016ef89b4392d083b2c15a7cf06a39bc61a759f648cf6dc03f1c32b89a526aa
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b56fe0893036dc052d18d90feba4328b90ea71561942150b07406ac3d7a700e
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0c203d12c2c308dab785ed672c9ca27fb6a2f72acd1e1552d1516c7b0006013
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:542d0559a1accf1318fe1b90ae775b1a17df7be4c307e0080049ab5cb2d79573
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -9199,6 +9199,2304 @@
|
|
9199 |
"eval_samples_per_second": 4.241,
|
9200 |
"eval_steps_per_second": 1.06,
|
9201 |
"step": 1200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9202 |
}
|
9203 |
],
|
9204 |
"logging_steps": 2,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.7302285343522457,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 1500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
9199 |
"eval_samples_per_second": 4.241,
|
9200 |
"eval_steps_per_second": 1.06,
|
9201 |
"step": 1200
|
9202 |
+
},
|
9203 |
+
{
|
9204 |
+
"epoch": 1.3864897988609328,
|
9205 |
+
"grad_norm": 44.142526060771885,
|
9206 |
+
"learning_rate": 4.768450588515978e-08,
|
9207 |
+
"logits/chosen": -1.3997114896774292,
|
9208 |
+
"logits/rejected": -1.4610910415649414,
|
9209 |
+
"logps/chosen": -131.3092041015625,
|
9210 |
+
"logps/rejected": -175.20504760742188,
|
9211 |
+
"loss": 0.3635,
|
9212 |
+
"rewards/accuracies": 0.96875,
|
9213 |
+
"rewards/chosen": -0.47151079773902893,
|
9214 |
+
"rewards/margins": 1.4366676807403564,
|
9215 |
+
"rewards/rejected": -1.9081785678863525,
|
9216 |
+
"step": 1202
|
9217 |
+
},
|
9218 |
+
{
|
9219 |
+
"epoch": 1.3887967702400692,
|
9220 |
+
"grad_norm": 71.5222774685512,
|
9221 |
+
"learning_rate": 4.7356783712264406e-08,
|
9222 |
+
"logits/chosen": -1.2650327682495117,
|
9223 |
+
"logits/rejected": -1.266175627708435,
|
9224 |
+
"logps/chosen": -227.5773162841797,
|
9225 |
+
"logps/rejected": -301.1935119628906,
|
9226 |
+
"loss": 0.4205,
|
9227 |
+
"rewards/accuracies": 0.8125,
|
9228 |
+
"rewards/chosen": -1.2289221286773682,
|
9229 |
+
"rewards/margins": 1.7615307569503784,
|
9230 |
+
"rewards/rejected": -2.990453004837036,
|
9231 |
+
"step": 1204
|
9232 |
+
},
|
9233 |
+
{
|
9234 |
+
"epoch": 1.3911037416192056,
|
9235 |
+
"grad_norm": 49.387995302632135,
|
9236 |
+
"learning_rate": 4.7029841838829265e-08,
|
9237 |
+
"logits/chosen": -1.2980482578277588,
|
9238 |
+
"logits/rejected": -1.3223190307617188,
|
9239 |
+
"logps/chosen": -119.88655090332031,
|
9240 |
+
"logps/rejected": -152.50636291503906,
|
9241 |
+
"loss": 0.4627,
|
9242 |
+
"rewards/accuracies": 0.71875,
|
9243 |
+
"rewards/chosen": -0.888391375541687,
|
9244 |
+
"rewards/margins": 0.6946803331375122,
|
9245 |
+
"rewards/rejected": -1.5830717086791992,
|
9246 |
+
"step": 1206
|
9247 |
+
},
|
9248 |
+
{
|
9249 |
+
"epoch": 1.393410712998342,
|
9250 |
+
"grad_norm": 45.81858789813819,
|
9251 |
+
"learning_rate": 4.670368511092165e-08,
|
9252 |
+
"logits/chosen": -1.4421995878219604,
|
9253 |
+
"logits/rejected": -1.441506266593933,
|
9254 |
+
"logps/chosen": -104.14234161376953,
|
9255 |
+
"logps/rejected": -150.188720703125,
|
9256 |
+
"loss": 0.4279,
|
9257 |
+
"rewards/accuracies": 0.84375,
|
9258 |
+
"rewards/chosen": -0.8347434401512146,
|
9259 |
+
"rewards/margins": 1.1854957342147827,
|
9260 |
+
"rewards/rejected": -2.0202391147613525,
|
9261 |
+
"step": 1208
|
9262 |
+
},
|
9263 |
+
{
|
9264 |
+
"epoch": 1.3957176843774781,
|
9265 |
+
"grad_norm": 60.579372765659905,
|
9266 |
+
"learning_rate": 4.6378318362971024e-08,
|
9267 |
+
"logits/chosen": -1.5403013229370117,
|
9268 |
+
"logits/rejected": -1.49015474319458,
|
9269 |
+
"logps/chosen": -235.0990447998047,
|
9270 |
+
"logps/rejected": -266.61248779296875,
|
9271 |
+
"loss": 0.3995,
|
9272 |
+
"rewards/accuracies": 0.8125,
|
9273 |
+
"rewards/chosen": -1.1586740016937256,
|
9274 |
+
"rewards/margins": 1.0637941360473633,
|
9275 |
+
"rewards/rejected": -2.2224678993225098,
|
9276 |
+
"step": 1210
|
9277 |
+
},
|
9278 |
+
{
|
9279 |
+
"epoch": 1.3980246557566145,
|
9280 |
+
"grad_norm": 60.06295691045147,
|
9281 |
+
"learning_rate": 4.605374641769751e-08,
|
9282 |
+
"logits/chosen": -1.290532112121582,
|
9283 |
+
"logits/rejected": -1.332384467124939,
|
9284 |
+
"logps/chosen": -156.14129638671875,
|
9285 |
+
"logps/rejected": -215.77317810058594,
|
9286 |
+
"loss": 0.4003,
|
9287 |
+
"rewards/accuracies": 0.84375,
|
9288 |
+
"rewards/chosen": -1.1309702396392822,
|
9289 |
+
"rewards/margins": 1.253702163696289,
|
9290 |
+
"rewards/rejected": -2.3846724033355713,
|
9291 |
+
"step": 1212
|
9292 |
+
},
|
9293 |
+
{
|
9294 |
+
"epoch": 1.4003316271357509,
|
9295 |
+
"grad_norm": 51.192598664527935,
|
9296 |
+
"learning_rate": 4.572997408604035e-08,
|
9297 |
+
"logits/chosen": -1.3888226747512817,
|
9298 |
+
"logits/rejected": -1.3567888736724854,
|
9299 |
+
"logps/chosen": -143.24537658691406,
|
9300 |
+
"logps/rejected": -161.50547790527344,
|
9301 |
+
"loss": 0.398,
|
9302 |
+
"rewards/accuracies": 0.75,
|
9303 |
+
"rewards/chosen": -0.7951155304908752,
|
9304 |
+
"rewards/margins": 0.9197046756744385,
|
9305 |
+
"rewards/rejected": -1.714820146560669,
|
9306 |
+
"step": 1214
|
9307 |
+
},
|
9308 |
+
{
|
9309 |
+
"epoch": 1.402638598514887,
|
9310 |
+
"grad_norm": 53.52691613905668,
|
9311 |
+
"learning_rate": 4.540700616708657e-08,
|
9312 |
+
"logits/chosen": -1.395592212677002,
|
9313 |
+
"logits/rejected": -1.339430570602417,
|
9314 |
+
"logps/chosen": -173.7642822265625,
|
9315 |
+
"logps/rejected": -199.0121612548828,
|
9316 |
+
"loss": 0.3771,
|
9317 |
+
"rewards/accuracies": 0.71875,
|
9318 |
+
"rewards/chosen": -0.9041624665260315,
|
9319 |
+
"rewards/margins": 1.4243049621582031,
|
9320 |
+
"rewards/rejected": -2.32846736907959,
|
9321 |
+
"step": 1216
|
9322 |
+
},
|
9323 |
+
{
|
9324 |
+
"epoch": 1.4049455698940236,
|
9325 |
+
"grad_norm": 52.455537691348205,
|
9326 |
+
"learning_rate": 4.5084847447999786e-08,
|
9327 |
+
"logits/chosen": -1.2974121570587158,
|
9328 |
+
"logits/rejected": -1.327599048614502,
|
9329 |
+
"logps/chosen": -140.09539794921875,
|
9330 |
+
"logps/rejected": -200.61834716796875,
|
9331 |
+
"loss": 0.4076,
|
9332 |
+
"rewards/accuracies": 0.78125,
|
9333 |
+
"rewards/chosen": -0.8636670708656311,
|
9334 |
+
"rewards/margins": 1.1840355396270752,
|
9335 |
+
"rewards/rejected": -2.0477025508880615,
|
9336 |
+
"step": 1218
|
9337 |
+
},
|
9338 |
+
{
|
9339 |
+
"epoch": 1.4072525412731598,
|
9340 |
+
"grad_norm": 54.793630592320525,
|
9341 |
+
"learning_rate": 4.476350270394942e-08,
|
9342 |
+
"logits/chosen": -1.3982492685317993,
|
9343 |
+
"logits/rejected": -1.4513742923736572,
|
9344 |
+
"logps/chosen": -146.10171508789062,
|
9345 |
+
"logps/rejected": -177.4093780517578,
|
9346 |
+
"loss": 0.4384,
|
9347 |
+
"rewards/accuracies": 0.8125,
|
9348 |
+
"rewards/chosen": -0.7920992970466614,
|
9349 |
+
"rewards/margins": 1.0156512260437012,
|
9350 |
+
"rewards/rejected": -1.8077504634857178,
|
9351 |
+
"step": 1220
|
9352 |
+
},
|
9353 |
+
{
|
9354 |
+
"epoch": 1.4095595126522962,
|
9355 |
+
"grad_norm": 51.63343493220621,
|
9356 |
+
"learning_rate": 4.44429766980398e-08,
|
9357 |
+
"logits/chosen": -1.4327033758163452,
|
9358 |
+
"logits/rejected": -1.3436576128005981,
|
9359 |
+
"logps/chosen": -204.16064453125,
|
9360 |
+
"logps/rejected": -280.8416748046875,
|
9361 |
+
"loss": 0.3825,
|
9362 |
+
"rewards/accuracies": 0.84375,
|
9363 |
+
"rewards/chosen": -1.0972062349319458,
|
9364 |
+
"rewards/margins": 1.7975908517837524,
|
9365 |
+
"rewards/rejected": -2.8947970867156982,
|
9366 |
+
"step": 1222
|
9367 |
+
},
|
9368 |
+
{
|
9369 |
+
"epoch": 1.4118664840314326,
|
9370 |
+
"grad_norm": 41.36257251797993,
|
9371 |
+
"learning_rate": 4.412327418123951e-08,
|
9372 |
+
"logits/chosen": -1.5090527534484863,
|
9373 |
+
"logits/rejected": -1.410886287689209,
|
9374 |
+
"logps/chosen": -151.58053588867188,
|
9375 |
+
"logps/rejected": -172.71141052246094,
|
9376 |
+
"loss": 0.3836,
|
9377 |
+
"rewards/accuracies": 0.75,
|
9378 |
+
"rewards/chosen": -0.9768989086151123,
|
9379 |
+
"rewards/margins": 1.0158425569534302,
|
9380 |
+
"rewards/rejected": -1.992741584777832,
|
9381 |
+
"step": 1224
|
9382 |
+
},
|
9383 |
+
{
|
9384 |
+
"epoch": 1.4141734554105687,
|
9385 |
+
"grad_norm": 56.70346907426679,
|
9386 |
+
"learning_rate": 4.3804399892311105e-08,
|
9387 |
+
"logits/chosen": -1.4848518371582031,
|
9388 |
+
"logits/rejected": -1.4474397897720337,
|
9389 |
+
"logps/chosen": -146.97665405273438,
|
9390 |
+
"logps/rejected": -157.70277404785156,
|
9391 |
+
"loss": 0.4153,
|
9392 |
+
"rewards/accuracies": 0.8125,
|
9393 |
+
"rewards/chosen": -0.8606613874435425,
|
9394 |
+
"rewards/margins": 0.8153095841407776,
|
9395 |
+
"rewards/rejected": -1.6759709119796753,
|
9396 |
+
"step": 1226
|
9397 |
+
},
|
9398 |
+
{
|
9399 |
+
"epoch": 1.416480426789705,
|
9400 |
+
"grad_norm": 47.688929811885686,
|
9401 |
+
"learning_rate": 4.348635855774081e-08,
|
9402 |
+
"logits/chosen": -1.419485092163086,
|
9403 |
+
"logits/rejected": -1.461479902267456,
|
9404 |
+
"logps/chosen": -150.58755493164062,
|
9405 |
+
"logps/rejected": -242.1259765625,
|
9406 |
+
"loss": 0.4167,
|
9407 |
+
"rewards/accuracies": 0.9375,
|
9408 |
+
"rewards/chosen": -0.9045516848564148,
|
9409 |
+
"rewards/margins": 1.710062861442566,
|
9410 |
+
"rewards/rejected": -2.614614725112915,
|
9411 |
+
"step": 1228
|
9412 |
+
},
|
9413 |
+
{
|
9414 |
+
"epoch": 1.4187873981688415,
|
9415 |
+
"grad_norm": 45.84951550965181,
|
9416 |
+
"learning_rate": 4.316915489166846e-08,
|
9417 |
+
"logits/chosen": -1.3644304275512695,
|
9418 |
+
"logits/rejected": -1.4508379697799683,
|
9419 |
+
"logps/chosen": -203.9729766845703,
|
9420 |
+
"logps/rejected": -246.74143981933594,
|
9421 |
+
"loss": 0.3521,
|
9422 |
+
"rewards/accuracies": 0.90625,
|
9423 |
+
"rewards/chosen": -1.0053611993789673,
|
9424 |
+
"rewards/margins": 1.194942593574524,
|
9425 |
+
"rewards/rejected": -2.200303792953491,
|
9426 |
+
"step": 1230
|
9427 |
+
},
|
9428 |
+
{
|
9429 |
+
"epoch": 1.4210943695479779,
|
9430 |
+
"grad_norm": 46.447372971104215,
|
9431 |
+
"learning_rate": 4.2852793595817524e-08,
|
9432 |
+
"logits/chosen": -1.2737727165222168,
|
9433 |
+
"logits/rejected": -1.3535902500152588,
|
9434 |
+
"logps/chosen": -154.6007080078125,
|
9435 |
+
"logps/rejected": -199.8658447265625,
|
9436 |
+
"loss": 0.416,
|
9437 |
+
"rewards/accuracies": 0.78125,
|
9438 |
+
"rewards/chosen": -0.9416995048522949,
|
9439 |
+
"rewards/margins": 1.36875319480896,
|
9440 |
+
"rewards/rejected": -2.310452699661255,
|
9441 |
+
"step": 1232
|
9442 |
+
},
|
9443 |
+
{
|
9444 |
+
"epoch": 1.4234013409271142,
|
9445 |
+
"grad_norm": 48.96355848399301,
|
9446 |
+
"learning_rate": 4.2537279359425625e-08,
|
9447 |
+
"logits/chosen": -1.3873982429504395,
|
9448 |
+
"logits/rejected": -1.4506531953811646,
|
9449 |
+
"logps/chosen": -153.49920654296875,
|
9450 |
+
"logps/rejected": -220.96954345703125,
|
9451 |
+
"loss": 0.3912,
|
9452 |
+
"rewards/accuracies": 0.90625,
|
9453 |
+
"rewards/chosen": -0.8012152910232544,
|
9454 |
+
"rewards/margins": 1.5085324048995972,
|
9455 |
+
"rewards/rejected": -2.3097476959228516,
|
9456 |
+
"step": 1234
|
9457 |
+
},
|
9458 |
+
{
|
9459 |
+
"epoch": 1.4257083123062504,
|
9460 |
+
"grad_norm": 51.877984061457596,
|
9461 |
+
"learning_rate": 4.2222616859174884e-08,
|
9462 |
+
"logits/chosen": -1.2629234790802002,
|
9463 |
+
"logits/rejected": -1.1391026973724365,
|
9464 |
+
"logps/chosen": -237.13059997558594,
|
9465 |
+
"logps/rejected": -254.82974243164062,
|
9466 |
+
"loss": 0.394,
|
9467 |
+
"rewards/accuracies": 0.90625,
|
9468 |
+
"rewards/chosen": -1.0531830787658691,
|
9469 |
+
"rewards/margins": 1.6561241149902344,
|
9470 |
+
"rewards/rejected": -2.7093071937561035,
|
9471 |
+
"step": 1236
|
9472 |
+
},
|
9473 |
+
{
|
9474 |
+
"epoch": 1.4280152836853868,
|
9475 |
+
"grad_norm": 45.68765126004196,
|
9476 |
+
"learning_rate": 4.190881075912268e-08,
|
9477 |
+
"logits/chosen": -1.3556292057037354,
|
9478 |
+
"logits/rejected": -1.374952793121338,
|
9479 |
+
"logps/chosen": -207.07798767089844,
|
9480 |
+
"logps/rejected": -324.4890441894531,
|
9481 |
+
"loss": 0.3957,
|
9482 |
+
"rewards/accuracies": 0.84375,
|
9483 |
+
"rewards/chosen": -1.2260664701461792,
|
9484 |
+
"rewards/margins": 1.6907085180282593,
|
9485 |
+
"rewards/rejected": -2.9167752265930176,
|
9486 |
+
"step": 1238
|
9487 |
+
},
|
9488 |
+
{
|
9489 |
+
"epoch": 1.4303222550645232,
|
9490 |
+
"grad_norm": 58.52244034620422,
|
9491 |
+
"learning_rate": 4.1595865710632364e-08,
|
9492 |
+
"logits/chosen": -1.4333430528640747,
|
9493 |
+
"logits/rejected": -1.5848240852355957,
|
9494 |
+
"logps/chosen": -171.97352600097656,
|
9495 |
+
"logps/rejected": -274.1302795410156,
|
9496 |
+
"loss": 0.3436,
|
9497 |
+
"rewards/accuracies": 0.90625,
|
9498 |
+
"rewards/chosen": -0.5987138748168945,
|
9499 |
+
"rewards/margins": 1.624565839767456,
|
9500 |
+
"rewards/rejected": -2.2232794761657715,
|
9501 |
+
"step": 1240
|
9502 |
+
},
|
9503 |
+
{
|
9504 |
+
"epoch": 1.4326292264436593,
|
9505 |
+
"grad_norm": 41.43586539854694,
|
9506 |
+
"learning_rate": 4.1283786352304494e-08,
|
9507 |
+
"logits/chosen": -1.37808358669281,
|
9508 |
+
"logits/rejected": -1.4364588260650635,
|
9509 |
+
"logps/chosen": -209.56759643554688,
|
9510 |
+
"logps/rejected": -294.7293395996094,
|
9511 |
+
"loss": 0.381,
|
9512 |
+
"rewards/accuracies": 0.90625,
|
9513 |
+
"rewards/chosen": -0.9219837188720703,
|
9514 |
+
"rewards/margins": 1.9135119915008545,
|
9515 |
+
"rewards/rejected": -2.835495710372925,
|
9516 |
+
"step": 1242
|
9517 |
+
},
|
9518 |
+
{
|
9519 |
+
"epoch": 1.4349361978227957,
|
9520 |
+
"grad_norm": 50.55832261861475,
|
9521 |
+
"learning_rate": 4.0972577309908054e-08,
|
9522 |
+
"logits/chosen": -1.5222772359848022,
|
9523 |
+
"logits/rejected": -1.4453645944595337,
|
9524 |
+
"logps/chosen": -186.14736938476562,
|
9525 |
+
"logps/rejected": -196.9586944580078,
|
9526 |
+
"loss": 0.421,
|
9527 |
+
"rewards/accuracies": 0.875,
|
9528 |
+
"rewards/chosen": -0.628247857093811,
|
9529 |
+
"rewards/margins": 1.214928388595581,
|
9530 |
+
"rewards/rejected": -1.8431761264801025,
|
9531 |
+
"step": 1244
|
9532 |
+
},
|
9533 |
+
{
|
9534 |
+
"epoch": 1.437243169201932,
|
9535 |
+
"grad_norm": 44.420648415221365,
|
9536 |
+
"learning_rate": 4.066224319631181e-08,
|
9537 |
+
"logits/chosen": -1.3419413566589355,
|
9538 |
+
"logits/rejected": -1.3609391450881958,
|
9539 |
+
"logps/chosen": -127.96074676513672,
|
9540 |
+
"logps/rejected": -184.03919982910156,
|
9541 |
+
"loss": 0.3564,
|
9542 |
+
"rewards/accuracies": 0.84375,
|
9543 |
+
"rewards/chosen": -0.8036874532699585,
|
9544 |
+
"rewards/margins": 1.4429806470870972,
|
9545 |
+
"rewards/rejected": -2.2466681003570557,
|
9546 |
+
"step": 1246
|
9547 |
+
},
|
9548 |
+
{
|
9549 |
+
"epoch": 1.4395501405810684,
|
9550 |
+
"grad_norm": 61.95216492377816,
|
9551 |
+
"learning_rate": 4.035278861141588e-08,
|
9552 |
+
"logits/chosen": -1.3702654838562012,
|
9553 |
+
"logits/rejected": -1.376900315284729,
|
9554 |
+
"logps/chosen": -208.74798583984375,
|
9555 |
+
"logps/rejected": -250.13096618652344,
|
9556 |
+
"loss": 0.4303,
|
9557 |
+
"rewards/accuracies": 0.8125,
|
9558 |
+
"rewards/chosen": -1.0059157609939575,
|
9559 |
+
"rewards/margins": 1.4688332080841064,
|
9560 |
+
"rewards/rejected": -2.4747488498687744,
|
9561 |
+
"step": 1248
|
9562 |
+
},
|
9563 |
+
{
|
9564 |
+
"epoch": 1.4418571119602048,
|
9565 |
+
"grad_norm": 41.2065328676138,
|
9566 |
+
"learning_rate": 4.004421814208381e-08,
|
9567 |
+
"logits/chosen": -1.4238879680633545,
|
9568 |
+
"logits/rejected": -1.3295542001724243,
|
9569 |
+
"logps/chosen": -211.55715942382812,
|
9570 |
+
"logps/rejected": -216.31297302246094,
|
9571 |
+
"loss": 0.3928,
|
9572 |
+
"rewards/accuracies": 0.875,
|
9573 |
+
"rewards/chosen": -1.0619137287139893,
|
9574 |
+
"rewards/margins": 0.779721200466156,
|
9575 |
+
"rewards/rejected": -1.8416348695755005,
|
9576 |
+
"step": 1250
|
9577 |
+
},
|
9578 |
+
{
|
9579 |
+
"epoch": 1.444164083339341,
|
9580 |
+
"grad_norm": 49.78351955053435,
|
9581 |
+
"learning_rate": 3.973653636207437e-08,
|
9582 |
+
"logits/chosen": -1.3463867902755737,
|
9583 |
+
"logits/rejected": -1.382056474685669,
|
9584 |
+
"logps/chosen": -150.3297882080078,
|
9585 |
+
"logps/rejected": -177.75344848632812,
|
9586 |
+
"loss": 0.4125,
|
9587 |
+
"rewards/accuracies": 0.8125,
|
9588 |
+
"rewards/chosen": -0.8665183186531067,
|
9589 |
+
"rewards/margins": 0.8241917490959167,
|
9590 |
+
"rewards/rejected": -1.690710186958313,
|
9591 |
+
"step": 1252
|
9592 |
+
},
|
9593 |
+
{
|
9594 |
+
"epoch": 1.4464710547184774,
|
9595 |
+
"grad_norm": 55.635458128144826,
|
9596 |
+
"learning_rate": 3.942974783197369e-08,
|
9597 |
+
"logits/chosen": -1.4604573249816895,
|
9598 |
+
"logits/rejected": -1.479122281074524,
|
9599 |
+
"logps/chosen": -202.9034423828125,
|
9600 |
+
"logps/rejected": -222.47259521484375,
|
9601 |
+
"loss": 0.4657,
|
9602 |
+
"rewards/accuracies": 0.75,
|
9603 |
+
"rewards/chosen": -0.9005415439605713,
|
9604 |
+
"rewards/margins": 0.963725209236145,
|
9605 |
+
"rewards/rejected": -1.8642667531967163,
|
9606 |
+
"step": 1254
|
9607 |
+
},
|
9608 |
+
{
|
9609 |
+
"epoch": 1.4487780260976137,
|
9610 |
+
"grad_norm": 59.59313665883058,
|
9611 |
+
"learning_rate": 3.912385709912793e-08,
|
9612 |
+
"logits/chosen": -1.3942638635635376,
|
9613 |
+
"logits/rejected": -1.3953242301940918,
|
9614 |
+
"logps/chosen": -201.85675048828125,
|
9615 |
+
"logps/rejected": -182.12155151367188,
|
9616 |
+
"loss": 0.4202,
|
9617 |
+
"rewards/accuracies": 0.8125,
|
9618 |
+
"rewards/chosen": -0.9314505457878113,
|
9619 |
+
"rewards/margins": 0.9069394469261169,
|
9620 |
+
"rewards/rejected": -1.8383899927139282,
|
9621 |
+
"step": 1256
|
9622 |
+
},
|
9623 |
+
{
|
9624 |
+
"epoch": 1.4510849974767501,
|
9625 |
+
"grad_norm": 48.47930918097777,
|
9626 |
+
"learning_rate": 3.881886869757565e-08,
|
9627 |
+
"logits/chosen": -1.3910021781921387,
|
9628 |
+
"logits/rejected": -1.3850898742675781,
|
9629 |
+
"logps/chosen": -198.0294189453125,
|
9630 |
+
"logps/rejected": -240.89625549316406,
|
9631 |
+
"loss": 0.3894,
|
9632 |
+
"rewards/accuracies": 0.78125,
|
9633 |
+
"rewards/chosen": -1.1357581615447998,
|
9634 |
+
"rewards/margins": 1.160947561264038,
|
9635 |
+
"rewards/rejected": -2.296705722808838,
|
9636 |
+
"step": 1258
|
9637 |
+
},
|
9638 |
+
{
|
9639 |
+
"epoch": 1.4533919688558865,
|
9640 |
+
"grad_norm": 59.379663832920095,
|
9641 |
+
"learning_rate": 3.851478714798075e-08,
|
9642 |
+
"logits/chosen": -1.4543538093566895,
|
9643 |
+
"logits/rejected": -1.3412946462631226,
|
9644 |
+
"logps/chosen": -175.93966674804688,
|
9645 |
+
"logps/rejected": -162.14071655273438,
|
9646 |
+
"loss": 0.4025,
|
9647 |
+
"rewards/accuracies": 0.9375,
|
9648 |
+
"rewards/chosen": -0.5226415991783142,
|
9649 |
+
"rewards/margins": 1.147411584854126,
|
9650 |
+
"rewards/rejected": -1.6700531244277954,
|
9651 |
+
"step": 1260
|
9652 |
+
},
|
9653 |
+
{
|
9654 |
+
"epoch": 1.4556989402350227,
|
9655 |
+
"grad_norm": 48.337458683899825,
|
9656 |
+
"learning_rate": 3.821161695756528e-08,
|
9657 |
+
"logits/chosen": -1.4448070526123047,
|
9658 |
+
"logits/rejected": -1.4677135944366455,
|
9659 |
+
"logps/chosen": -207.90625,
|
9660 |
+
"logps/rejected": -218.8758544921875,
|
9661 |
+
"loss": 0.3906,
|
9662 |
+
"rewards/accuracies": 0.84375,
|
9663 |
+
"rewards/chosen": -1.042921543121338,
|
9664 |
+
"rewards/margins": 0.8819532990455627,
|
9665 |
+
"rewards/rejected": -1.9248747825622559,
|
9666 |
+
"step": 1262
|
9667 |
+
},
|
9668 |
+
{
|
9669 |
+
"epoch": 1.458005911614159,
|
9670 |
+
"grad_norm": 37.810521769650734,
|
9671 |
+
"learning_rate": 3.790936262004286e-08,
|
9672 |
+
"logits/chosen": -1.4521461725234985,
|
9673 |
+
"logits/rejected": -1.4518852233886719,
|
9674 |
+
"logps/chosen": -164.5406494140625,
|
9675 |
+
"logps/rejected": -222.47146606445312,
|
9676 |
+
"loss": 0.3938,
|
9677 |
+
"rewards/accuracies": 0.78125,
|
9678 |
+
"rewards/chosen": -0.953307569026947,
|
9679 |
+
"rewards/margins": 1.0731128454208374,
|
9680 |
+
"rewards/rejected": -2.0264203548431396,
|
9681 |
+
"step": 1264
|
9682 |
+
},
|
9683 |
+
{
|
9684 |
+
"epoch": 1.4603128829932954,
|
9685 |
+
"grad_norm": 47.582874328113576,
|
9686 |
+
"learning_rate": 3.760802861555192e-08,
|
9687 |
+
"logits/chosen": -1.341475009918213,
|
9688 |
+
"logits/rejected": -1.4166816473007202,
|
9689 |
+
"logps/chosen": -169.36488342285156,
|
9690 |
+
"logps/rejected": -240.3351593017578,
|
9691 |
+
"loss": 0.3934,
|
9692 |
+
"rewards/accuracies": 0.8125,
|
9693 |
+
"rewards/chosen": -0.9554527997970581,
|
9694 |
+
"rewards/margins": 1.2030149698257446,
|
9695 |
+
"rewards/rejected": -2.1584675312042236,
|
9696 |
+
"step": 1266
|
9697 |
+
},
|
9698 |
+
{
|
9699 |
+
"epoch": 1.4626198543724316,
|
9700 |
+
"grad_norm": 45.64058383800231,
|
9701 |
+
"learning_rate": 3.7307619410589374e-08,
|
9702 |
+
"logits/chosen": -1.3934252262115479,
|
9703 |
+
"logits/rejected": -1.4444223642349243,
|
9704 |
+
"logps/chosen": -172.47576904296875,
|
9705 |
+
"logps/rejected": -233.11859130859375,
|
9706 |
+
"loss": 0.3604,
|
9707 |
+
"rewards/accuracies": 0.90625,
|
9708 |
+
"rewards/chosen": -0.9768462777137756,
|
9709 |
+
"rewards/margins": 1.4340298175811768,
|
9710 |
+
"rewards/rejected": -2.4108762741088867,
|
9711 |
+
"step": 1268
|
9712 |
+
},
|
9713 |
+
{
|
9714 |
+
"epoch": 1.464926825751568,
|
9715 |
+
"grad_norm": 53.5262973877338,
|
9716 |
+
"learning_rate": 3.7008139457944244e-08,
|
9717 |
+
"logits/chosen": -1.3181649446487427,
|
9718 |
+
"logits/rejected": -1.3409029245376587,
|
9719 |
+
"logps/chosen": -165.74822998046875,
|
9720 |
+
"logps/rejected": -195.7538604736328,
|
9721 |
+
"loss": 0.3998,
|
9722 |
+
"rewards/accuracies": 0.8125,
|
9723 |
+
"rewards/chosen": -0.8591657876968384,
|
9724 |
+
"rewards/margins": 1.1731674671173096,
|
9725 |
+
"rewards/rejected": -2.0323331356048584,
|
9726 |
+
"step": 1270
|
9727 |
+
},
|
9728 |
+
{
|
9729 |
+
"epoch": 1.4672337971307043,
|
9730 |
+
"grad_norm": 46.63543094780117,
|
9731 |
+
"learning_rate": 3.670959319663195e-08,
|
9732 |
+
"logits/chosen": -1.5043174028396606,
|
9733 |
+
"logits/rejected": -1.4369632005691528,
|
9734 |
+
"logps/chosen": -272.2401428222656,
|
9735 |
+
"logps/rejected": -299.15362548828125,
|
9736 |
+
"loss": 0.4018,
|
9737 |
+
"rewards/accuracies": 0.84375,
|
9738 |
+
"rewards/chosen": -1.246598243713379,
|
9739 |
+
"rewards/margins": 1.317877173423767,
|
9740 |
+
"rewards/rejected": -2.5644755363464355,
|
9741 |
+
"step": 1272
|
9742 |
+
},
|
9743 |
+
{
|
9744 |
+
"epoch": 1.4695407685098407,
|
9745 |
+
"grad_norm": 55.055744444477284,
|
9746 |
+
"learning_rate": 3.6411985051828266e-08,
|
9747 |
+
"logits/chosen": -1.3811333179473877,
|
9748 |
+
"logits/rejected": -1.388543963432312,
|
9749 |
+
"logps/chosen": -170.03436279296875,
|
9750 |
+
"logps/rejected": -235.34446716308594,
|
9751 |
+
"loss": 0.402,
|
9752 |
+
"rewards/accuracies": 0.9375,
|
9753 |
+
"rewards/chosen": -1.035598874092102,
|
9754 |
+
"rewards/margins": 1.4591599702835083,
|
9755 |
+
"rewards/rejected": -2.4947586059570312,
|
9756 |
+
"step": 1274
|
9757 |
+
},
|
9758 |
+
{
|
9759 |
+
"epoch": 1.471847739888977,
|
9760 |
+
"grad_norm": 50.20941285474848,
|
9761 |
+
"learning_rate": 3.611531943480389e-08,
|
9762 |
+
"logits/chosen": -1.3351385593414307,
|
9763 |
+
"logits/rejected": -1.3272579908370972,
|
9764 |
+
"logps/chosen": -175.16346740722656,
|
9765 |
+
"logps/rejected": -189.85296630859375,
|
9766 |
+
"loss": 0.4145,
|
9767 |
+
"rewards/accuracies": 0.875,
|
9768 |
+
"rewards/chosen": -0.9872362613677979,
|
9769 |
+
"rewards/margins": 1.0476864576339722,
|
9770 |
+
"rewards/rejected": -2.0349225997924805,
|
9771 |
+
"step": 1276
|
9772 |
+
},
|
9773 |
+
{
|
9774 |
+
"epoch": 1.4741547112681133,
|
9775 |
+
"grad_norm": 59.89422080082632,
|
9776 |
+
"learning_rate": 3.5819600742858844e-08,
|
9777 |
+
"logits/chosen": -1.3087190389633179,
|
9778 |
+
"logits/rejected": -1.3633005619049072,
|
9779 |
+
"logps/chosen": -158.3797607421875,
|
9780 |
+
"logps/rejected": -188.01727294921875,
|
9781 |
+
"loss": 0.4171,
|
9782 |
+
"rewards/accuracies": 0.84375,
|
9783 |
+
"rewards/chosen": -1.1256399154663086,
|
9784 |
+
"rewards/margins": 1.0341967344284058,
|
9785 |
+
"rewards/rejected": -2.159836530685425,
|
9786 |
+
"step": 1278
|
9787 |
+
},
|
9788 |
+
{
|
9789 |
+
"epoch": 1.4764616826472496,
|
9790 |
+
"grad_norm": 49.11561908258762,
|
9791 |
+
"learning_rate": 3.5524833359257575e-08,
|
9792 |
+
"logits/chosen": -1.5165667533874512,
|
9793 |
+
"logits/rejected": -1.462736964225769,
|
9794 |
+
"logps/chosen": -247.8217315673828,
|
9795 |
+
"logps/rejected": -262.7837219238281,
|
9796 |
+
"loss": 0.4012,
|
9797 |
+
"rewards/accuracies": 0.90625,
|
9798 |
+
"rewards/chosen": -0.883770763874054,
|
9799 |
+
"rewards/margins": 1.4304596185684204,
|
9800 |
+
"rewards/rejected": -2.314230442047119,
|
9801 |
+
"step": 1280
|
9802 |
+
},
|
9803 |
+
{
|
9804 |
+
"epoch": 1.478768654026386,
|
9805 |
+
"grad_norm": 45.113121140151996,
|
9806 |
+
"learning_rate": 3.5231021653163804e-08,
|
9807 |
+
"logits/chosen": -1.4450606107711792,
|
9808 |
+
"logits/rejected": -1.3937573432922363,
|
9809 |
+
"logps/chosen": -174.6051025390625,
|
9810 |
+
"logps/rejected": -222.32730102539062,
|
9811 |
+
"loss": 0.4,
|
9812 |
+
"rewards/accuracies": 0.8125,
|
9813 |
+
"rewards/chosen": -0.893982470035553,
|
9814 |
+
"rewards/margins": 1.3176380395889282,
|
9815 |
+
"rewards/rejected": -2.211620569229126,
|
9816 |
+
"step": 1282
|
9817 |
+
},
|
9818 |
+
{
|
9819 |
+
"epoch": 1.4810756254055222,
|
9820 |
+
"grad_norm": 52.74309051653843,
|
9821 |
+
"learning_rate": 3.493816997957582e-08,
|
9822 |
+
"logits/chosen": -1.1566662788391113,
|
9823 |
+
"logits/rejected": -1.0729938745498657,
|
9824 |
+
"logps/chosen": -164.8375701904297,
|
9825 |
+
"logps/rejected": -182.715087890625,
|
9826 |
+
"loss": 0.4019,
|
9827 |
+
"rewards/accuracies": 0.8125,
|
9828 |
+
"rewards/chosen": -0.991936206817627,
|
9829 |
+
"rewards/margins": 1.1979286670684814,
|
9830 |
+
"rewards/rejected": -2.1898648738861084,
|
9831 |
+
"step": 1284
|
9832 |
+
},
|
9833 |
+
{
|
9834 |
+
"epoch": 1.4833825967846586,
|
9835 |
+
"grad_norm": 59.365698205986185,
|
9836 |
+
"learning_rate": 3.464628267926181e-08,
|
9837 |
+
"logits/chosen": -1.5087039470672607,
|
9838 |
+
"logits/rejected": -1.4981944561004639,
|
9839 |
+
"logps/chosen": -142.24920654296875,
|
9840 |
+
"logps/rejected": -175.18621826171875,
|
9841 |
+
"loss": 0.4186,
|
9842 |
+
"rewards/accuracies": 0.875,
|
9843 |
+
"rewards/chosen": -0.8559743165969849,
|
9844 |
+
"rewards/margins": 0.9344435930252075,
|
9845 |
+
"rewards/rejected": -1.7904179096221924,
|
9846 |
+
"step": 1286
|
9847 |
+
},
|
9848 |
+
{
|
9849 |
+
"epoch": 1.485689568163795,
|
9850 |
+
"grad_norm": 57.44209478222106,
|
9851 |
+
"learning_rate": 3.435536407869575e-08,
|
9852 |
+
"logits/chosen": -1.3180654048919678,
|
9853 |
+
"logits/rejected": -1.3062459230422974,
|
9854 |
+
"logps/chosen": -161.09739685058594,
|
9855 |
+
"logps/rejected": -201.8702392578125,
|
9856 |
+
"loss": 0.3926,
|
9857 |
+
"rewards/accuracies": 0.875,
|
9858 |
+
"rewards/chosen": -0.852310061454773,
|
9859 |
+
"rewards/margins": 1.2749509811401367,
|
9860 |
+
"rewards/rejected": -2.127261161804199,
|
9861 |
+
"step": 1288
|
9862 |
+
},
|
9863 |
+
{
|
9864 |
+
"epoch": 1.4879965395429313,
|
9865 |
+
"grad_norm": 43.26619291788839,
|
9866 |
+
"learning_rate": 3.406541848999312e-08,
|
9867 |
+
"logits/chosen": -1.1940698623657227,
|
9868 |
+
"logits/rejected": -1.2222319841384888,
|
9869 |
+
"logps/chosen": -194.698974609375,
|
9870 |
+
"logps/rejected": -273.7522277832031,
|
9871 |
+
"loss": 0.3836,
|
9872 |
+
"rewards/accuracies": 0.875,
|
9873 |
+
"rewards/chosen": -1.1189277172088623,
|
9874 |
+
"rewards/margins": 1.7288103103637695,
|
9875 |
+
"rewards/rejected": -2.847738265991211,
|
9876 |
+
"step": 1290
|
9877 |
+
},
|
9878 |
+
{
|
9879 |
+
"epoch": 1.4903035109220677,
|
9880 |
+
"grad_norm": 53.62926570037078,
|
9881 |
+
"learning_rate": 3.377645021084701e-08,
|
9882 |
+
"logits/chosen": -1.2409629821777344,
|
9883 |
+
"logits/rejected": -1.3555923700332642,
|
9884 |
+
"logps/chosen": -138.0780029296875,
|
9885 |
+
"logps/rejected": -208.65414428710938,
|
9886 |
+
"loss": 0.3607,
|
9887 |
+
"rewards/accuracies": 0.96875,
|
9888 |
+
"rewards/chosen": -0.7014888525009155,
|
9889 |
+
"rewards/margins": 1.3151085376739502,
|
9890 |
+
"rewards/rejected": -2.016597270965576,
|
9891 |
+
"step": 1292
|
9892 |
+
},
|
9893 |
+
{
|
9894 |
+
"epoch": 1.4926104823012039,
|
9895 |
+
"grad_norm": 51.970100056876014,
|
9896 |
+
"learning_rate": 3.348846352446435e-08,
|
9897 |
+
"logits/chosen": -1.4000651836395264,
|
9898 |
+
"logits/rejected": -1.4792894124984741,
|
9899 |
+
"logps/chosen": -132.2428436279297,
|
9900 |
+
"logps/rejected": -182.43931579589844,
|
9901 |
+
"loss": 0.4103,
|
9902 |
+
"rewards/accuracies": 0.8125,
|
9903 |
+
"rewards/chosen": -0.824167013168335,
|
9904 |
+
"rewards/margins": 1.0639564990997314,
|
9905 |
+
"rewards/rejected": -1.888123631477356,
|
9906 |
+
"step": 1294
|
9907 |
+
},
|
9908 |
+
{
|
9909 |
+
"epoch": 1.4949174536803402,
|
9910 |
+
"grad_norm": 51.98384854834489,
|
9911 |
+
"learning_rate": 3.32014626995026e-08,
|
9912 |
+
"logits/chosen": -1.247258186340332,
|
9913 |
+
"logits/rejected": -1.2670161724090576,
|
9914 |
+
"logps/chosen": -173.65634155273438,
|
9915 |
+
"logps/rejected": -184.025390625,
|
9916 |
+
"loss": 0.3922,
|
9917 |
+
"rewards/accuracies": 0.78125,
|
9918 |
+
"rewards/chosen": -0.9547139406204224,
|
9919 |
+
"rewards/margins": 0.8975260257720947,
|
9920 |
+
"rewards/rejected": -1.852239966392517,
|
9921 |
+
"step": 1296
|
9922 |
+
},
|
9923 |
+
{
|
9924 |
+
"epoch": 1.4972244250594766,
|
9925 |
+
"grad_norm": 54.4822774885669,
|
9926 |
+
"learning_rate": 3.291545199000636e-08,
|
9927 |
+
"logits/chosen": -1.3456276655197144,
|
9928 |
+
"logits/rejected": -1.2962102890014648,
|
9929 |
+
"logps/chosen": -189.99917602539062,
|
9930 |
+
"logps/rejected": -226.09519958496094,
|
9931 |
+
"loss": 0.4015,
|
9932 |
+
"rewards/accuracies": 0.90625,
|
9933 |
+
"rewards/chosen": -1.1853322982788086,
|
9934 |
+
"rewards/margins": 1.2129353284835815,
|
9935 |
+
"rewards/rejected": -2.3982675075531006,
|
9936 |
+
"step": 1298
|
9937 |
+
},
|
9938 |
+
{
|
9939 |
+
"epoch": 1.499531396438613,
|
9940 |
+
"grad_norm": 49.557670987283394,
|
9941 |
+
"learning_rate": 3.263043563534428e-08,
|
9942 |
+
"logits/chosen": -1.4072293043136597,
|
9943 |
+
"logits/rejected": -1.403496265411377,
|
9944 |
+
"logps/chosen": -191.06785583496094,
|
9945 |
+
"logps/rejected": -222.01727294921875,
|
9946 |
+
"loss": 0.4256,
|
9947 |
+
"rewards/accuracies": 0.84375,
|
9948 |
+
"rewards/chosen": -1.1364355087280273,
|
9949 |
+
"rewards/margins": 0.8410711288452148,
|
9950 |
+
"rewards/rejected": -1.9775067567825317,
|
9951 |
+
"step": 1300
|
9952 |
+
},
|
9953 |
+
{
|
9954 |
+
"epoch": 1.499531396438613,
|
9955 |
+
"eval_logits/chosen": -1.349912405014038,
|
9956 |
+
"eval_logits/rejected": -1.2728021144866943,
|
9957 |
+
"eval_logps/chosen": -198.44601440429688,
|
9958 |
+
"eval_logps/rejected": -170.63462829589844,
|
9959 |
+
"eval_loss": 0.5290127396583557,
|
9960 |
+
"eval_rewards/accuracies": 0.7200000286102295,
|
9961 |
+
"eval_rewards/chosen": -1.3263821601867676,
|
9962 |
+
"eval_rewards/margins": 0.6855400204658508,
|
9963 |
+
"eval_rewards/rejected": -2.0119218826293945,
|
9964 |
+
"eval_runtime": 23.425,
|
9965 |
+
"eval_samples_per_second": 4.269,
|
9966 |
+
"eval_steps_per_second": 1.067,
|
9967 |
+
"step": 1300
|
9968 |
+
},
|
9969 |
+
{
|
9970 |
+
"epoch": 1.5018383678177494,
|
9971 |
+
"grad_norm": 45.842584597120144,
|
9972 |
+
"learning_rate": 3.23464178601463e-08,
|
9973 |
+
"logits/chosen": -1.3447407484054565,
|
9974 |
+
"logits/rejected": -1.3684443235397339,
|
9975 |
+
"logps/chosen": -190.647216796875,
|
9976 |
+
"logps/rejected": -204.70425415039062,
|
9977 |
+
"loss": 0.3852,
|
9978 |
+
"rewards/accuracies": 0.8125,
|
9979 |
+
"rewards/chosen": -1.0248242616653442,
|
9980 |
+
"rewards/margins": 1.146057367324829,
|
9981 |
+
"rewards/rejected": -2.1708812713623047,
|
9982 |
+
"step": 1302
|
9983 |
+
},
|
9984 |
+
{
|
9985 |
+
"epoch": 1.5041453391968855,
|
9986 |
+
"grad_norm": 47.37239527648157,
|
9987 |
+
"learning_rate": 3.206340287424102e-08,
|
9988 |
+
"logits/chosen": -1.278390884399414,
|
9989 |
+
"logits/rejected": -1.2682162523269653,
|
9990 |
+
"logps/chosen": -196.60675048828125,
|
9991 |
+
"logps/rejected": -235.6786346435547,
|
9992 |
+
"loss": 0.4076,
|
9993 |
+
"rewards/accuracies": 0.90625,
|
9994 |
+
"rewards/chosen": -1.1913988590240479,
|
9995 |
+
"rewards/margins": 1.3531779050827026,
|
9996 |
+
"rewards/rejected": -2.544576644897461,
|
9997 |
+
"step": 1304
|
9998 |
+
},
|
9999 |
+
{
|
10000 |
+
"epoch": 1.506452310576022,
|
10001 |
+
"grad_norm": 54.28789803813994,
|
10002 |
+
"learning_rate": 3.178139487259329e-08,
|
10003 |
+
"logits/chosen": -1.4512503147125244,
|
10004 |
+
"logits/rejected": -1.4933993816375732,
|
10005 |
+
"logps/chosen": -179.09815979003906,
|
10006 |
+
"logps/rejected": -240.13143920898438,
|
10007 |
+
"loss": 0.3969,
|
10008 |
+
"rewards/accuracies": 0.84375,
|
10009 |
+
"rewards/chosen": -0.9774467945098877,
|
10010 |
+
"rewards/margins": 1.2948458194732666,
|
10011 |
+
"rewards/rejected": -2.2722926139831543,
|
10012 |
+
"step": 1306
|
10013 |
+
},
|
10014 |
+
{
|
10015 |
+
"epoch": 1.5087592819551583,
|
10016 |
+
"grad_norm": 46.589365963047385,
|
10017 |
+
"learning_rate": 3.1500398035241936e-08,
|
10018 |
+
"logits/chosen": -1.4314634799957275,
|
10019 |
+
"logits/rejected": -1.4590768814086914,
|
10020 |
+
"logps/chosen": -126.36434936523438,
|
10021 |
+
"logps/rejected": -152.21775817871094,
|
10022 |
+
"loss": 0.4119,
|
10023 |
+
"rewards/accuracies": 0.78125,
|
10024 |
+
"rewards/chosen": -0.7625494003295898,
|
10025 |
+
"rewards/margins": 0.9721799492835999,
|
10026 |
+
"rewards/rejected": -1.7347294092178345,
|
10027 |
+
"step": 1308
|
10028 |
+
},
|
10029 |
+
{
|
10030 |
+
"epoch": 1.5110662533342945,
|
10031 |
+
"grad_norm": 52.15126576008971,
|
10032 |
+
"learning_rate": 3.1220416527238005e-08,
|
10033 |
+
"logits/chosen": -1.4616860151290894,
|
10034 |
+
"logits/rejected": -1.435218334197998,
|
10035 |
+
"logps/chosen": -159.05282592773438,
|
10036 |
+
"logps/rejected": -175.42967224121094,
|
10037 |
+
"loss": 0.4312,
|
10038 |
+
"rewards/accuracies": 0.8125,
|
10039 |
+
"rewards/chosen": -0.903962254524231,
|
10040 |
+
"rewards/margins": 0.8932026624679565,
|
10041 |
+
"rewards/rejected": -1.797164797782898,
|
10042 |
+
"step": 1310
|
10043 |
+
},
|
10044 |
+
{
|
10045 |
+
"epoch": 1.513373224713431,
|
10046 |
+
"grad_norm": 58.832075530463044,
|
10047 |
+
"learning_rate": 3.094145449858284e-08,
|
10048 |
+
"logits/chosen": -1.442615032196045,
|
10049 |
+
"logits/rejected": -1.3926329612731934,
|
10050 |
+
"logps/chosen": -185.3905029296875,
|
10051 |
+
"logps/rejected": -191.25869750976562,
|
10052 |
+
"loss": 0.4393,
|
10053 |
+
"rewards/accuracies": 0.78125,
|
10054 |
+
"rewards/chosen": -1.0720655918121338,
|
10055 |
+
"rewards/margins": 0.6762962341308594,
|
10056 |
+
"rewards/rejected": -1.7483618259429932,
|
10057 |
+
"step": 1312
|
10058 |
+
},
|
10059 |
+
{
|
10060 |
+
"epoch": 1.5156801960925672,
|
10061 |
+
"grad_norm": 44.7076995014112,
|
10062 |
+
"learning_rate": 3.0663516084166706e-08,
|
10063 |
+
"logits/chosen": -1.2298978567123413,
|
10064 |
+
"logits/rejected": -1.3115289211273193,
|
10065 |
+
"logps/chosen": -187.91552734375,
|
10066 |
+
"logps/rejected": -252.78433227539062,
|
10067 |
+
"loss": 0.3453,
|
10068 |
+
"rewards/accuracies": 0.90625,
|
10069 |
+
"rewards/chosen": -1.2967660427093506,
|
10070 |
+
"rewards/margins": 1.4439966678619385,
|
10071 |
+
"rewards/rejected": -2.740762710571289,
|
10072 |
+
"step": 1314
|
10073 |
+
},
|
10074 |
+
{
|
10075 |
+
"epoch": 1.5179871674717036,
|
10076 |
+
"grad_norm": 44.67859571904197,
|
10077 |
+
"learning_rate": 3.038660540370735e-08,
|
10078 |
+
"logits/chosen": -1.3734447956085205,
|
10079 |
+
"logits/rejected": -1.3580108880996704,
|
10080 |
+
"logps/chosen": -132.02508544921875,
|
10081 |
+
"logps/rejected": -184.58181762695312,
|
10082 |
+
"loss": 0.393,
|
10083 |
+
"rewards/accuracies": 0.8125,
|
10084 |
+
"rewards/chosen": -0.8986393809318542,
|
10085 |
+
"rewards/margins": 1.3148921728134155,
|
10086 |
+
"rewards/rejected": -2.213531255722046,
|
10087 |
+
"step": 1316
|
10088 |
+
},
|
10089 |
+
{
|
10090 |
+
"epoch": 1.52029413885084,
|
10091 |
+
"grad_norm": 47.98876193889907,
|
10092 |
+
"learning_rate": 3.011072656168906e-08,
|
10093 |
+
"logits/chosen": -1.3537302017211914,
|
10094 |
+
"logits/rejected": -1.359252691268921,
|
10095 |
+
"logps/chosen": -139.8520965576172,
|
10096 |
+
"logps/rejected": -180.31007385253906,
|
10097 |
+
"loss": 0.4064,
|
10098 |
+
"rewards/accuracies": 0.84375,
|
10099 |
+
"rewards/chosen": -0.850949764251709,
|
10100 |
+
"rewards/margins": 1.204286813735962,
|
10101 |
+
"rewards/rejected": -2.055236577987671,
|
10102 |
+
"step": 1318
|
10103 |
+
},
|
10104 |
+
{
|
10105 |
+
"epoch": 1.5226011102299761,
|
10106 |
+
"grad_norm": 46.0224640343323,
|
10107 |
+
"learning_rate": 2.9835883647301826e-08,
|
10108 |
+
"logits/chosen": -1.3717573881149292,
|
10109 |
+
"logits/rejected": -1.3601585626602173,
|
10110 |
+
"logps/chosen": -219.96463012695312,
|
10111 |
+
"logps/rejected": -260.22247314453125,
|
10112 |
+
"loss": 0.3923,
|
10113 |
+
"rewards/accuracies": 0.8125,
|
10114 |
+
"rewards/chosen": -1.1594874858856201,
|
10115 |
+
"rewards/margins": 1.7778315544128418,
|
10116 |
+
"rewards/rejected": -2.937319278717041,
|
10117 |
+
"step": 1320
|
10118 |
+
},
|
10119 |
+
{
|
10120 |
+
"epoch": 1.5249080816091125,
|
10121 |
+
"grad_norm": 42.94236856581764,
|
10122 |
+
"learning_rate": 2.9562080734380678e-08,
|
10123 |
+
"logits/chosen": -1.3365228176116943,
|
10124 |
+
"logits/rejected": -1.2907369136810303,
|
10125 |
+
"logps/chosen": -206.2550048828125,
|
10126 |
+
"logps/rejected": -295.9128723144531,
|
10127 |
+
"loss": 0.3811,
|
10128 |
+
"rewards/accuracies": 0.9375,
|
10129 |
+
"rewards/chosen": -0.8169040679931641,
|
10130 |
+
"rewards/margins": 1.9202640056610107,
|
10131 |
+
"rewards/rejected": -2.737168312072754,
|
10132 |
+
"step": 1322
|
10133 |
+
},
|
10134 |
+
{
|
10135 |
+
"epoch": 1.5272150529882489,
|
10136 |
+
"grad_norm": 52.34290409904556,
|
10137 |
+
"learning_rate": 2.928932188134525e-08,
|
10138 |
+
"logits/chosen": -1.3455016613006592,
|
10139 |
+
"logits/rejected": -1.2972326278686523,
|
10140 |
+
"logps/chosen": -155.3500518798828,
|
10141 |
+
"logps/rejected": -162.52243041992188,
|
10142 |
+
"loss": 0.4153,
|
10143 |
+
"rewards/accuracies": 0.875,
|
10144 |
+
"rewards/chosen": -0.7506276965141296,
|
10145 |
+
"rewards/margins": 1.141640305519104,
|
10146 |
+
"rewards/rejected": -1.8922679424285889,
|
10147 |
+
"step": 1324
|
10148 |
+
},
|
10149 |
+
{
|
10150 |
+
"epoch": 1.529522024367385,
|
10151 |
+
"grad_norm": 49.03147359925327,
|
10152 |
+
"learning_rate": 2.9017611131139762e-08,
|
10153 |
+
"logits/chosen": -1.4706536531448364,
|
10154 |
+
"logits/rejected": -1.4308120012283325,
|
10155 |
+
"logps/chosen": -184.1258544921875,
|
10156 |
+
"logps/rejected": -195.16162109375,
|
10157 |
+
"loss": 0.3929,
|
10158 |
+
"rewards/accuracies": 0.84375,
|
10159 |
+
"rewards/chosen": -0.9032193422317505,
|
10160 |
+
"rewards/margins": 1.1196848154067993,
|
10161 |
+
"rewards/rejected": -2.02290415763855,
|
10162 |
+
"step": 1326
|
10163 |
+
},
|
10164 |
+
{
|
10165 |
+
"epoch": 1.5318289957465216,
|
10166 |
+
"grad_norm": 49.97091160023798,
|
10167 |
+
"learning_rate": 2.874695251117303e-08,
|
10168 |
+
"logits/chosen": -1.4324705600738525,
|
10169 |
+
"logits/rejected": -1.3871915340423584,
|
10170 |
+
"logps/chosen": -147.20001220703125,
|
10171 |
+
"logps/rejected": -225.0901336669922,
|
10172 |
+
"loss": 0.4162,
|
10173 |
+
"rewards/accuracies": 0.84375,
|
10174 |
+
"rewards/chosen": -0.7438746690750122,
|
10175 |
+
"rewards/margins": 1.2811241149902344,
|
10176 |
+
"rewards/rejected": -2.024998664855957,
|
10177 |
+
"step": 1328
|
10178 |
+
},
|
10179 |
+
{
|
10180 |
+
"epoch": 1.5341359671256578,
|
10181 |
+
"grad_norm": 43.41525443911614,
|
10182 |
+
"learning_rate": 2.8477350033258672e-08,
|
10183 |
+
"logits/chosen": -1.522569179534912,
|
10184 |
+
"logits/rejected": -1.4508986473083496,
|
10185 |
+
"logps/chosen": -162.15855407714844,
|
10186 |
+
"logps/rejected": -165.8858642578125,
|
10187 |
+
"loss": 0.3882,
|
10188 |
+
"rewards/accuracies": 0.8125,
|
10189 |
+
"rewards/chosen": -0.6052396893501282,
|
10190 |
+
"rewards/margins": 0.978552520275116,
|
10191 |
+
"rewards/rejected": -1.5837922096252441,
|
10192 |
+
"step": 1330
|
10193 |
+
},
|
10194 |
+
{
|
10195 |
+
"epoch": 1.5364429385047942,
|
10196 |
+
"grad_norm": 49.48217123812548,
|
10197 |
+
"learning_rate": 2.8208807693555814e-08,
|
10198 |
+
"logits/chosen": -1.376870036125183,
|
10199 |
+
"logits/rejected": -1.3188726902008057,
|
10200 |
+
"logps/chosen": -202.34939575195312,
|
10201 |
+
"logps/rejected": -221.72894287109375,
|
10202 |
+
"loss": 0.4297,
|
10203 |
+
"rewards/accuracies": 0.78125,
|
10204 |
+
"rewards/chosen": -1.0866365432739258,
|
10205 |
+
"rewards/margins": 1.028623104095459,
|
10206 |
+
"rewards/rejected": -2.115259885787964,
|
10207 |
+
"step": 1332
|
10208 |
+
},
|
10209 |
+
{
|
10210 |
+
"epoch": 1.5387499098839306,
|
10211 |
+
"grad_norm": 46.51618814969754,
|
10212 |
+
"learning_rate": 2.7941329472509767e-08,
|
10213 |
+
"logits/chosen": -1.4210199117660522,
|
10214 |
+
"logits/rejected": -1.4886133670806885,
|
10215 |
+
"logps/chosen": -191.4695281982422,
|
10216 |
+
"logps/rejected": -216.92849731445312,
|
10217 |
+
"loss": 0.3674,
|
10218 |
+
"rewards/accuracies": 0.875,
|
10219 |
+
"rewards/chosen": -1.083839774131775,
|
10220 |
+
"rewards/margins": 1.2328987121582031,
|
10221 |
+
"rewards/rejected": -2.3167383670806885,
|
10222 |
+
"step": 1334
|
10223 |
+
},
|
10224 |
+
{
|
10225 |
+
"epoch": 1.5410568812630667,
|
10226 |
+
"grad_norm": 50.002556121820184,
|
10227 |
+
"learning_rate": 2.7674919334793033e-08,
|
10228 |
+
"logits/chosen": -1.3935497999191284,
|
10229 |
+
"logits/rejected": -1.4638290405273438,
|
10230 |
+
"logps/chosen": -188.38775634765625,
|
10231 |
+
"logps/rejected": -218.97442626953125,
|
10232 |
+
"loss": 0.3643,
|
10233 |
+
"rewards/accuracies": 0.90625,
|
10234 |
+
"rewards/chosen": -0.9267150163650513,
|
10235 |
+
"rewards/margins": 1.4599485397338867,
|
10236 |
+
"rewards/rejected": -2.3866634368896484,
|
10237 |
+
"step": 1336
|
10238 |
+
},
|
10239 |
+
{
|
10240 |
+
"epoch": 1.543363852642203,
|
10241 |
+
"grad_norm": 43.96736215737179,
|
10242 |
+
"learning_rate": 2.7409581229246493e-08,
|
10243 |
+
"logits/chosen": -1.4587171077728271,
|
10244 |
+
"logits/rejected": -1.3520967960357666,
|
10245 |
+
"logps/chosen": -186.90631103515625,
|
10246 |
+
"logps/rejected": -198.11570739746094,
|
10247 |
+
"loss": 0.4022,
|
10248 |
+
"rewards/accuracies": 0.84375,
|
10249 |
+
"rewards/chosen": -0.6897175908088684,
|
10250 |
+
"rewards/margins": 1.123156189918518,
|
10251 |
+
"rewards/rejected": -1.8128737211227417,
|
10252 |
+
"step": 1338
|
10253 |
+
},
|
10254 |
+
{
|
10255 |
+
"epoch": 1.5456708240213395,
|
10256 |
+
"grad_norm": 48.95299994888663,
|
10257 |
+
"learning_rate": 2.7145319088820985e-08,
|
10258 |
+
"logits/chosen": -1.4856892824172974,
|
10259 |
+
"logits/rejected": -1.4046401977539062,
|
10260 |
+
"logps/chosen": -157.6865234375,
|
10261 |
+
"logps/rejected": -185.38278198242188,
|
10262 |
+
"loss": 0.3834,
|
10263 |
+
"rewards/accuracies": 0.8125,
|
10264 |
+
"rewards/chosen": -0.9328738451004028,
|
10265 |
+
"rewards/margins": 1.4649465084075928,
|
10266 |
+
"rewards/rejected": -2.397820234298706,
|
10267 |
+
"step": 1340
|
10268 |
+
},
|
10269 |
+
{
|
10270 |
+
"epoch": 1.5479777954004759,
|
10271 |
+
"grad_norm": 47.48346304531486,
|
10272 |
+
"learning_rate": 2.688213683051892e-08,
|
10273 |
+
"logits/chosen": -1.3646446466445923,
|
10274 |
+
"logits/rejected": -1.3468654155731201,
|
10275 |
+
"logps/chosen": -199.64413452148438,
|
10276 |
+
"logps/rejected": -227.54318237304688,
|
10277 |
+
"loss": 0.3599,
|
10278 |
+
"rewards/accuracies": 0.875,
|
10279 |
+
"rewards/chosen": -1.035663366317749,
|
10280 |
+
"rewards/margins": 1.2792800664901733,
|
10281 |
+
"rewards/rejected": -2.314943552017212,
|
10282 |
+
"step": 1342
|
10283 |
+
},
|
10284 |
+
{
|
10285 |
+
"epoch": 1.5502847667796122,
|
10286 |
+
"grad_norm": 47.27417390076413,
|
10287 |
+
"learning_rate": 2.6620038355336305e-08,
|
10288 |
+
"logits/chosen": -1.4909706115722656,
|
10289 |
+
"logits/rejected": -1.4687060117721558,
|
10290 |
+
"logps/chosen": -160.08041381835938,
|
10291 |
+
"logps/rejected": -195.14886474609375,
|
10292 |
+
"loss": 0.3712,
|
10293 |
+
"rewards/accuracies": 0.875,
|
10294 |
+
"rewards/chosen": -0.8120248317718506,
|
10295 |
+
"rewards/margins": 1.2791612148284912,
|
10296 |
+
"rewards/rejected": -2.091186285018921,
|
10297 |
+
"step": 1344
|
10298 |
+
},
|
10299 |
+
{
|
10300 |
+
"epoch": 1.5525917381587484,
|
10301 |
+
"grad_norm": 39.1002186836634,
|
10302 |
+
"learning_rate": 2.635902754820475e-08,
|
10303 |
+
"logits/chosen": -1.4342174530029297,
|
10304 |
+
"logits/rejected": -1.4962186813354492,
|
10305 |
+
"logps/chosen": -203.3734588623047,
|
10306 |
+
"logps/rejected": -263.7275695800781,
|
10307 |
+
"loss": 0.3782,
|
10308 |
+
"rewards/accuracies": 0.84375,
|
10309 |
+
"rewards/chosen": -0.8007220029830933,
|
10310 |
+
"rewards/margins": 1.4874347448349,
|
10311 |
+
"rewards/rejected": -2.288156747817993,
|
10312 |
+
"step": 1346
|
10313 |
+
},
|
10314 |
+
{
|
10315 |
+
"epoch": 1.5548987095378848,
|
10316 |
+
"grad_norm": 44.5283434969193,
|
10317 |
+
"learning_rate": 2.60991082779341e-08,
|
10318 |
+
"logits/chosen": -1.2880148887634277,
|
10319 |
+
"logits/rejected": -1.3925414085388184,
|
10320 |
+
"logps/chosen": -148.82077026367188,
|
10321 |
+
"logps/rejected": -200.53903198242188,
|
10322 |
+
"loss": 0.3493,
|
10323 |
+
"rewards/accuracies": 0.90625,
|
10324 |
+
"rewards/chosen": -0.9107972383499146,
|
10325 |
+
"rewards/margins": 1.1858881711959839,
|
10326 |
+
"rewards/rejected": -2.0966851711273193,
|
10327 |
+
"step": 1348
|
10328 |
+
},
|
10329 |
+
{
|
10330 |
+
"epoch": 1.5572056809170212,
|
10331 |
+
"grad_norm": 45.32965113835206,
|
10332 |
+
"learning_rate": 2.5840284397154965e-08,
|
10333 |
+
"logits/chosen": -1.4253007173538208,
|
10334 |
+
"logits/rejected": -1.3697370290756226,
|
10335 |
+
"logps/chosen": -140.25965881347656,
|
10336 |
+
"logps/rejected": -157.40565490722656,
|
10337 |
+
"loss": 0.3721,
|
10338 |
+
"rewards/accuracies": 0.96875,
|
10339 |
+
"rewards/chosen": -0.6943234205245972,
|
10340 |
+
"rewards/margins": 1.344458818435669,
|
10341 |
+
"rewards/rejected": -2.0387821197509766,
|
10342 |
+
"step": 1350
|
10343 |
+
},
|
10344 |
+
{
|
10345 |
+
"epoch": 1.5595126522961573,
|
10346 |
+
"grad_norm": 43.36149821146732,
|
10347 |
+
"learning_rate": 2.5582559742261645e-08,
|
10348 |
+
"logits/chosen": -1.2941721677780151,
|
10349 |
+
"logits/rejected": -1.2105193138122559,
|
10350 |
+
"logps/chosen": -151.37664794921875,
|
10351 |
+
"logps/rejected": -192.40484619140625,
|
10352 |
+
"loss": 0.4241,
|
10353 |
+
"rewards/accuracies": 0.8125,
|
10354 |
+
"rewards/chosen": -0.9218543171882629,
|
10355 |
+
"rewards/margins": 1.150537133216858,
|
10356 |
+
"rewards/rejected": -2.0723917484283447,
|
10357 |
+
"step": 1352
|
10358 |
+
},
|
10359 |
+
{
|
10360 |
+
"epoch": 1.561819623675294,
|
10361 |
+
"grad_norm": 48.73204722090299,
|
10362 |
+
"learning_rate": 2.532593813335524e-08,
|
10363 |
+
"logits/chosen": -1.4378621578216553,
|
10364 |
+
"logits/rejected": -1.4481985569000244,
|
10365 |
+
"logps/chosen": -165.80274963378906,
|
10366 |
+
"logps/rejected": -181.1370391845703,
|
10367 |
+
"loss": 0.4308,
|
10368 |
+
"rewards/accuracies": 0.71875,
|
10369 |
+
"rewards/chosen": -0.9549030661582947,
|
10370 |
+
"rewards/margins": 0.6894029378890991,
|
10371 |
+
"rewards/rejected": -1.6443060636520386,
|
10372 |
+
"step": 1354
|
10373 |
+
},
|
10374 |
+
{
|
10375 |
+
"epoch": 1.56412659505443,
|
10376 |
+
"grad_norm": 48.96214324123758,
|
10377 |
+
"learning_rate": 2.5070423374187066e-08,
|
10378 |
+
"logits/chosen": -1.3675079345703125,
|
10379 |
+
"logits/rejected": -1.3449468612670898,
|
10380 |
+
"logps/chosen": -179.66668701171875,
|
10381 |
+
"logps/rejected": -205.29490661621094,
|
10382 |
+
"loss": 0.4202,
|
10383 |
+
"rewards/accuracies": 0.9375,
|
10384 |
+
"rewards/chosen": -0.7179181575775146,
|
10385 |
+
"rewards/margins": 1.2601079940795898,
|
10386 |
+
"rewards/rejected": -1.9780261516571045,
|
10387 |
+
"step": 1356
|
10388 |
+
},
|
10389 |
+
{
|
10390 |
+
"epoch": 1.5664335664335665,
|
10391 |
+
"grad_norm": 55.81959533812462,
|
10392 |
+
"learning_rate": 2.4816019252102272e-08,
|
10393 |
+
"logits/chosen": -1.3808372020721436,
|
10394 |
+
"logits/rejected": -1.2713388204574585,
|
10395 |
+
"logps/chosen": -192.23965454101562,
|
10396 |
+
"logps/rejected": -183.83566284179688,
|
10397 |
+
"loss": 0.4231,
|
10398 |
+
"rewards/accuracies": 0.8125,
|
10399 |
+
"rewards/chosen": -1.2753582000732422,
|
10400 |
+
"rewards/margins": 0.6576382517814636,
|
10401 |
+
"rewards/rejected": -1.932996392250061,
|
10402 |
+
"step": 1358
|
10403 |
+
},
|
10404 |
+
{
|
10405 |
+
"epoch": 1.5687405378127028,
|
10406 |
+
"grad_norm": 56.59847700001908,
|
10407 |
+
"learning_rate": 2.4562729537983605e-08,
|
10408 |
+
"logits/chosen": -1.2573238611221313,
|
10409 |
+
"logits/rejected": -1.2922176122665405,
|
10410 |
+
"logps/chosen": -149.29942321777344,
|
10411 |
+
"logps/rejected": -166.0135955810547,
|
10412 |
+
"loss": 0.4638,
|
10413 |
+
"rewards/accuracies": 0.71875,
|
10414 |
+
"rewards/chosen": -0.8205444812774658,
|
10415 |
+
"rewards/margins": 0.7928743958473206,
|
10416 |
+
"rewards/rejected": -1.6134188175201416,
|
10417 |
+
"step": 1360
|
10418 |
+
},
|
10419 |
+
{
|
10420 |
+
"epoch": 1.571047509191839,
|
10421 |
+
"grad_norm": 37.26011990037049,
|
10422 |
+
"learning_rate": 2.4310557986195702e-08,
|
10423 |
+
"logits/chosen": -1.3010238409042358,
|
10424 |
+
"logits/rejected": -1.2920893430709839,
|
10425 |
+
"logps/chosen": -299.04022216796875,
|
10426 |
+
"logps/rejected": -379.41375732421875,
|
10427 |
+
"loss": 0.2954,
|
10428 |
+
"rewards/accuracies": 0.9375,
|
10429 |
+
"rewards/chosen": -1.4708349704742432,
|
10430 |
+
"rewards/margins": 2.2122201919555664,
|
10431 |
+
"rewards/rejected": -3.6830554008483887,
|
10432 |
+
"step": 1362
|
10433 |
+
},
|
10434 |
+
{
|
10435 |
+
"epoch": 1.5733544805709754,
|
10436 |
+
"grad_norm": 49.54139763435727,
|
10437 |
+
"learning_rate": 2.4059508334529277e-08,
|
10438 |
+
"logits/chosen": -1.3956283330917358,
|
10439 |
+
"logits/rejected": -1.457297921180725,
|
10440 |
+
"logps/chosen": -191.56198120117188,
|
10441 |
+
"logps/rejected": -222.62847900390625,
|
10442 |
+
"loss": 0.4168,
|
10443 |
+
"rewards/accuracies": 0.875,
|
10444 |
+
"rewards/chosen": -0.9353391528129578,
|
10445 |
+
"rewards/margins": 1.0330374240875244,
|
10446 |
+
"rewards/rejected": -1.968376636505127,
|
10447 |
+
"step": 1364
|
10448 |
+
},
|
10449 |
+
{
|
10450 |
+
"epoch": 1.5756614519501118,
|
10451 |
+
"grad_norm": 41.5799173152666,
|
10452 |
+
"learning_rate": 2.3809584304145824e-08,
|
10453 |
+
"logits/chosen": -1.3520365953445435,
|
10454 |
+
"logits/rejected": -1.3251811265945435,
|
10455 |
+
"logps/chosen": -155.65635681152344,
|
10456 |
+
"logps/rejected": -256.5829162597656,
|
10457 |
+
"loss": 0.3825,
|
10458 |
+
"rewards/accuracies": 0.78125,
|
10459 |
+
"rewards/chosen": -0.817021369934082,
|
10460 |
+
"rewards/margins": 1.9787249565124512,
|
10461 |
+
"rewards/rejected": -2.795746326446533,
|
10462 |
+
"step": 1366
|
10463 |
+
},
|
10464 |
+
{
|
10465 |
+
"epoch": 1.577968423329248,
|
10466 |
+
"grad_norm": 42.84011718919187,
|
10467 |
+
"learning_rate": 2.3560789599522324e-08,
|
10468 |
+
"logits/chosen": -1.544500708580017,
|
10469 |
+
"logits/rejected": -1.4618444442749023,
|
10470 |
+
"logps/chosen": -161.96481323242188,
|
10471 |
+
"logps/rejected": -154.49684143066406,
|
10472 |
+
"loss": 0.376,
|
10473 |
+
"rewards/accuracies": 0.8125,
|
10474 |
+
"rewards/chosen": -0.8055549263954163,
|
10475 |
+
"rewards/margins": 0.7844254374504089,
|
10476 |
+
"rewards/rejected": -1.5899803638458252,
|
10477 |
+
"step": 1368
|
10478 |
+
},
|
10479 |
+
{
|
10480 |
+
"epoch": 1.5802753947083845,
|
10481 |
+
"grad_norm": 41.774939807969034,
|
10482 |
+
"learning_rate": 2.3313127908396513e-08,
|
10483 |
+
"logits/chosen": -1.3718278408050537,
|
10484 |
+
"logits/rejected": -1.4343537092208862,
|
10485 |
+
"logps/chosen": -206.5829315185547,
|
10486 |
+
"logps/rejected": -266.7309265136719,
|
10487 |
+
"loss": 0.3721,
|
10488 |
+
"rewards/accuracies": 0.9375,
|
10489 |
+
"rewards/chosen": -0.9916315674781799,
|
10490 |
+
"rewards/margins": 1.702213168144226,
|
10491 |
+
"rewards/rejected": -2.69384503364563,
|
10492 |
+
"step": 1370
|
10493 |
+
},
|
10494 |
+
{
|
10495 |
+
"epoch": 1.5825823660875207,
|
10496 |
+
"grad_norm": 44.23281683503162,
|
10497 |
+
"learning_rate": 2.3066602901712107e-08,
|
10498 |
+
"logits/chosen": -1.4820692539215088,
|
10499 |
+
"logits/rejected": -1.411734700202942,
|
10500 |
+
"logps/chosen": -131.54576110839844,
|
10501 |
+
"logps/rejected": -182.11221313476562,
|
10502 |
+
"loss": 0.4274,
|
10503 |
+
"rewards/accuracies": 0.875,
|
10504 |
+
"rewards/chosen": -0.6956377029418945,
|
10505 |
+
"rewards/margins": 1.3419928550720215,
|
10506 |
+
"rewards/rejected": -2.037630558013916,
|
10507 |
+
"step": 1372
|
10508 |
+
},
|
10509 |
+
{
|
10510 |
+
"epoch": 1.584889337466657,
|
10511 |
+
"grad_norm": 45.40441226988886,
|
10512 |
+
"learning_rate": 2.282121823356443e-08,
|
10513 |
+
"logits/chosen": -1.382628321647644,
|
10514 |
+
"logits/rejected": -1.3758268356323242,
|
10515 |
+
"logps/chosen": -180.58364868164062,
|
10516 |
+
"logps/rejected": -184.94630432128906,
|
10517 |
+
"loss": 0.3771,
|
10518 |
+
"rewards/accuracies": 0.8125,
|
10519 |
+
"rewards/chosen": -0.8638473749160767,
|
10520 |
+
"rewards/margins": 1.0921387672424316,
|
10521 |
+
"rewards/rejected": -1.9559861421585083,
|
10522 |
+
"step": 1374
|
10523 |
+
},
|
10524 |
+
{
|
10525 |
+
"epoch": 1.5871963088457934,
|
10526 |
+
"grad_norm": 53.33501581540229,
|
10527 |
+
"learning_rate": 2.2576977541146192e-08,
|
10528 |
+
"logits/chosen": -1.4102963209152222,
|
10529 |
+
"logits/rejected": -1.3674509525299072,
|
10530 |
+
"logps/chosen": -155.62570190429688,
|
10531 |
+
"logps/rejected": -192.2836151123047,
|
10532 |
+
"loss": 0.4065,
|
10533 |
+
"rewards/accuracies": 0.84375,
|
10534 |
+
"rewards/chosen": -0.7117963433265686,
|
10535 |
+
"rewards/margins": 1.3161264657974243,
|
10536 |
+
"rewards/rejected": -2.0279228687286377,
|
10537 |
+
"step": 1376
|
10538 |
+
},
|
10539 |
+
{
|
10540 |
+
"epoch": 1.5895032802249296,
|
10541 |
+
"grad_norm": 51.75166271383647,
|
10542 |
+
"learning_rate": 2.233388444469365e-08,
|
10543 |
+
"logits/chosen": -1.4923657178878784,
|
10544 |
+
"logits/rejected": -1.4987457990646362,
|
10545 |
+
"logps/chosen": -155.1583709716797,
|
10546 |
+
"logps/rejected": -197.0376434326172,
|
10547 |
+
"loss": 0.3875,
|
10548 |
+
"rewards/accuracies": 0.875,
|
10549 |
+
"rewards/chosen": -0.562934398651123,
|
10550 |
+
"rewards/margins": 1.135040044784546,
|
10551 |
+
"rewards/rejected": -1.6979745626449585,
|
10552 |
+
"step": 1378
|
10553 |
+
},
|
10554 |
+
{
|
10555 |
+
"epoch": 1.5918102516040662,
|
10556 |
+
"grad_norm": 42.14115881844709,
|
10557 |
+
"learning_rate": 2.2091942547432952e-08,
|
10558 |
+
"logits/chosen": -1.3474974632263184,
|
10559 |
+
"logits/rejected": -1.4093090295791626,
|
10560 |
+
"logps/chosen": -168.27703857421875,
|
10561 |
+
"logps/rejected": -235.56069946289062,
|
10562 |
+
"loss": 0.3403,
|
10563 |
+
"rewards/accuracies": 0.875,
|
10564 |
+
"rewards/chosen": -0.7459644675254822,
|
10565 |
+
"rewards/margins": 1.911803960800171,
|
10566 |
+
"rewards/rejected": -2.6577682495117188,
|
10567 |
+
"step": 1380
|
10568 |
+
},
|
10569 |
+
{
|
10570 |
+
"epoch": 1.5941172229832024,
|
10571 |
+
"grad_norm": 45.28202449928773,
|
10572 |
+
"learning_rate": 2.185115543552668e-08,
|
10573 |
+
"logits/chosen": -1.3790557384490967,
|
10574 |
+
"logits/rejected": -1.3599779605865479,
|
10575 |
+
"logps/chosen": -207.94744873046875,
|
10576 |
+
"logps/rejected": -290.3175048828125,
|
10577 |
+
"loss": 0.349,
|
10578 |
+
"rewards/accuracies": 0.8125,
|
10579 |
+
"rewards/chosen": -1.0387717485427856,
|
10580 |
+
"rewards/margins": 1.9479621648788452,
|
10581 |
+
"rewards/rejected": -2.986733913421631,
|
10582 |
+
"step": 1382
|
10583 |
+
},
|
10584 |
+
{
|
10585 |
+
"epoch": 1.5964241943623387,
|
10586 |
+
"grad_norm": 46.671094507627366,
|
10587 |
+
"learning_rate": 2.161152667802065e-08,
|
10588 |
+
"logits/chosen": -1.4180598258972168,
|
10589 |
+
"logits/rejected": -1.4577977657318115,
|
10590 |
+
"logps/chosen": -204.82492065429688,
|
10591 |
+
"logps/rejected": -241.60826110839844,
|
10592 |
+
"loss": 0.3675,
|
10593 |
+
"rewards/accuracies": 0.875,
|
10594 |
+
"rewards/chosen": -1.1089719533920288,
|
10595 |
+
"rewards/margins": 1.2917826175689697,
|
10596 |
+
"rewards/rejected": -2.400754690170288,
|
10597 |
+
"step": 1384
|
10598 |
+
},
|
10599 |
+
{
|
10600 |
+
"epoch": 1.5987311657414751,
|
10601 |
+
"grad_norm": 56.85629358101348,
|
10602 |
+
"learning_rate": 2.137305982679114e-08,
|
10603 |
+
"logits/chosen": -1.3005365133285522,
|
10604 |
+
"logits/rejected": -1.319321632385254,
|
10605 |
+
"logps/chosen": -199.8241729736328,
|
10606 |
+
"logps/rejected": -261.0871887207031,
|
10607 |
+
"loss": 0.4032,
|
10608 |
+
"rewards/accuracies": 0.84375,
|
10609 |
+
"rewards/chosen": -1.0821079015731812,
|
10610 |
+
"rewards/margins": 1.5878570079803467,
|
10611 |
+
"rewards/rejected": -2.6699647903442383,
|
10612 |
+
"step": 1386
|
10613 |
+
},
|
10614 |
+
{
|
10615 |
+
"epoch": 1.6010381371206113,
|
10616 |
+
"grad_norm": 46.07174902767819,
|
10617 |
+
"learning_rate": 2.1135758416492165e-08,
|
10618 |
+
"logits/chosen": -1.5705550909042358,
|
10619 |
+
"logits/rejected": -1.4742302894592285,
|
10620 |
+
"logps/chosen": -198.67947387695312,
|
10621 |
+
"logps/rejected": -229.13302612304688,
|
10622 |
+
"loss": 0.3705,
|
10623 |
+
"rewards/accuracies": 0.9375,
|
10624 |
+
"rewards/chosen": -0.9288389682769775,
|
10625 |
+
"rewards/margins": 1.4559340476989746,
|
10626 |
+
"rewards/rejected": -2.384772777557373,
|
10627 |
+
"step": 1388
|
10628 |
+
},
|
10629 |
+
{
|
10630 |
+
"epoch": 1.6033451084997477,
|
10631 |
+
"grad_norm": 42.39566003219877,
|
10632 |
+
"learning_rate": 2.089962596450311e-08,
|
10633 |
+
"logits/chosen": -1.4445676803588867,
|
10634 |
+
"logits/rejected": -1.5513989925384521,
|
10635 |
+
"logps/chosen": -166.5742950439453,
|
10636 |
+
"logps/rejected": -210.93817138671875,
|
10637 |
+
"loss": 0.3723,
|
10638 |
+
"rewards/accuracies": 0.75,
|
10639 |
+
"rewards/chosen": -1.0038912296295166,
|
10640 |
+
"rewards/margins": 1.1079350709915161,
|
10641 |
+
"rewards/rejected": -2.1118264198303223,
|
10642 |
+
"step": 1390
|
10643 |
+
},
|
10644 |
+
{
|
10645 |
+
"epoch": 1.605652079878884,
|
10646 |
+
"grad_norm": 45.674613502568626,
|
10647 |
+
"learning_rate": 2.0664665970876492e-08,
|
10648 |
+
"logits/chosen": -1.4405865669250488,
|
10649 |
+
"logits/rejected": -1.3463035821914673,
|
10650 |
+
"logps/chosen": -192.42994689941406,
|
10651 |
+
"logps/rejected": -198.7202606201172,
|
10652 |
+
"loss": 0.3608,
|
10653 |
+
"rewards/accuracies": 0.875,
|
10654 |
+
"rewards/chosen": -0.9397283792495728,
|
10655 |
+
"rewards/margins": 1.1064425706863403,
|
10656 |
+
"rewards/rejected": -2.046170949935913,
|
10657 |
+
"step": 1392
|
10658 |
+
},
|
10659 |
+
{
|
10660 |
+
"epoch": 1.6079590512580202,
|
10661 |
+
"grad_norm": 51.58508472735133,
|
10662 |
+
"learning_rate": 2.043088191828627e-08,
|
10663 |
+
"logits/chosen": -1.4186500310897827,
|
10664 |
+
"logits/rejected": -1.4255372285842896,
|
10665 |
+
"logps/chosen": -118.36923217773438,
|
10666 |
+
"logps/rejected": -183.34219360351562,
|
10667 |
+
"loss": 0.4034,
|
10668 |
+
"rewards/accuracies": 0.875,
|
10669 |
+
"rewards/chosen": -0.6923995018005371,
|
10670 |
+
"rewards/margins": 1.1543622016906738,
|
10671 |
+
"rewards/rejected": -1.84676194190979,
|
10672 |
+
"step": 1394
|
10673 |
+
},
|
10674 |
+
{
|
10675 |
+
"epoch": 1.6102660226371568,
|
10676 |
+
"grad_norm": 44.70959574418251,
|
10677 |
+
"learning_rate": 2.019827727197605e-08,
|
10678 |
+
"logits/chosen": -1.4978314638137817,
|
10679 |
+
"logits/rejected": -1.4510836601257324,
|
10680 |
+
"logps/chosen": -230.2447967529297,
|
10681 |
+
"logps/rejected": -276.96380615234375,
|
10682 |
+
"loss": 0.3901,
|
10683 |
+
"rewards/accuracies": 0.875,
|
10684 |
+
"rewards/chosen": -0.9087953567504883,
|
10685 |
+
"rewards/margins": 1.4139595031738281,
|
10686 |
+
"rewards/rejected": -2.3227550983428955,
|
10687 |
+
"step": 1396
|
10688 |
+
},
|
10689 |
+
{
|
10690 |
+
"epoch": 1.612572994016293,
|
10691 |
+
"grad_norm": 44.12838016221303,
|
10692 |
+
"learning_rate": 1.9966855479707868e-08,
|
10693 |
+
"logits/chosen": -1.327124834060669,
|
10694 |
+
"logits/rejected": -1.3261754512786865,
|
10695 |
+
"logps/chosen": -154.4375,
|
10696 |
+
"logps/rejected": -215.845458984375,
|
10697 |
+
"loss": 0.4076,
|
10698 |
+
"rewards/accuracies": 0.8125,
|
10699 |
+
"rewards/chosen": -0.8609018921852112,
|
10700 |
+
"rewards/margins": 1.4171010255813599,
|
10701 |
+
"rewards/rejected": -2.278002977371216,
|
10702 |
+
"step": 1398
|
10703 |
+
},
|
10704 |
+
{
|
10705 |
+
"epoch": 1.6148799653954293,
|
10706 |
+
"grad_norm": 44.23536998519936,
|
10707 |
+
"learning_rate": 1.9736619971710888e-08,
|
10708 |
+
"logits/chosen": -1.3296552896499634,
|
10709 |
+
"logits/rejected": -1.3575528860092163,
|
10710 |
+
"logps/chosen": -153.06039428710938,
|
10711 |
+
"logps/rejected": -195.69317626953125,
|
10712 |
+
"loss": 0.3428,
|
10713 |
+
"rewards/accuracies": 0.9375,
|
10714 |
+
"rewards/chosen": -0.6315370202064514,
|
10715 |
+
"rewards/margins": 1.1925398111343384,
|
10716 |
+
"rewards/rejected": -1.8240768909454346,
|
10717 |
+
"step": 1400
|
10718 |
+
},
|
10719 |
+
{
|
10720 |
+
"epoch": 1.6148799653954293,
|
10721 |
+
"eval_logits/chosen": -1.3480746746063232,
|
10722 |
+
"eval_logits/rejected": -1.2724710702896118,
|
10723 |
+
"eval_logps/chosen": -198.91085815429688,
|
10724 |
+
"eval_logps/rejected": -171.2626495361328,
|
10725 |
+
"eval_loss": 0.5261049866676331,
|
10726 |
+
"eval_rewards/accuracies": 0.6800000071525574,
|
10727 |
+
"eval_rewards/chosen": -1.3728693723678589,
|
10728 |
+
"eval_rewards/margins": 0.7018558382987976,
|
10729 |
+
"eval_rewards/rejected": -2.0747251510620117,
|
10730 |
+
"eval_runtime": 22.7346,
|
10731 |
+
"eval_samples_per_second": 4.399,
|
10732 |
+
"eval_steps_per_second": 1.1,
|
10733 |
+
"step": 1400
|
10734 |
+
},
|
10735 |
+
{
|
10736 |
+
"epoch": 1.6171869367745657,
|
10737 |
+
"grad_norm": 52.08080969982687,
|
10738 |
+
"learning_rate": 1.9507574160630767e-08,
|
10739 |
+
"logits/chosen": -1.2814185619354248,
|
10740 |
+
"logits/rejected": -1.2460808753967285,
|
10741 |
+
"logps/chosen": -144.47117614746094,
|
10742 |
+
"logps/rejected": -188.13999938964844,
|
10743 |
+
"loss": 0.384,
|
10744 |
+
"rewards/accuracies": 0.875,
|
10745 |
+
"rewards/chosen": -0.7697983980178833,
|
10746 |
+
"rewards/margins": 1.3541502952575684,
|
10747 |
+
"rewards/rejected": -2.123948574066162,
|
10748 |
+
"step": 1402
|
10749 |
+
},
|
10750 |
+
{
|
10751 |
+
"epoch": 1.6194939081537019,
|
10752 |
+
"grad_norm": 45.36665871108164,
|
10753 |
+
"learning_rate": 1.9279721441479046e-08,
|
10754 |
+
"logits/chosen": -1.4106312990188599,
|
10755 |
+
"logits/rejected": -1.458733320236206,
|
10756 |
+
"logps/chosen": -192.62257385253906,
|
10757 |
+
"logps/rejected": -218.3051300048828,
|
10758 |
+
"loss": 0.4132,
|
10759 |
+
"rewards/accuracies": 0.84375,
|
10760 |
+
"rewards/chosen": -1.014463186264038,
|
10761 |
+
"rewards/margins": 1.271317958831787,
|
10762 |
+
"rewards/rejected": -2.2857813835144043,
|
10763 |
+
"step": 1404
|
10764 |
+
},
|
10765 |
+
{
|
10766 |
+
"epoch": 1.6218008795328382,
|
10767 |
+
"grad_norm": 58.50577692485291,
|
10768 |
+
"learning_rate": 1.9053065191582606e-08,
|
10769 |
+
"logits/chosen": -1.5028626918792725,
|
10770 |
+
"logits/rejected": -1.3959300518035889,
|
10771 |
+
"logps/chosen": -174.7528076171875,
|
10772 |
+
"logps/rejected": -221.61056518554688,
|
10773 |
+
"loss": 0.3783,
|
10774 |
+
"rewards/accuracies": 0.875,
|
10775 |
+
"rewards/chosen": -0.924181342124939,
|
10776 |
+
"rewards/margins": 1.244417428970337,
|
10777 |
+
"rewards/rejected": -2.1685988903045654,
|
10778 |
+
"step": 1406
|
10779 |
+
},
|
10780 |
+
{
|
10781 |
+
"epoch": 1.6241078509119746,
|
10782 |
+
"grad_norm": 55.69642576738633,
|
10783 |
+
"learning_rate": 1.8827608770533877e-08,
|
10784 |
+
"logits/chosen": -1.301309585571289,
|
10785 |
+
"logits/rejected": -1.219205617904663,
|
10786 |
+
"logps/chosen": -178.25352478027344,
|
10787 |
+
"logps/rejected": -169.32664489746094,
|
10788 |
+
"loss": 0.4345,
|
10789 |
+
"rewards/accuracies": 0.71875,
|
10790 |
+
"rewards/chosen": -0.880225419998169,
|
10791 |
+
"rewards/margins": 0.7701900005340576,
|
10792 |
+
"rewards/rejected": -1.6504155397415161,
|
10793 |
+
"step": 1408
|
10794 |
+
},
|
10795 |
+
{
|
10796 |
+
"epoch": 1.6264148222911108,
|
10797 |
+
"grad_norm": 50.05529030380214,
|
10798 |
+
"learning_rate": 1.8603355520140895e-08,
|
10799 |
+
"logits/chosen": -1.368369221687317,
|
10800 |
+
"logits/rejected": -1.25881826877594,
|
10801 |
+
"logps/chosen": -166.48806762695312,
|
10802 |
+
"logps/rejected": -160.21493530273438,
|
10803 |
+
"loss": 0.4011,
|
10804 |
+
"rewards/accuracies": 0.90625,
|
10805 |
+
"rewards/chosen": -1.0832921266555786,
|
10806 |
+
"rewards/margins": 1.3331881761550903,
|
10807 |
+
"rewards/rejected": -2.416480302810669,
|
10808 |
+
"step": 1410
|
10809 |
+
},
|
10810 |
+
{
|
10811 |
+
"epoch": 1.6287217936702474,
|
10812 |
+
"grad_norm": 35.31423347756389,
|
10813 |
+
"learning_rate": 1.838030876437784e-08,
|
10814 |
+
"logits/chosen": -1.4292563199996948,
|
10815 |
+
"logits/rejected": -1.424263596534729,
|
10816 |
+
"logps/chosen": -182.7539825439453,
|
10817 |
+
"logps/rejected": -216.91290283203125,
|
10818 |
+
"loss": 0.3719,
|
10819 |
+
"rewards/accuracies": 0.84375,
|
10820 |
+
"rewards/chosen": -0.6027979254722595,
|
10821 |
+
"rewards/margins": 1.2666302919387817,
|
10822 |
+
"rewards/rejected": -1.8694281578063965,
|
10823 |
+
"step": 1412
|
10824 |
+
},
|
10825 |
+
{
|
10826 |
+
"epoch": 1.6310287650493835,
|
10827 |
+
"grad_norm": 59.70248933056549,
|
10828 |
+
"learning_rate": 1.815847180933565e-08,
|
10829 |
+
"logits/chosen": -1.3337175846099854,
|
10830 |
+
"logits/rejected": -1.327998161315918,
|
10831 |
+
"logps/chosen": -158.14678955078125,
|
10832 |
+
"logps/rejected": -170.66796875,
|
10833 |
+
"loss": 0.4364,
|
10834 |
+
"rewards/accuracies": 0.78125,
|
10835 |
+
"rewards/chosen": -0.9497777819633484,
|
10836 |
+
"rewards/margins": 1.0439199209213257,
|
10837 |
+
"rewards/rejected": -1.9936977624893188,
|
10838 |
+
"step": 1414
|
10839 |
+
},
|
10840 |
+
{
|
10841 |
+
"epoch": 1.63333573642852,
|
10842 |
+
"grad_norm": 57.90179651786705,
|
10843 |
+
"learning_rate": 1.793784794317319e-08,
|
10844 |
+
"logits/chosen": -1.3915679454803467,
|
10845 |
+
"logits/rejected": -1.3838558197021484,
|
10846 |
+
"logps/chosen": -183.7068634033203,
|
10847 |
+
"logps/rejected": -220.16253662109375,
|
10848 |
+
"loss": 0.4023,
|
10849 |
+
"rewards/accuracies": 0.84375,
|
10850 |
+
"rewards/chosen": -0.9595727920532227,
|
10851 |
+
"rewards/margins": 1.3129405975341797,
|
10852 |
+
"rewards/rejected": -2.2725133895874023,
|
10853 |
+
"step": 1416
|
10854 |
+
},
|
10855 |
+
{
|
10856 |
+
"epoch": 1.6356427078076563,
|
10857 |
+
"grad_norm": 51.167487196457614,
|
10858 |
+
"learning_rate": 1.7718440436068382e-08,
|
10859 |
+
"logits/chosen": -1.3140525817871094,
|
10860 |
+
"logits/rejected": -1.340272068977356,
|
10861 |
+
"logps/chosen": -176.0500030517578,
|
10862 |
+
"logps/rejected": -204.65216064453125,
|
10863 |
+
"loss": 0.474,
|
10864 |
+
"rewards/accuracies": 0.8125,
|
10865 |
+
"rewards/chosen": -0.8645345568656921,
|
10866 |
+
"rewards/margins": 1.0821396112442017,
|
10867 |
+
"rewards/rejected": -1.9466743469238281,
|
10868 |
+
"step": 1418
|
10869 |
+
},
|
10870 |
+
{
|
10871 |
+
"epoch": 1.6379496791867925,
|
10872 |
+
"grad_norm": 56.94252600307747,
|
10873 |
+
"learning_rate": 1.750025254016978e-08,
|
10874 |
+
"logits/chosen": -1.2692408561706543,
|
10875 |
+
"logits/rejected": -1.3052905797958374,
|
10876 |
+
"logps/chosen": -175.3609161376953,
|
10877 |
+
"logps/rejected": -211.70640563964844,
|
10878 |
+
"loss": 0.3708,
|
10879 |
+
"rewards/accuracies": 0.84375,
|
10880 |
+
"rewards/chosen": -0.8849305510520935,
|
10881 |
+
"rewards/margins": 1.1714421510696411,
|
10882 |
+
"rewards/rejected": -2.05637264251709,
|
10883 |
+
"step": 1420
|
10884 |
+
},
|
10885 |
+
{
|
10886 |
+
"epoch": 1.640256650565929,
|
10887 |
+
"grad_norm": 44.444243917868796,
|
10888 |
+
"learning_rate": 1.7283287489548316e-08,
|
10889 |
+
"logits/chosen": -1.4492988586425781,
|
10890 |
+
"logits/rejected": -1.4249682426452637,
|
10891 |
+
"logps/chosen": -196.2264404296875,
|
10892 |
+
"logps/rejected": -216.29348754882812,
|
10893 |
+
"loss": 0.3663,
|
10894 |
+
"rewards/accuracies": 0.9375,
|
10895 |
+
"rewards/chosen": -0.5758548974990845,
|
10896 |
+
"rewards/margins": 1.248561143875122,
|
10897 |
+
"rewards/rejected": -1.824416160583496,
|
10898 |
+
"step": 1422
|
10899 |
+
},
|
10900 |
+
{
|
10901 |
+
"epoch": 1.6425636219450652,
|
10902 |
+
"grad_norm": 52.679148376432316,
|
10903 |
+
"learning_rate": 1.7067548500149453e-08,
|
10904 |
+
"logits/chosen": -1.359799861907959,
|
10905 |
+
"logits/rejected": -1.344727635383606,
|
10906 |
+
"logps/chosen": -164.31396484375,
|
10907 |
+
"logps/rejected": -226.0536651611328,
|
10908 |
+
"loss": 0.378,
|
10909 |
+
"rewards/accuracies": 0.9375,
|
10910 |
+
"rewards/chosen": -0.9102058410644531,
|
10911 |
+
"rewards/margins": 1.4322762489318848,
|
10912 |
+
"rewards/rejected": -2.342482328414917,
|
10913 |
+
"step": 1424
|
10914 |
+
},
|
10915 |
+
{
|
10916 |
+
"epoch": 1.6448705933242016,
|
10917 |
+
"grad_norm": 49.61927724673604,
|
10918 |
+
"learning_rate": 1.6853038769745463e-08,
|
10919 |
+
"logits/chosen": -1.2480251789093018,
|
10920 |
+
"logits/rejected": -1.3352330923080444,
|
10921 |
+
"logps/chosen": -154.8423614501953,
|
10922 |
+
"logps/rejected": -193.94419860839844,
|
10923 |
+
"loss": 0.4382,
|
10924 |
+
"rewards/accuracies": 0.84375,
|
10925 |
+
"rewards/chosen": -0.9381543397903442,
|
10926 |
+
"rewards/margins": 1.1628456115722656,
|
10927 |
+
"rewards/rejected": -2.1010000705718994,
|
10928 |
+
"step": 1426
|
10929 |
+
},
|
10930 |
+
{
|
10931 |
+
"epoch": 1.647177564703338,
|
10932 |
+
"grad_norm": 49.640113889302896,
|
10933 |
+
"learning_rate": 1.663976147788806e-08,
|
10934 |
+
"logits/chosen": -1.3284053802490234,
|
10935 |
+
"logits/rejected": -1.3385878801345825,
|
10936 |
+
"logps/chosen": -165.2407989501953,
|
10937 |
+
"logps/rejected": -197.4837188720703,
|
10938 |
+
"loss": 0.3626,
|
10939 |
+
"rewards/accuracies": 0.84375,
|
10940 |
+
"rewards/chosen": -0.84311842918396,
|
10941 |
+
"rewards/margins": 1.2521381378173828,
|
10942 |
+
"rewards/rejected": -2.0952565670013428,
|
10943 |
+
"step": 1428
|
10944 |
+
},
|
10945 |
+
{
|
10946 |
+
"epoch": 1.6494845360824741,
|
10947 |
+
"grad_norm": 43.114553829090994,
|
10948 |
+
"learning_rate": 1.642771978586116e-08,
|
10949 |
+
"logits/chosen": -1.2242742776870728,
|
10950 |
+
"logits/rejected": -1.2630822658538818,
|
10951 |
+
"logps/chosen": -154.83834838867188,
|
10952 |
+
"logps/rejected": -206.02879333496094,
|
10953 |
+
"loss": 0.427,
|
10954 |
+
"rewards/accuracies": 0.84375,
|
10955 |
+
"rewards/chosen": -0.9801341891288757,
|
10956 |
+
"rewards/margins": 1.2844398021697998,
|
10957 |
+
"rewards/rejected": -2.264573812484741,
|
10958 |
+
"step": 1430
|
10959 |
+
},
|
10960 |
+
{
|
10961 |
+
"epoch": 1.6517915074616105,
|
10962 |
+
"grad_norm": 51.33726232803117,
|
10963 |
+
"learning_rate": 1.6216916836634177e-08,
|
10964 |
+
"logits/chosen": -1.284468650817871,
|
10965 |
+
"logits/rejected": -1.2946577072143555,
|
10966 |
+
"logps/chosen": -209.00216674804688,
|
10967 |
+
"logps/rejected": -308.66400146484375,
|
10968 |
+
"loss": 0.36,
|
10969 |
+
"rewards/accuracies": 0.96875,
|
10970 |
+
"rewards/chosen": -1.1411385536193848,
|
10971 |
+
"rewards/margins": 1.973024606704712,
|
10972 |
+
"rewards/rejected": -3.114163637161255,
|
10973 |
+
"step": 1432
|
10974 |
+
},
|
10975 |
+
{
|
10976 |
+
"epoch": 1.654098478840747,
|
10977 |
+
"grad_norm": 48.222540985367566,
|
10978 |
+
"learning_rate": 1.6007355754815378e-08,
|
10979 |
+
"logits/chosen": -1.3908207416534424,
|
10980 |
+
"logits/rejected": -1.3934330940246582,
|
10981 |
+
"logps/chosen": -152.45028686523438,
|
10982 |
+
"logps/rejected": -180.3572235107422,
|
10983 |
+
"loss": 0.3947,
|
10984 |
+
"rewards/accuracies": 0.78125,
|
10985 |
+
"rewards/chosen": -0.8398849368095398,
|
10986 |
+
"rewards/margins": 0.8340297341346741,
|
10987 |
+
"rewards/rejected": -1.6739145517349243,
|
10988 |
+
"step": 1434
|
10989 |
+
},
|
10990 |
+
{
|
10991 |
+
"epoch": 1.656405450219883,
|
10992 |
+
"grad_norm": 46.78220848929133,
|
10993 |
+
"learning_rate": 1.5799039646605484e-08,
|
10994 |
+
"logits/chosen": -1.4227409362792969,
|
10995 |
+
"logits/rejected": -1.341314673423767,
|
10996 |
+
"logps/chosen": -119.38903045654297,
|
10997 |
+
"logps/rejected": -148.2276153564453,
|
10998 |
+
"loss": 0.4058,
|
10999 |
+
"rewards/accuracies": 0.84375,
|
11000 |
+
"rewards/chosen": -0.9205418825149536,
|
11001 |
+
"rewards/margins": 1.0745038986206055,
|
11002 |
+
"rewards/rejected": -1.995045781135559,
|
11003 |
+
"step": 1436
|
11004 |
+
},
|
11005 |
+
{
|
11006 |
+
"epoch": 1.6587124215990197,
|
11007 |
+
"grad_norm": 42.27815969127513,
|
11008 |
+
"learning_rate": 1.5591971599751795e-08,
|
11009 |
+
"logits/chosen": -1.243879795074463,
|
11010 |
+
"logits/rejected": -1.291917324066162,
|
11011 |
+
"logps/chosen": -138.65306091308594,
|
11012 |
+
"logps/rejected": -196.24070739746094,
|
11013 |
+
"loss": 0.4255,
|
11014 |
+
"rewards/accuracies": 0.875,
|
11015 |
+
"rewards/chosen": -0.8021218776702881,
|
11016 |
+
"rewards/margins": 1.5537731647491455,
|
11017 |
+
"rewards/rejected": -2.3558952808380127,
|
11018 |
+
"step": 1438
|
11019 |
+
},
|
11020 |
+
{
|
11021 |
+
"epoch": 1.6610193929781558,
|
11022 |
+
"grad_norm": 48.207824810254635,
|
11023 |
+
"learning_rate": 1.5386154683502274e-08,
|
11024 |
+
"logits/chosen": -1.4134782552719116,
|
11025 |
+
"logits/rejected": -1.3608386516571045,
|
11026 |
+
"logps/chosen": -188.5570068359375,
|
11027 |
+
"logps/rejected": -204.60919189453125,
|
11028 |
+
"loss": 0.4035,
|
11029 |
+
"rewards/accuracies": 0.8125,
|
11030 |
+
"rewards/chosen": -1.1449999809265137,
|
11031 |
+
"rewards/margins": 1.1091419458389282,
|
11032 |
+
"rewards/rejected": -2.2541420459747314,
|
11033 |
+
"step": 1440
|
11034 |
+
},
|
11035 |
+
{
|
11036 |
+
"epoch": 1.6633263643572922,
|
11037 |
+
"grad_norm": 57.39443136319576,
|
11038 |
+
"learning_rate": 1.5181591948560158e-08,
|
11039 |
+
"logits/chosen": -1.3800638914108276,
|
11040 |
+
"logits/rejected": -1.3380552530288696,
|
11041 |
+
"logps/chosen": -193.9412841796875,
|
11042 |
+
"logps/rejected": -215.79400634765625,
|
11043 |
+
"loss": 0.3816,
|
11044 |
+
"rewards/accuracies": 0.875,
|
11045 |
+
"rewards/chosen": -0.9195079803466797,
|
11046 |
+
"rewards/margins": 1.2903274297714233,
|
11047 |
+
"rewards/rejected": -2.2098352909088135,
|
11048 |
+
"step": 1442
|
11049 |
+
},
|
11050 |
+
{
|
11051 |
+
"epoch": 1.6656333357364286,
|
11052 |
+
"grad_norm": 43.25151944767239,
|
11053 |
+
"learning_rate": 1.49782864270386e-08,
|
11054 |
+
"logits/chosen": -1.4888612031936646,
|
11055 |
+
"logits/rejected": -1.4488850831985474,
|
11056 |
+
"logps/chosen": -151.032470703125,
|
11057 |
+
"logps/rejected": -182.21556091308594,
|
11058 |
+
"loss": 0.3453,
|
11059 |
+
"rewards/accuracies": 0.8125,
|
11060 |
+
"rewards/chosen": -1.026658535003662,
|
11061 |
+
"rewards/margins": 1.0582255125045776,
|
11062 |
+
"rewards/rejected": -2.0848841667175293,
|
11063 |
+
"step": 1444
|
11064 |
+
},
|
11065 |
+
{
|
11066 |
+
"epoch": 1.6679403071155647,
|
11067 |
+
"grad_norm": 38.80329184083557,
|
11068 |
+
"learning_rate": 1.4776241132415911e-08,
|
11069 |
+
"logits/chosen": -1.3972203731536865,
|
11070 |
+
"logits/rejected": -1.3395717144012451,
|
11071 |
+
"logps/chosen": -219.14697265625,
|
11072 |
+
"logps/rejected": -254.46820068359375,
|
11073 |
+
"loss": 0.3645,
|
11074 |
+
"rewards/accuracies": 0.9375,
|
11075 |
+
"rewards/chosen": -1.0151716470718384,
|
11076 |
+
"rewards/margins": 1.6079694032669067,
|
11077 |
+
"rewards/rejected": -2.623141050338745,
|
11078 |
+
"step": 1446
|
11079 |
+
},
|
11080 |
+
{
|
11081 |
+
"epoch": 1.6702472784947013,
|
11082 |
+
"grad_norm": 54.87680843012438,
|
11083 |
+
"learning_rate": 1.4575459059490769e-08,
|
11084 |
+
"logits/chosen": -1.3980488777160645,
|
11085 |
+
"logits/rejected": -1.5601296424865723,
|
11086 |
+
"logps/chosen": -173.1259765625,
|
11087 |
+
"logps/rejected": -261.9532470703125,
|
11088 |
+
"loss": 0.4345,
|
11089 |
+
"rewards/accuracies": 0.90625,
|
11090 |
+
"rewards/chosen": -0.8360046744346619,
|
11091 |
+
"rewards/margins": 1.6964924335479736,
|
11092 |
+
"rewards/rejected": -2.5324971675872803,
|
11093 |
+
"step": 1448
|
11094 |
+
},
|
11095 |
+
{
|
11096 |
+
"epoch": 1.6725542498738375,
|
11097 |
+
"grad_norm": 56.403772234535865,
|
11098 |
+
"learning_rate": 1.4375943184337869e-08,
|
11099 |
+
"logits/chosen": -1.2514413595199585,
|
11100 |
+
"logits/rejected": -1.2857670783996582,
|
11101 |
+
"logps/chosen": -148.14613342285156,
|
11102 |
+
"logps/rejected": -192.71554565429688,
|
11103 |
+
"loss": 0.3637,
|
11104 |
+
"rewards/accuracies": 0.84375,
|
11105 |
+
"rewards/chosen": -0.924572229385376,
|
11106 |
+
"rewards/margins": 1.3504137992858887,
|
11107 |
+
"rewards/rejected": -2.2749857902526855,
|
11108 |
+
"step": 1450
|
11109 |
+
},
|
11110 |
+
{
|
11111 |
+
"epoch": 1.6748612212529739,
|
11112 |
+
"grad_norm": 41.663170151108154,
|
11113 |
+
"learning_rate": 1.4177696464263722e-08,
|
11114 |
+
"logits/chosen": -1.554652452468872,
|
11115 |
+
"logits/rejected": -1.556633710861206,
|
11116 |
+
"logps/chosen": -173.3590545654297,
|
11117 |
+
"logps/rejected": -214.38238525390625,
|
11118 |
+
"loss": 0.437,
|
11119 |
+
"rewards/accuracies": 0.6875,
|
11120 |
+
"rewards/chosen": -0.890671968460083,
|
11121 |
+
"rewards/margins": 1.1289126873016357,
|
11122 |
+
"rewards/rejected": -2.0195844173431396,
|
11123 |
+
"step": 1452
|
11124 |
+
},
|
11125 |
+
{
|
11126 |
+
"epoch": 1.6771681926321103,
|
11127 |
+
"grad_norm": 55.4468403681626,
|
11128 |
+
"learning_rate": 1.3980721837763032e-08,
|
11129 |
+
"logits/chosen": -1.522512674331665,
|
11130 |
+
"logits/rejected": -1.4707545042037964,
|
11131 |
+
"logps/chosen": -187.02899169921875,
|
11132 |
+
"logps/rejected": -202.90972900390625,
|
11133 |
+
"loss": 0.385,
|
11134 |
+
"rewards/accuracies": 0.96875,
|
11135 |
+
"rewards/chosen": -1.1045527458190918,
|
11136 |
+
"rewards/margins": 1.0547027587890625,
|
11137 |
+
"rewards/rejected": -2.159255266189575,
|
11138 |
+
"step": 1454
|
11139 |
+
},
|
11140 |
+
{
|
11141 |
+
"epoch": 1.6794751640112464,
|
11142 |
+
"grad_norm": 45.11818002402701,
|
11143 |
+
"learning_rate": 1.378502222447494e-08,
|
11144 |
+
"logits/chosen": -1.2670139074325562,
|
11145 |
+
"logits/rejected": -1.2748316526412964,
|
11146 |
+
"logps/chosen": -163.23745727539062,
|
11147 |
+
"logps/rejected": -186.1403350830078,
|
11148 |
+
"loss": 0.3824,
|
11149 |
+
"rewards/accuracies": 0.90625,
|
11150 |
+
"rewards/chosen": -0.9486604928970337,
|
11151 |
+
"rewards/margins": 1.1920839548110962,
|
11152 |
+
"rewards/rejected": -2.14074444770813,
|
11153 |
+
"step": 1456
|
11154 |
+
},
|
11155 |
+
{
|
11156 |
+
"epoch": 1.6817821353903828,
|
11157 |
+
"grad_norm": 45.36839329689688,
|
11158 |
+
"learning_rate": 1.3590600525139762e-08,
|
11159 |
+
"logits/chosen": -1.427920937538147,
|
11160 |
+
"logits/rejected": -1.4643090963363647,
|
11161 |
+
"logps/chosen": -152.86471557617188,
|
11162 |
+
"logps/rejected": -143.46755981445312,
|
11163 |
+
"loss": 0.4027,
|
11164 |
+
"rewards/accuracies": 0.78125,
|
11165 |
+
"rewards/chosen": -0.7307279109954834,
|
11166 |
+
"rewards/margins": 1.0662072896957397,
|
11167 |
+
"rewards/rejected": -1.7969350814819336,
|
11168 |
+
"step": 1458
|
11169 |
+
},
|
11170 |
+
{
|
11171 |
+
"epoch": 1.6840891067695192,
|
11172 |
+
"grad_norm": 42.1130276807721,
|
11173 |
+
"learning_rate": 1.3397459621556128e-08,
|
11174 |
+
"logits/chosen": -1.4458112716674805,
|
11175 |
+
"logits/rejected": -1.405861496925354,
|
11176 |
+
"logps/chosen": -204.7304229736328,
|
11177 |
+
"logps/rejected": -236.4501953125,
|
11178 |
+
"loss": 0.37,
|
11179 |
+
"rewards/accuracies": 0.90625,
|
11180 |
+
"rewards/chosen": -1.182440996170044,
|
11181 |
+
"rewards/margins": 1.2389183044433594,
|
11182 |
+
"rewards/rejected": -2.421359062194824,
|
11183 |
+
"step": 1460
|
11184 |
+
},
|
11185 |
+
{
|
11186 |
+
"epoch": 1.6863960781486553,
|
11187 |
+
"grad_norm": 47.391757305539294,
|
11188 |
+
"learning_rate": 1.320560237653816e-08,
|
11189 |
+
"logits/chosen": -1.3273866176605225,
|
11190 |
+
"logits/rejected": -1.343310832977295,
|
11191 |
+
"logps/chosen": -167.19651794433594,
|
11192 |
+
"logps/rejected": -222.67767333984375,
|
11193 |
+
"loss": 0.4054,
|
11194 |
+
"rewards/accuracies": 0.84375,
|
11195 |
+
"rewards/chosen": -0.9558027982711792,
|
11196 |
+
"rewards/margins": 1.4386895895004272,
|
11197 |
+
"rewards/rejected": -2.3944923877716064,
|
11198 |
+
"step": 1462
|
11199 |
+
},
|
11200 |
+
{
|
11201 |
+
"epoch": 1.688703049527792,
|
11202 |
+
"grad_norm": 45.31930327974149,
|
11203 |
+
"learning_rate": 1.3015031633873075e-08,
|
11204 |
+
"logits/chosen": -1.3923242092132568,
|
11205 |
+
"logits/rejected": -1.3101178407669067,
|
11206 |
+
"logps/chosen": -155.1905517578125,
|
11207 |
+
"logps/rejected": -161.7040557861328,
|
11208 |
+
"loss": 0.4541,
|
11209 |
+
"rewards/accuracies": 0.84375,
|
11210 |
+
"rewards/chosen": -0.7979952096939087,
|
11211 |
+
"rewards/margins": 0.887161135673523,
|
11212 |
+
"rewards/rejected": -1.6851563453674316,
|
11213 |
+
"step": 1464
|
11214 |
+
},
|
11215 |
+
{
|
11216 |
+
"epoch": 1.691010020906928,
|
11217 |
+
"grad_norm": 47.965640105650365,
|
11218 |
+
"learning_rate": 1.2825750218278963e-08,
|
11219 |
+
"logits/chosen": -1.376510500907898,
|
11220 |
+
"logits/rejected": -1.351030945777893,
|
11221 |
+
"logps/chosen": -184.95018005371094,
|
11222 |
+
"logps/rejected": -222.78271484375,
|
11223 |
+
"loss": 0.3602,
|
11224 |
+
"rewards/accuracies": 0.8125,
|
11225 |
+
"rewards/chosen": -1.0790867805480957,
|
11226 |
+
"rewards/margins": 1.4311017990112305,
|
11227 |
+
"rewards/rejected": -2.510188579559326,
|
11228 |
+
"step": 1466
|
11229 |
+
},
|
11230 |
+
{
|
11231 |
+
"epoch": 1.6933169922860645,
|
11232 |
+
"grad_norm": 44.34985796124818,
|
11233 |
+
"learning_rate": 1.2637760935363052e-08,
|
11234 |
+
"logits/chosen": -1.5339727401733398,
|
11235 |
+
"logits/rejected": -1.4939508438110352,
|
11236 |
+
"logps/chosen": -172.8440399169922,
|
11237 |
+
"logps/rejected": -214.15769958496094,
|
11238 |
+
"loss": 0.4033,
|
11239 |
+
"rewards/accuracies": 0.78125,
|
11240 |
+
"rewards/chosen": -0.8286362290382385,
|
11241 |
+
"rewards/margins": 1.027283787727356,
|
11242 |
+
"rewards/rejected": -1.8559203147888184,
|
11243 |
+
"step": 1468
|
11244 |
+
},
|
11245 |
+
{
|
11246 |
+
"epoch": 1.6956239636652009,
|
11247 |
+
"grad_norm": 42.43166448841978,
|
11248 |
+
"learning_rate": 1.2451066571579993e-08,
|
11249 |
+
"logits/chosen": -1.4077023267745972,
|
11250 |
+
"logits/rejected": -1.3327652215957642,
|
11251 |
+
"logps/chosen": -180.84344482421875,
|
11252 |
+
"logps/rejected": -198.8247528076172,
|
11253 |
+
"loss": 0.3264,
|
11254 |
+
"rewards/accuracies": 0.90625,
|
11255 |
+
"rewards/chosen": -0.9496182203292847,
|
11256 |
+
"rewards/margins": 1.4146476984024048,
|
11257 |
+
"rewards/rejected": -2.3642659187316895,
|
11258 |
+
"step": 1470
|
11259 |
+
},
|
11260 |
+
{
|
11261 |
+
"epoch": 1.697930935044337,
|
11262 |
+
"grad_norm": 49.74620204940183,
|
11263 |
+
"learning_rate": 1.2265669894190667e-08,
|
11264 |
+
"logits/chosen": -1.3190773725509644,
|
11265 |
+
"logits/rejected": -1.2969920635223389,
|
11266 |
+
"logps/chosen": -210.91648864746094,
|
11267 |
+
"logps/rejected": -215.4746551513672,
|
11268 |
+
"loss": 0.4142,
|
11269 |
+
"rewards/accuracies": 0.875,
|
11270 |
+
"rewards/chosen": -1.1324176788330078,
|
11271 |
+
"rewards/margins": 1.0822185277938843,
|
11272 |
+
"rewards/rejected": -2.2146360874176025,
|
11273 |
+
"step": 1472
|
11274 |
+
},
|
11275 |
+
{
|
11276 |
+
"epoch": 1.7002379064234734,
|
11277 |
+
"grad_norm": 47.636203771282304,
|
11278 |
+
"learning_rate": 1.2081573651221034e-08,
|
11279 |
+
"logits/chosen": -1.4100513458251953,
|
11280 |
+
"logits/rejected": -1.3401373624801636,
|
11281 |
+
"logps/chosen": -198.85769653320312,
|
11282 |
+
"logps/rejected": -232.75079345703125,
|
11283 |
+
"loss": 0.4262,
|
11284 |
+
"rewards/accuracies": 0.90625,
|
11285 |
+
"rewards/chosen": -0.6716212630271912,
|
11286 |
+
"rewards/margins": 1.5022387504577637,
|
11287 |
+
"rewards/rejected": -2.1738598346710205,
|
11288 |
+
"step": 1474
|
11289 |
+
},
|
11290 |
+
{
|
11291 |
+
"epoch": 1.7025448778026098,
|
11292 |
+
"grad_norm": 41.23379055239197,
|
11293 |
+
"learning_rate": 1.1898780571421552e-08,
|
11294 |
+
"logits/chosen": -1.3071932792663574,
|
11295 |
+
"logits/rejected": -1.3262195587158203,
|
11296 |
+
"logps/chosen": -245.61962890625,
|
11297 |
+
"logps/rejected": -291.36395263671875,
|
11298 |
+
"loss": 0.366,
|
11299 |
+
"rewards/accuracies": 0.90625,
|
11300 |
+
"rewards/chosen": -1.0852869749069214,
|
11301 |
+
"rewards/margins": 2.006357431411743,
|
11302 |
+
"rewards/rejected": -3.091644287109375,
|
11303 |
+
"step": 1476
|
11304 |
+
},
|
11305 |
+
{
|
11306 |
+
"epoch": 1.704851849181746,
|
11307 |
+
"grad_norm": 38.8308365926166,
|
11308 |
+
"learning_rate": 1.171729336422661e-08,
|
11309 |
+
"logits/chosen": -1.3179391622543335,
|
11310 |
+
"logits/rejected": -1.2845818996429443,
|
11311 |
+
"logps/chosen": -148.53089904785156,
|
11312 |
+
"logps/rejected": -196.87857055664062,
|
11313 |
+
"loss": 0.3507,
|
11314 |
+
"rewards/accuracies": 0.9375,
|
11315 |
+
"rewards/chosen": -0.8545979261398315,
|
11316 |
+
"rewards/margins": 1.5212167501449585,
|
11317 |
+
"rewards/rejected": -2.375814437866211,
|
11318 |
+
"step": 1478
|
11319 |
+
},
|
11320 |
+
{
|
11321 |
+
"epoch": 1.7071588205608825,
|
11322 |
+
"grad_norm": 46.3266972725699,
|
11323 |
+
"learning_rate": 1.153711471971448e-08,
|
11324 |
+
"logits/chosen": -1.3267916440963745,
|
11325 |
+
"logits/rejected": -1.3675000667572021,
|
11326 |
+
"logps/chosen": -223.65921020507812,
|
11327 |
+
"logps/rejected": -267.6008605957031,
|
11328 |
+
"loss": 0.332,
|
11329 |
+
"rewards/accuracies": 0.875,
|
11330 |
+
"rewards/chosen": -1.1026298999786377,
|
11331 |
+
"rewards/margins": 1.4217543601989746,
|
11332 |
+
"rewards/rejected": -2.5243842601776123,
|
11333 |
+
"step": 1480
|
11334 |
+
},
|
11335 |
+
{
|
11336 |
+
"epoch": 1.7094657919400187,
|
11337 |
+
"grad_norm": 42.30797369115846,
|
11338 |
+
"learning_rate": 1.135824730856726e-08,
|
11339 |
+
"logits/chosen": -1.4186185598373413,
|
11340 |
+
"logits/rejected": -1.37624990940094,
|
11341 |
+
"logps/chosen": -180.34097290039062,
|
11342 |
+
"logps/rejected": -212.8303985595703,
|
11343 |
+
"loss": 0.3619,
|
11344 |
+
"rewards/accuracies": 1.0,
|
11345 |
+
"rewards/chosen": -0.7309121489524841,
|
11346 |
+
"rewards/margins": 1.391485333442688,
|
11347 |
+
"rewards/rejected": -2.1223974227905273,
|
11348 |
+
"step": 1482
|
11349 |
+
},
|
11350 |
+
{
|
11351 |
+
"epoch": 1.711772763319155,
|
11352 |
+
"grad_norm": 42.86481474468262,
|
11353 |
+
"learning_rate": 1.1180693782031514e-08,
|
11354 |
+
"logits/chosen": -1.4671962261199951,
|
11355 |
+
"logits/rejected": -1.362795352935791,
|
11356 |
+
"logps/chosen": -227.28549194335938,
|
11357 |
+
"logps/rejected": -223.4291229248047,
|
11358 |
+
"loss": 0.3623,
|
11359 |
+
"rewards/accuracies": 0.8125,
|
11360 |
+
"rewards/chosen": -1.074944257736206,
|
11361 |
+
"rewards/margins": 1.3030946254730225,
|
11362 |
+
"rewards/rejected": -2.3780391216278076,
|
11363 |
+
"step": 1484
|
11364 |
+
},
|
11365 |
+
{
|
11366 |
+
"epoch": 1.7140797346982914,
|
11367 |
+
"grad_norm": 50.32433653433989,
|
11368 |
+
"learning_rate": 1.1004456771878834e-08,
|
11369 |
+
"logits/chosen": -1.2398756742477417,
|
11370 |
+
"logits/rejected": -1.259413242340088,
|
11371 |
+
"logps/chosen": -166.67294311523438,
|
11372 |
+
"logps/rejected": -197.03199768066406,
|
11373 |
+
"loss": 0.3835,
|
11374 |
+
"rewards/accuracies": 0.8125,
|
11375 |
+
"rewards/chosen": -1.0385042428970337,
|
11376 |
+
"rewards/margins": 1.2296409606933594,
|
11377 |
+
"rewards/rejected": -2.2681450843811035,
|
11378 |
+
"step": 1486
|
11379 |
+
},
|
11380 |
+
{
|
11381 |
+
"epoch": 1.7163867060774276,
|
11382 |
+
"grad_norm": 44.51587268033867,
|
11383 |
+
"learning_rate": 1.0829538890366863e-08,
|
11384 |
+
"logits/chosen": -1.339663028717041,
|
11385 |
+
"logits/rejected": -1.4015851020812988,
|
11386 |
+
"logps/chosen": -171.54469299316406,
|
11387 |
+
"logps/rejected": -203.06884765625,
|
11388 |
+
"loss": 0.3534,
|
11389 |
+
"rewards/accuracies": 0.90625,
|
11390 |
+
"rewards/chosen": -0.7753598093986511,
|
11391 |
+
"rewards/margins": 1.3360155820846558,
|
11392 |
+
"rewards/rejected": -2.111375331878662,
|
11393 |
+
"step": 1488
|
11394 |
+
},
|
11395 |
+
{
|
11396 |
+
"epoch": 1.7186936774565642,
|
11397 |
+
"grad_norm": 49.445893229920515,
|
11398 |
+
"learning_rate": 1.065594273020055e-08,
|
11399 |
+
"logits/chosen": -1.3083471059799194,
|
11400 |
+
"logits/rejected": -1.340012550354004,
|
11401 |
+
"logps/chosen": -202.39198303222656,
|
11402 |
+
"logps/rejected": -247.21881103515625,
|
11403 |
+
"loss": 0.3148,
|
11404 |
+
"rewards/accuracies": 0.96875,
|
11405 |
+
"rewards/chosen": -0.9150687456130981,
|
11406 |
+
"rewards/margins": 1.5993800163269043,
|
11407 |
+
"rewards/rejected": -2.514448881149292,
|
11408 |
+
"step": 1490
|
11409 |
+
},
|
11410 |
+
{
|
11411 |
+
"epoch": 1.7210006488357004,
|
11412 |
+
"grad_norm": 55.13220155509763,
|
11413 |
+
"learning_rate": 1.0483670864493776e-08,
|
11414 |
+
"logits/chosen": -1.4246532917022705,
|
11415 |
+
"logits/rejected": -1.3996690511703491,
|
11416 |
+
"logps/chosen": -177.84730529785156,
|
11417 |
+
"logps/rejected": -263.4200744628906,
|
11418 |
+
"loss": 0.3937,
|
11419 |
+
"rewards/accuracies": 0.75,
|
11420 |
+
"rewards/chosen": -1.3569920063018799,
|
11421 |
+
"rewards/margins": 1.5898300409317017,
|
11422 |
+
"rewards/rejected": -2.946821928024292,
|
11423 |
+
"step": 1492
|
11424 |
+
},
|
11425 |
+
{
|
11426 |
+
"epoch": 1.7233076202148367,
|
11427 |
+
"grad_norm": 52.87877048383229,
|
11428 |
+
"learning_rate": 1.0312725846731175e-08,
|
11429 |
+
"logits/chosen": -1.535531997680664,
|
11430 |
+
"logits/rejected": -1.5023539066314697,
|
11431 |
+
"logps/chosen": -204.4673614501953,
|
11432 |
+
"logps/rejected": -205.1877899169922,
|
11433 |
+
"loss": 0.4313,
|
11434 |
+
"rewards/accuracies": 0.90625,
|
11435 |
+
"rewards/chosen": -0.8635197877883911,
|
11436 |
+
"rewards/margins": 1.0750128030776978,
|
11437 |
+
"rewards/rejected": -1.9385325908660889,
|
11438 |
+
"step": 1494
|
11439 |
+
},
|
11440 |
+
{
|
11441 |
+
"epoch": 1.7256145915939731,
|
11442 |
+
"grad_norm": 58.012698038087876,
|
11443 |
+
"learning_rate": 1.014311021073031e-08,
|
11444 |
+
"logits/chosen": -1.3397972583770752,
|
11445 |
+
"logits/rejected": -1.3506940603256226,
|
11446 |
+
"logps/chosen": -167.1254425048828,
|
11447 |
+
"logps/rejected": -176.68478393554688,
|
11448 |
+
"loss": 0.4236,
|
11449 |
+
"rewards/accuracies": 0.6875,
|
11450 |
+
"rewards/chosen": -1.0245790481567383,
|
11451 |
+
"rewards/margins": 0.8268385529518127,
|
11452 |
+
"rewards/rejected": -1.8514174222946167,
|
11453 |
+
"step": 1496
|
11454 |
+
},
|
11455 |
+
{
|
11456 |
+
"epoch": 1.7279215629731093,
|
11457 |
+
"grad_norm": 45.63432516497851,
|
11458 |
+
"learning_rate": 9.974826470604047e-09,
|
11459 |
+
"logits/chosen": -1.4238135814666748,
|
11460 |
+
"logits/rejected": -1.4215826988220215,
|
11461 |
+
"logps/chosen": -200.67486572265625,
|
11462 |
+
"logps/rejected": -243.21206665039062,
|
11463 |
+
"loss": 0.312,
|
11464 |
+
"rewards/accuracies": 0.875,
|
11465 |
+
"rewards/chosen": -0.8548165559768677,
|
11466 |
+
"rewards/margins": 1.6108603477478027,
|
11467 |
+
"rewards/rejected": -2.465676784515381,
|
11468 |
+
"step": 1498
|
11469 |
+
},
|
11470 |
+
{
|
11471 |
+
"epoch": 1.7302285343522457,
|
11472 |
+
"grad_norm": 45.67289683342217,
|
11473 |
+
"learning_rate": 9.807877120723395e-09,
|
11474 |
+
"logits/chosen": -1.3849635124206543,
|
11475 |
+
"logits/rejected": -1.4313040971755981,
|
11476 |
+
"logps/chosen": -172.61903381347656,
|
11477 |
+
"logps/rejected": -212.602783203125,
|
11478 |
+
"loss": 0.3868,
|
11479 |
+
"rewards/accuracies": 0.9375,
|
11480 |
+
"rewards/chosen": -0.7782556414604187,
|
11481 |
+
"rewards/margins": 1.354672908782959,
|
11482 |
+
"rewards/rejected": -2.1329286098480225,
|
11483 |
+
"step": 1500
|
11484 |
+
},
|
11485 |
+
{
|
11486 |
+
"epoch": 1.7302285343522457,
|
11487 |
+
"eval_logits/chosen": -1.342780351638794,
|
11488 |
+
"eval_logits/rejected": -1.2655624151229858,
|
11489 |
+
"eval_logps/chosen": -198.90330505371094,
|
11490 |
+
"eval_logps/rejected": -171.5903778076172,
|
11491 |
+
"eval_loss": 0.5269267559051514,
|
11492 |
+
"eval_rewards/accuracies": 0.7200000286102295,
|
11493 |
+
"eval_rewards/chosen": -1.3721123933792114,
|
11494 |
+
"eval_rewards/margins": 0.7353845834732056,
|
11495 |
+
"eval_rewards/rejected": -2.107496976852417,
|
11496 |
+
"eval_runtime": 23.0274,
|
11497 |
+
"eval_samples_per_second": 4.343,
|
11498 |
+
"eval_steps_per_second": 1.086,
|
11499 |
+
"step": 1500
|
11500 |
}
|
11501 |
],
|
11502 |
"logging_steps": 2,
|