Training in progress, step 1752, checkpoint
Browse files- last-checkpoint/global_step1752/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1752/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1752/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1752/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1752/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1752/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1752/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1752/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00004.safetensors +1 -1
- last-checkpoint/model-00002-of-00004.safetensors +1 -1
- last-checkpoint/model-00003-of-00004.safetensors +1 -1
- last-checkpoint/model-00004-of-00004.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +1925 -3
last-checkpoint/global_step1752/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38c93c1a87f13462b5e0a5655712f0b5c4c012fffd291dcc75382e0879b36d9e
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1752/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab31a681deaa44cca1b5ed9bc3093e52b27f87ad098476a6f765ed1702235565
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1752/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db1c7f5fed1fb99aea9d6c75635912272ce175d4bebe42e8ad47b5e85730f8f5
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1752/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f69a4fa888baac249d427aa2ac2dc127a91fd0f7bee11e1b0be999f15e4438b7
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1752/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c7d2aa6a6d4c0a30e10c39ed299af9b8aef9d66b6ff8d7a84c0a25a45701cd9
|
3 |
+
size 150693
|
last-checkpoint/global_step1752/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1bc324cb078fc2735eda0f2ec2e2afdefed8e49fbf43a1c59c83521b42cc9657
|
3 |
+
size 150693
|
last-checkpoint/global_step1752/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37f1bc8bdd8263b75afd7b99b3c7ae1cf493a40191c5c0ed770efb3243c0528c
|
3 |
+
size 150693
|
last-checkpoint/global_step1752/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b0aef5cf0e0dc06562d25766e6f1e03bd601dbe432d4bbd0b9eb3fbc4b6fdac0
|
3 |
+
size 150693
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step1752
|
last-checkpoint/model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4976698672
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09a78ea9ea6e0e17f21e9463d421205fee33f06a038f692cdbc15cc5da5406e6
|
3 |
size 4976698672
|
last-checkpoint/model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999802720
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67ebff0a52efc66dec3162b74b95953b162147486a84d2998d089213d5696860
|
3 |
size 4999802720
|
last-checkpoint/model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4915916176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9fbac7a8745fc91c66963c503ba1c883ddde4758539b7301fb550afd9e30274
|
3 |
size 4915916176
|
last-checkpoint/model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1168138808
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:254f93f00f3d0d8c2a810b3561ea09850b051a412761bbe113d75573d3f92b1b
|
3 |
size 1168138808
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8f22ced19e790cc864cefe3b7c711d9ae631c44f95d42fb4829688cc3de0153
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e0407513eba77d34cbf3adf0e59a58bd80716f4f00f414854253637e82be43d
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6060636c023258ce9b965e244b8a58b4c99d5784dde4405b39737550ef50cd4f
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c24ccdfdcde39cb2265c82c50c36ffdfcc670f757aba4bcf4bb0fdc6d1373c4c
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2800f1191a8da3fdb8a578f3e45335b90e0bd680c7897d41e35ad73896db01cd
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -11497,6 +11497,1928 @@
|
|
11497 |
"eval_samples_per_second": 4.648,
|
11498 |
"eval_steps_per_second": 1.162,
|
11499 |
"step": 1500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11500 |
}
|
11501 |
],
|
11502 |
"logging_steps": 2,
|
@@ -11511,7 +13433,7 @@
|
|
11511 |
"should_evaluate": false,
|
11512 |
"should_log": false,
|
11513 |
"should_save": true,
|
11514 |
-
"should_training_stop":
|
11515 |
},
|
11516 |
"attributes": {}
|
11517 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.9978618772717553,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 1752,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
11497 |
"eval_samples_per_second": 4.648,
|
11498 |
"eval_steps_per_second": 1.162,
|
11499 |
"step": 1500
|
11500 |
+
},
|
11501 |
+
{
|
11502 |
+
"epoch": 1.712778846839142,
|
11503 |
+
"grad_norm": 59.337175090307106,
|
11504 |
+
"learning_rate": 1.1090063682116202e-08,
|
11505 |
+
"logits/chosen": -1.2650439739227295,
|
11506 |
+
"logits/rejected": -1.3502650260925293,
|
11507 |
+
"logps/chosen": -183.25546264648438,
|
11508 |
+
"logps/rejected": -224.42698669433594,
|
11509 |
+
"loss": 0.4722,
|
11510 |
+
"rewards/accuracies": 0.6875,
|
11511 |
+
"rewards/chosen": -0.49561411142349243,
|
11512 |
+
"rewards/margins": 1.1255251169204712,
|
11513 |
+
"rewards/rejected": -1.6211390495300293,
|
11514 |
+
"step": 1502
|
11515 |
+
},
|
11516 |
+
{
|
11517 |
+
"epoch": 1.7150595110826028,
|
11518 |
+
"grad_norm": 65.4693723853712,
|
11519 |
+
"learning_rate": 1.091662101338714e-08,
|
11520 |
+
"logits/chosen": -1.198454737663269,
|
11521 |
+
"logits/rejected": -1.1747905015945435,
|
11522 |
+
"logps/chosen": -176.22344970703125,
|
11523 |
+
"logps/rejected": -209.0,
|
11524 |
+
"loss": 0.428,
|
11525 |
+
"rewards/accuracies": 0.875,
|
11526 |
+
"rewards/chosen": -0.4550539553165436,
|
11527 |
+
"rewards/margins": 1.0402750968933105,
|
11528 |
+
"rewards/rejected": -1.4953290224075317,
|
11529 |
+
"step": 1504
|
11530 |
+
},
|
11531 |
+
{
|
11532 |
+
"epoch": 1.7173401753260638,
|
11533 |
+
"grad_norm": 56.55232069175886,
|
11534 |
+
"learning_rate": 1.0744466997123425e-08,
|
11535 |
+
"logits/chosen": -1.131535291671753,
|
11536 |
+
"logits/rejected": -1.2650810480117798,
|
11537 |
+
"logps/chosen": -149.54214477539062,
|
11538 |
+
"logps/rejected": -207.89288330078125,
|
11539 |
+
"loss": 0.4043,
|
11540 |
+
"rewards/accuracies": 0.78125,
|
11541 |
+
"rewards/chosen": -0.31441810727119446,
|
11542 |
+
"rewards/margins": 1.342848777770996,
|
11543 |
+
"rewards/rejected": -1.6572668552398682,
|
11544 |
+
"step": 1506
|
11545 |
+
},
|
11546 |
+
{
|
11547 |
+
"epoch": 1.7196208395695245,
|
11548 |
+
"grad_norm": 51.8349753003821,
|
11549 |
+
"learning_rate": 1.0573604123651503e-08,
|
11550 |
+
"logits/chosen": -1.3305891752243042,
|
11551 |
+
"logits/rejected": -1.357151985168457,
|
11552 |
+
"logps/chosen": -193.94952392578125,
|
11553 |
+
"logps/rejected": -216.302490234375,
|
11554 |
+
"loss": 0.3987,
|
11555 |
+
"rewards/accuracies": 0.875,
|
11556 |
+
"rewards/chosen": -0.3934152126312256,
|
11557 |
+
"rewards/margins": 1.1823620796203613,
|
11558 |
+
"rewards/rejected": -1.575777292251587,
|
11559 |
+
"step": 1508
|
11560 |
+
},
|
11561 |
+
{
|
11562 |
+
"epoch": 1.7219015038129855,
|
11563 |
+
"grad_norm": 63.60872727771506,
|
11564 |
+
"learning_rate": 1.0404034864620603e-08,
|
11565 |
+
"logits/chosen": -1.174712061882019,
|
11566 |
+
"logits/rejected": -1.2022875547409058,
|
11567 |
+
"logps/chosen": -148.9698486328125,
|
11568 |
+
"logps/rejected": -179.8749542236328,
|
11569 |
+
"loss": 0.4382,
|
11570 |
+
"rewards/accuracies": 0.9375,
|
11571 |
+
"rewards/chosen": -0.2437065690755844,
|
11572 |
+
"rewards/margins": 1.0777262449264526,
|
11573 |
+
"rewards/rejected": -1.3214329481124878,
|
11574 |
+
"step": 1510
|
11575 |
+
},
|
11576 |
+
{
|
11577 |
+
"epoch": 1.7241821680564464,
|
11578 |
+
"grad_norm": 56.263603035763026,
|
11579 |
+
"learning_rate": 1.0235761672966902e-08,
|
11580 |
+
"logits/chosen": -1.2788983583450317,
|
11581 |
+
"logits/rejected": -1.2880148887634277,
|
11582 |
+
"logps/chosen": -158.33840942382812,
|
11583 |
+
"logps/rejected": -187.15737915039062,
|
11584 |
+
"loss": 0.397,
|
11585 |
+
"rewards/accuracies": 0.9375,
|
11586 |
+
"rewards/chosen": -0.34705379605293274,
|
11587 |
+
"rewards/margins": 1.0756170749664307,
|
11588 |
+
"rewards/rejected": -1.422670841217041,
|
11589 |
+
"step": 1512
|
11590 |
+
},
|
11591 |
+
{
|
11592 |
+
"epoch": 1.7264628322999074,
|
11593 |
+
"grad_norm": 54.16757407624544,
|
11594 |
+
"learning_rate": 1.0068786982878087e-08,
|
11595 |
+
"logits/chosen": -1.3718278408050537,
|
11596 |
+
"logits/rejected": -1.337512731552124,
|
11597 |
+
"logps/chosen": -133.3784942626953,
|
11598 |
+
"logps/rejected": -142.33865356445312,
|
11599 |
+
"loss": 0.3983,
|
11600 |
+
"rewards/accuracies": 0.84375,
|
11601 |
+
"rewards/chosen": -0.2702075242996216,
|
11602 |
+
"rewards/margins": 0.9396376013755798,
|
11603 |
+
"rewards/rejected": -1.2098451852798462,
|
11604 |
+
"step": 1514
|
11605 |
+
},
|
11606 |
+
{
|
11607 |
+
"epoch": 1.7287434965433683,
|
11608 |
+
"grad_norm": 65.43992152706379,
|
11609 |
+
"learning_rate": 9.903113209758097e-09,
|
11610 |
+
"logits/chosen": -1.174783706665039,
|
11611 |
+
"logits/rejected": -1.1753756999969482,
|
11612 |
+
"logps/chosen": -141.90370178222656,
|
11613 |
+
"logps/rejected": -165.7357940673828,
|
11614 |
+
"loss": 0.3958,
|
11615 |
+
"rewards/accuracies": 0.84375,
|
11616 |
+
"rewards/chosen": -0.39702969789505005,
|
11617 |
+
"rewards/margins": 1.0974403619766235,
|
11618 |
+
"rewards/rejected": -1.494470238685608,
|
11619 |
+
"step": 1516
|
11620 |
+
},
|
11621 |
+
{
|
11622 |
+
"epoch": 1.7310241607868293,
|
11623 |
+
"grad_norm": 53.07155795474272,
|
11624 |
+
"learning_rate": 9.73874275019223e-09,
|
11625 |
+
"logits/chosen": -1.2876778841018677,
|
11626 |
+
"logits/rejected": -1.2978581190109253,
|
11627 |
+
"logps/chosen": -172.82862854003906,
|
11628 |
+
"logps/rejected": -199.86740112304688,
|
11629 |
+
"loss": 0.4196,
|
11630 |
+
"rewards/accuracies": 0.84375,
|
11631 |
+
"rewards/chosen": -0.360828161239624,
|
11632 |
+
"rewards/margins": 0.7934844493865967,
|
11633 |
+
"rewards/rejected": -1.1543126106262207,
|
11634 |
+
"step": 1518
|
11635 |
+
},
|
11636 |
+
{
|
11637 |
+
"epoch": 1.73330482503029,
|
11638 |
+
"grad_norm": 56.60655829896917,
|
11639 |
+
"learning_rate": 9.575677981912456e-09,
|
11640 |
+
"logits/chosen": -1.3419064283370972,
|
11641 |
+
"logits/rejected": -1.4197977781295776,
|
11642 |
+
"logps/chosen": -215.0706787109375,
|
11643 |
+
"logps/rejected": -234.6818084716797,
|
11644 |
+
"loss": 0.4216,
|
11645 |
+
"rewards/accuracies": 0.875,
|
11646 |
+
"rewards/chosen": -0.36836767196655273,
|
11647 |
+
"rewards/margins": 0.9383161067962646,
|
11648 |
+
"rewards/rejected": -1.3066837787628174,
|
11649 |
+
"step": 1520
|
11650 |
+
},
|
11651 |
+
{
|
11652 |
+
"epoch": 1.735585489273751,
|
11653 |
+
"grad_norm": 60.4365358347784,
|
11654 |
+
"learning_rate": 9.413921263763024e-09,
|
11655 |
+
"logits/chosen": -1.1901061534881592,
|
11656 |
+
"logits/rejected": -1.1732714176177979,
|
11657 |
+
"logps/chosen": -148.09774780273438,
|
11658 |
+
"logps/rejected": -138.72329711914062,
|
11659 |
+
"loss": 0.4147,
|
11660 |
+
"rewards/accuracies": 0.84375,
|
11661 |
+
"rewards/chosen": -0.24259831011295319,
|
11662 |
+
"rewards/margins": 0.738941490650177,
|
11663 |
+
"rewards/rejected": -0.9815397262573242,
|
11664 |
+
"step": 1522
|
11665 |
+
},
|
11666 |
+
{
|
11667 |
+
"epoch": 1.7378661535172117,
|
11668 |
+
"grad_norm": 61.21131572090355,
|
11669 |
+
"learning_rate": 9.253474935666338e-09,
|
11670 |
+
"logits/chosen": -1.319941520690918,
|
11671 |
+
"logits/rejected": -1.32881498336792,
|
11672 |
+
"logps/chosen": -186.58230590820312,
|
11673 |
+
"logps/rejected": -211.57525634765625,
|
11674 |
+
"loss": 0.5162,
|
11675 |
+
"rewards/accuracies": 0.84375,
|
11676 |
+
"rewards/chosen": -0.4148866832256317,
|
11677 |
+
"rewards/margins": 1.0180983543395996,
|
11678 |
+
"rewards/rejected": -1.4329850673675537,
|
11679 |
+
"step": 1524
|
11680 |
+
},
|
11681 |
+
{
|
11682 |
+
"epoch": 1.7401468177606727,
|
11683 |
+
"grad_norm": 55.57275725271543,
|
11684 |
+
"learning_rate": 9.094341318589071e-09,
|
11685 |
+
"logits/chosen": -1.1223199367523193,
|
11686 |
+
"logits/rejected": -1.2205249071121216,
|
11687 |
+
"logps/chosen": -123.37957763671875,
|
11688 |
+
"logps/rejected": -155.87530517578125,
|
11689 |
+
"loss": 0.4639,
|
11690 |
+
"rewards/accuracies": 0.875,
|
11691 |
+
"rewards/chosen": -0.4156235158443451,
|
11692 |
+
"rewards/margins": 0.8936042785644531,
|
11693 |
+
"rewards/rejected": -1.309227705001831,
|
11694 |
+
"step": 1526
|
11695 |
+
},
|
11696 |
+
{
|
11697 |
+
"epoch": 1.7424274820041337,
|
11698 |
+
"grad_norm": 75.32187086371678,
|
11699 |
+
"learning_rate": 8.936522714508676e-09,
|
11700 |
+
"logits/chosen": -1.1820390224456787,
|
11701 |
+
"logits/rejected": -1.227403163909912,
|
11702 |
+
"logps/chosen": -240.14578247070312,
|
11703 |
+
"logps/rejected": -284.03839111328125,
|
11704 |
+
"loss": 0.4268,
|
11705 |
+
"rewards/accuracies": 0.71875,
|
11706 |
+
"rewards/chosen": -0.9300968647003174,
|
11707 |
+
"rewards/margins": 1.1027811765670776,
|
11708 |
+
"rewards/rejected": -2.0328779220581055,
|
11709 |
+
"step": 1528
|
11710 |
+
},
|
11711 |
+
{
|
11712 |
+
"epoch": 1.7447081462475946,
|
11713 |
+
"grad_norm": 56.43871330540775,
|
11714 |
+
"learning_rate": 8.780021406380011e-09,
|
11715 |
+
"logits/chosen": -1.1268565654754639,
|
11716 |
+
"logits/rejected": -1.2307937145233154,
|
11717 |
+
"logps/chosen": -176.69204711914062,
|
11718 |
+
"logps/rejected": -203.42315673828125,
|
11719 |
+
"loss": 0.433,
|
11720 |
+
"rewards/accuracies": 0.84375,
|
11721 |
+
"rewards/chosen": -0.7230830788612366,
|
11722 |
+
"rewards/margins": 1.0807398557662964,
|
11723 |
+
"rewards/rejected": -1.8038227558135986,
|
11724 |
+
"step": 1530
|
11725 |
+
},
|
11726 |
+
{
|
11727 |
+
"epoch": 1.7469888104910556,
|
11728 |
+
"grad_norm": 53.44401452377932,
|
11729 |
+
"learning_rate": 8.624839658102345e-09,
|
11730 |
+
"logits/chosen": -1.172848105430603,
|
11731 |
+
"logits/rejected": -1.2623870372772217,
|
11732 |
+
"logps/chosen": -172.2826385498047,
|
11733 |
+
"logps/rejected": -218.0788116455078,
|
11734 |
+
"loss": 0.4167,
|
11735 |
+
"rewards/accuracies": 0.9375,
|
11736 |
+
"rewards/chosen": -0.4317859411239624,
|
11737 |
+
"rewards/margins": 1.20097017288208,
|
11738 |
+
"rewards/rejected": -1.632756233215332,
|
11739 |
+
"step": 1532
|
11740 |
+
},
|
11741 |
+
{
|
11742 |
+
"epoch": 1.7492694747345165,
|
11743 |
+
"grad_norm": 44.672996526234876,
|
11744 |
+
"learning_rate": 8.470979714486626e-09,
|
11745 |
+
"logits/chosen": -1.2581678628921509,
|
11746 |
+
"logits/rejected": -1.3436583280563354,
|
11747 |
+
"logps/chosen": -164.71560668945312,
|
11748 |
+
"logps/rejected": -197.1131134033203,
|
11749 |
+
"loss": 0.3891,
|
11750 |
+
"rewards/accuracies": 0.84375,
|
11751 |
+
"rewards/chosen": -0.48163390159606934,
|
11752 |
+
"rewards/margins": 1.20646333694458,
|
11753 |
+
"rewards/rejected": -1.688097357749939,
|
11754 |
+
"step": 1534
|
11755 |
+
},
|
11756 |
+
{
|
11757 |
+
"epoch": 1.7515501389779773,
|
11758 |
+
"grad_norm": 59.7183121930943,
|
11759 |
+
"learning_rate": 8.318443801222918e-09,
|
11760 |
+
"logits/chosen": -1.1442248821258545,
|
11761 |
+
"logits/rejected": -1.2501918077468872,
|
11762 |
+
"logps/chosen": -155.9255828857422,
|
11763 |
+
"logps/rejected": -185.97073364257812,
|
11764 |
+
"loss": 0.4332,
|
11765 |
+
"rewards/accuracies": 0.875,
|
11766 |
+
"rewards/chosen": -0.4522356390953064,
|
11767 |
+
"rewards/margins": 0.7766329050064087,
|
11768 |
+
"rewards/rejected": -1.2288686037063599,
|
11769 |
+
"step": 1536
|
11770 |
+
},
|
11771 |
+
{
|
11772 |
+
"epoch": 1.7538308032214383,
|
11773 |
+
"grad_norm": 52.4727599794761,
|
11774 |
+
"learning_rate": 8.167234124848343e-09,
|
11775 |
+
"logits/chosen": -1.2721006870269775,
|
11776 |
+
"logits/rejected": -1.3032159805297852,
|
11777 |
+
"logps/chosen": -213.73348999023438,
|
11778 |
+
"logps/rejected": -231.73304748535156,
|
11779 |
+
"loss": 0.4188,
|
11780 |
+
"rewards/accuracies": 0.875,
|
11781 |
+
"rewards/chosen": -0.38457486033439636,
|
11782 |
+
"rewards/margins": 1.214958667755127,
|
11783 |
+
"rewards/rejected": -1.5995336771011353,
|
11784 |
+
"step": 1538
|
11785 |
+
},
|
11786 |
+
{
|
11787 |
+
"epoch": 1.756111467464899,
|
11788 |
+
"grad_norm": 56.28899819514317,
|
11789 |
+
"learning_rate": 8.017352872715077e-09,
|
11790 |
+
"logits/chosen": -1.246867299079895,
|
11791 |
+
"logits/rejected": -1.321312665939331,
|
11792 |
+
"logps/chosen": -166.28347778320312,
|
11793 |
+
"logps/rejected": -189.37855529785156,
|
11794 |
+
"loss": 0.3864,
|
11795 |
+
"rewards/accuracies": 0.9375,
|
11796 |
+
"rewards/chosen": -0.6479480266571045,
|
11797 |
+
"rewards/margins": 0.916532039642334,
|
11798 |
+
"rewards/rejected": -1.5644800662994385,
|
11799 |
+
"step": 1540
|
11800 |
+
},
|
11801 |
+
{
|
11802 |
+
"epoch": 1.75839213170836,
|
11803 |
+
"grad_norm": 65.96019718362385,
|
11804 |
+
"learning_rate": 7.868802212958703e-09,
|
11805 |
+
"logits/chosen": -1.2408630847930908,
|
11806 |
+
"logits/rejected": -1.3144943714141846,
|
11807 |
+
"logps/chosen": -148.76805114746094,
|
11808 |
+
"logps/rejected": -169.2152099609375,
|
11809 |
+
"loss": 0.4679,
|
11810 |
+
"rewards/accuracies": 0.8125,
|
11811 |
+
"rewards/chosen": -0.313798189163208,
|
11812 |
+
"rewards/margins": 0.8195222020149231,
|
11813 |
+
"rewards/rejected": -1.1333203315734863,
|
11814 |
+
"step": 1542
|
11815 |
+
},
|
11816 |
+
{
|
11817 |
+
"epoch": 1.760672795951821,
|
11818 |
+
"grad_norm": 67.11700647965242,
|
11819 |
+
"learning_rate": 7.721584294466899e-09,
|
11820 |
+
"logits/chosen": -1.3456957340240479,
|
11821 |
+
"logits/rejected": -1.3798415660858154,
|
11822 |
+
"logps/chosen": -180.2974090576172,
|
11823 |
+
"logps/rejected": -203.2277374267578,
|
11824 |
+
"loss": 0.4066,
|
11825 |
+
"rewards/accuracies": 0.78125,
|
11826 |
+
"rewards/chosen": -0.5308500528335571,
|
11827 |
+
"rewards/margins": 0.968846321105957,
|
11828 |
+
"rewards/rejected": -1.4996963739395142,
|
11829 |
+
"step": 1544
|
11830 |
+
},
|
11831 |
+
{
|
11832 |
+
"epoch": 1.7629534601952819,
|
11833 |
+
"grad_norm": 59.467637180664155,
|
11834 |
+
"learning_rate": 7.575701246848299e-09,
|
11835 |
+
"logits/chosen": -1.2550122737884521,
|
11836 |
+
"logits/rejected": -1.4017306566238403,
|
11837 |
+
"logps/chosen": -176.01060485839844,
|
11838 |
+
"logps/rejected": -221.2160186767578,
|
11839 |
+
"loss": 0.4151,
|
11840 |
+
"rewards/accuracies": 0.84375,
|
11841 |
+
"rewards/chosen": -0.6037588715553284,
|
11842 |
+
"rewards/margins": 1.1545339822769165,
|
11843 |
+
"rewards/rejected": -1.7582929134368896,
|
11844 |
+
"step": 1546
|
11845 |
+
},
|
11846 |
+
{
|
11847 |
+
"epoch": 1.7652341244387428,
|
11848 |
+
"grad_norm": 59.34463361306739,
|
11849 |
+
"learning_rate": 7.431155180401704e-09,
|
11850 |
+
"logits/chosen": -1.1449761390686035,
|
11851 |
+
"logits/rejected": -1.2112215757369995,
|
11852 |
+
"logps/chosen": -175.2537384033203,
|
11853 |
+
"logps/rejected": -213.36187744140625,
|
11854 |
+
"loss": 0.3787,
|
11855 |
+
"rewards/accuracies": 0.90625,
|
11856 |
+
"rewards/chosen": -0.547699511051178,
|
11857 |
+
"rewards/margins": 1.1472948789596558,
|
11858 |
+
"rewards/rejected": -1.694994568824768,
|
11859 |
+
"step": 1548
|
11860 |
+
},
|
11861 |
+
{
|
11862 |
+
"epoch": 1.7675147886822038,
|
11863 |
+
"grad_norm": 107.3697379891581,
|
11864 |
+
"learning_rate": 7.287948186085613e-09,
|
11865 |
+
"logits/chosen": -1.1891409158706665,
|
11866 |
+
"logits/rejected": -1.2180662155151367,
|
11867 |
+
"logps/chosen": -101.96951293945312,
|
11868 |
+
"logps/rejected": -131.66488647460938,
|
11869 |
+
"loss": 0.5419,
|
11870 |
+
"rewards/accuracies": 0.90625,
|
11871 |
+
"rewards/chosen": -0.35958331823349,
|
11872 |
+
"rewards/margins": 0.8678293228149414,
|
11873 |
+
"rewards/rejected": -1.2274125814437866,
|
11874 |
+
"step": 1550
|
11875 |
+
},
|
11876 |
+
{
|
11877 |
+
"epoch": 1.7697954529256648,
|
11878 |
+
"grad_norm": 58.46793417479867,
|
11879 |
+
"learning_rate": 7.146082335487824e-09,
|
11880 |
+
"logits/chosen": -1.2643113136291504,
|
11881 |
+
"logits/rejected": -1.2983956336975098,
|
11882 |
+
"logps/chosen": -184.83563232421875,
|
11883 |
+
"logps/rejected": -210.6478729248047,
|
11884 |
+
"loss": 0.417,
|
11885 |
+
"rewards/accuracies": 0.75,
|
11886 |
+
"rewards/chosen": -0.5568149089813232,
|
11887 |
+
"rewards/margins": 0.8875846862792969,
|
11888 |
+
"rewards/rejected": -1.4443995952606201,
|
11889 |
+
"step": 1552
|
11890 |
+
},
|
11891 |
+
{
|
11892 |
+
"epoch": 1.7720761171691255,
|
11893 |
+
"grad_norm": 68.29885233859635,
|
11894 |
+
"learning_rate": 7.005559680795658e-09,
|
11895 |
+
"logits/chosen": -1.255906105041504,
|
11896 |
+
"logits/rejected": -1.2899165153503418,
|
11897 |
+
"logps/chosen": -123.41840362548828,
|
11898 |
+
"logps/rejected": -138.2176055908203,
|
11899 |
+
"loss": 0.4641,
|
11900 |
+
"rewards/accuracies": 0.84375,
|
11901 |
+
"rewards/chosen": -0.31046992540359497,
|
11902 |
+
"rewards/margins": 0.6126350164413452,
|
11903 |
+
"rewards/rejected": -0.9231049418449402,
|
11904 |
+
"step": 1554
|
11905 |
+
},
|
11906 |
+
{
|
11907 |
+
"epoch": 1.7743567814125865,
|
11908 |
+
"grad_norm": 54.33302324585405,
|
11909 |
+
"learning_rate": 6.866382254766156e-09,
|
11910 |
+
"logits/chosen": -1.330174207687378,
|
11911 |
+
"logits/rejected": -1.363855242729187,
|
11912 |
+
"logps/chosen": -196.70680236816406,
|
11913 |
+
"logps/rejected": -213.6434326171875,
|
11914 |
+
"loss": 0.4372,
|
11915 |
+
"rewards/accuracies": 0.78125,
|
11916 |
+
"rewards/chosen": -0.5205326676368713,
|
11917 |
+
"rewards/margins": 1.0018987655639648,
|
11918 |
+
"rewards/rejected": -1.5224316120147705,
|
11919 |
+
"step": 1556
|
11920 |
+
},
|
11921 |
+
{
|
11922 |
+
"epoch": 1.7766374456560472,
|
11923 |
+
"grad_norm": 66.94592762976625,
|
11924 |
+
"learning_rate": 6.7285520706966914e-09,
|
11925 |
+
"logits/chosen": -1.3225196599960327,
|
11926 |
+
"logits/rejected": -1.3494971990585327,
|
11927 |
+
"logps/chosen": -185.50836181640625,
|
11928 |
+
"logps/rejected": -207.73159790039062,
|
11929 |
+
"loss": 0.4265,
|
11930 |
+
"rewards/accuracies": 0.8125,
|
11931 |
+
"rewards/chosen": -0.5292472839355469,
|
11932 |
+
"rewards/margins": 0.988534152507782,
|
11933 |
+
"rewards/rejected": -1.5177814960479736,
|
11934 |
+
"step": 1558
|
11935 |
+
},
|
11936 |
+
{
|
11937 |
+
"epoch": 1.7789181098995082,
|
11938 |
+
"grad_norm": 66.14902165695068,
|
11939 |
+
"learning_rate": 6.592071122395848e-09,
|
11940 |
+
"logits/chosen": -1.2869586944580078,
|
11941 |
+
"logits/rejected": -1.2955509424209595,
|
11942 |
+
"logps/chosen": -203.05213928222656,
|
11943 |
+
"logps/rejected": -221.87176513671875,
|
11944 |
+
"loss": 0.4555,
|
11945 |
+
"rewards/accuracies": 0.90625,
|
11946 |
+
"rewards/chosen": -0.4785195589065552,
|
11947 |
+
"rewards/margins": 0.8982763886451721,
|
11948 |
+
"rewards/rejected": -1.3767958879470825,
|
11949 |
+
"step": 1560
|
11950 |
+
},
|
11951 |
+
{
|
11952 |
+
"epoch": 1.7811987741429691,
|
11953 |
+
"grad_norm": 56.82906531266912,
|
11954 |
+
"learning_rate": 6.4569413841546124e-09,
|
11955 |
+
"logits/chosen": -1.3752797842025757,
|
11956 |
+
"logits/rejected": -1.342197060585022,
|
11957 |
+
"logps/chosen": -213.2560577392578,
|
11958 |
+
"logps/rejected": -226.26138305664062,
|
11959 |
+
"loss": 0.3949,
|
11960 |
+
"rewards/accuracies": 0.78125,
|
11961 |
+
"rewards/chosen": -0.6319646835327148,
|
11962 |
+
"rewards/margins": 0.7403106093406677,
|
11963 |
+
"rewards/rejected": -1.3722753524780273,
|
11964 |
+
"step": 1562
|
11965 |
+
},
|
11966 |
+
{
|
11967 |
+
"epoch": 1.78347943838643,
|
11968 |
+
"grad_norm": 57.01107575636888,
|
11969 |
+
"learning_rate": 6.323164810717751e-09,
|
11970 |
+
"logits/chosen": -1.4058406352996826,
|
11971 |
+
"logits/rejected": -1.4284158945083618,
|
11972 |
+
"logps/chosen": -164.4686279296875,
|
11973 |
+
"logps/rejected": -175.3655548095703,
|
11974 |
+
"loss": 0.4426,
|
11975 |
+
"rewards/accuracies": 0.84375,
|
11976 |
+
"rewards/chosen": -0.2919442057609558,
|
11977 |
+
"rewards/margins": 0.8540188074111938,
|
11978 |
+
"rewards/rejected": -1.1459629535675049,
|
11979 |
+
"step": 1564
|
11980 |
+
},
|
11981 |
+
{
|
11982 |
+
"epoch": 1.785760102629891,
|
11983 |
+
"grad_norm": 53.78725622915069,
|
11984 |
+
"learning_rate": 6.1907433372555885e-09,
|
11985 |
+
"logits/chosen": -1.227329969406128,
|
11986 |
+
"logits/rejected": -1.2803211212158203,
|
11987 |
+
"logps/chosen": -215.01806640625,
|
11988 |
+
"logps/rejected": -240.11415100097656,
|
11989 |
+
"loss": 0.4856,
|
11990 |
+
"rewards/accuracies": 0.78125,
|
11991 |
+
"rewards/chosen": -0.37019410729408264,
|
11992 |
+
"rewards/margins": 1.2054839134216309,
|
11993 |
+
"rewards/rejected": -1.5756779909133911,
|
11994 |
+
"step": 1566
|
11995 |
+
},
|
11996 |
+
{
|
11997 |
+
"epoch": 1.788040766873352,
|
11998 |
+
"grad_norm": 50.06277787185665,
|
11999 |
+
"learning_rate": 6.0596788793360055e-09,
|
12000 |
+
"logits/chosen": -1.2019214630126953,
|
12001 |
+
"logits/rejected": -1.167099952697754,
|
12002 |
+
"logps/chosen": -128.18609619140625,
|
12003 |
+
"logps/rejected": -156.6576690673828,
|
12004 |
+
"loss": 0.3964,
|
12005 |
+
"rewards/accuracies": 0.84375,
|
12006 |
+
"rewards/chosen": -0.24505016207695007,
|
12007 |
+
"rewards/margins": 0.7954214811325073,
|
12008 |
+
"rewards/rejected": -1.0404715538024902,
|
12009 |
+
"step": 1568
|
12010 |
+
},
|
12011 |
+
{
|
12012 |
+
"epoch": 1.7903214311168127,
|
12013 |
+
"grad_norm": 58.820202526748965,
|
12014 |
+
"learning_rate": 5.929973332896676e-09,
|
12015 |
+
"logits/chosen": -1.295718789100647,
|
12016 |
+
"logits/rejected": -1.3617008924484253,
|
12017 |
+
"logps/chosen": -177.0591583251953,
|
12018 |
+
"logps/rejected": -229.13153076171875,
|
12019 |
+
"loss": 0.3765,
|
12020 |
+
"rewards/accuracies": 0.84375,
|
12021 |
+
"rewards/chosen": -0.3693293035030365,
|
12022 |
+
"rewards/margins": 1.0683492422103882,
|
12023 |
+
"rewards/rejected": -1.437678575515747,
|
12024 |
+
"step": 1570
|
12025 |
+
},
|
12026 |
+
{
|
12027 |
+
"epoch": 1.7926020953602737,
|
12028 |
+
"grad_norm": 69.22569134675261,
|
12029 |
+
"learning_rate": 5.801628574217732e-09,
|
12030 |
+
"logits/chosen": -1.231567621231079,
|
12031 |
+
"logits/rejected": -1.2869716882705688,
|
12032 |
+
"logps/chosen": -115.6909408569336,
|
12033 |
+
"logps/rejected": -169.88333129882812,
|
12034 |
+
"loss": 0.4175,
|
12035 |
+
"rewards/accuracies": 0.8125,
|
12036 |
+
"rewards/chosen": -0.2287057340145111,
|
12037 |
+
"rewards/margins": 0.9424973130226135,
|
12038 |
+
"rewards/rejected": -1.1712028980255127,
|
12039 |
+
"step": 1572
|
12040 |
+
},
|
12041 |
+
{
|
12042 |
+
"epoch": 1.7948827596037344,
|
12043 |
+
"grad_norm": 57.45741658297443,
|
12044 |
+
"learning_rate": 5.674646459894539e-09,
|
12045 |
+
"logits/chosen": -1.3439488410949707,
|
12046 |
+
"logits/rejected": -1.3449054956436157,
|
12047 |
+
"logps/chosen": -169.16116333007812,
|
12048 |
+
"logps/rejected": -214.2422332763672,
|
12049 |
+
"loss": 0.4044,
|
12050 |
+
"rewards/accuracies": 0.84375,
|
12051 |
+
"rewards/chosen": -0.29708918929100037,
|
12052 |
+
"rewards/margins": 1.2709224224090576,
|
12053 |
+
"rewards/rejected": -1.5680116415023804,
|
12054 |
+
"step": 1574
|
12055 |
+
},
|
12056 |
+
{
|
12057 |
+
"epoch": 1.7971634238471954,
|
12058 |
+
"grad_norm": 53.78313839896124,
|
12059 |
+
"learning_rate": 5.549028826810886e-09,
|
12060 |
+
"logits/chosen": -1.3002756834030151,
|
12061 |
+
"logits/rejected": -1.305463433265686,
|
12062 |
+
"logps/chosen": -191.0225067138672,
|
12063 |
+
"logps/rejected": -210.54293823242188,
|
12064 |
+
"loss": 0.4336,
|
12065 |
+
"rewards/accuracies": 0.90625,
|
12066 |
+
"rewards/chosen": -0.6976014971733093,
|
12067 |
+
"rewards/margins": 1.1452404260635376,
|
12068 |
+
"rewards/rejected": -1.8428419828414917,
|
12069 |
+
"step": 1576
|
12070 |
+
},
|
12071 |
+
{
|
12072 |
+
"epoch": 1.7994440880906564,
|
12073 |
+
"grad_norm": 54.73230632912025,
|
12074 |
+
"learning_rate": 5.42477749211242e-09,
|
12075 |
+
"logits/chosen": -1.1779245138168335,
|
12076 |
+
"logits/rejected": -1.1519317626953125,
|
12077 |
+
"logps/chosen": -162.1126708984375,
|
12078 |
+
"logps/rejected": -177.6374053955078,
|
12079 |
+
"loss": 0.4314,
|
12080 |
+
"rewards/accuracies": 0.875,
|
12081 |
+
"rewards/chosen": -0.23180986940860748,
|
12082 |
+
"rewards/margins": 1.1439672708511353,
|
12083 |
+
"rewards/rejected": -1.375777244567871,
|
12084 |
+
"step": 1578
|
12085 |
+
},
|
12086 |
+
{
|
12087 |
+
"epoch": 1.8017247523341173,
|
12088 |
+
"grad_norm": 72.74484543771715,
|
12089 |
+
"learning_rate": 5.301894253180295e-09,
|
12090 |
+
"logits/chosen": -1.179969310760498,
|
12091 |
+
"logits/rejected": -1.2561529874801636,
|
12092 |
+
"logps/chosen": -142.0485382080078,
|
12093 |
+
"logps/rejected": -183.1311492919922,
|
12094 |
+
"loss": 0.4323,
|
12095 |
+
"rewards/accuracies": 0.75,
|
12096 |
+
"rewards/chosen": -0.3770468533039093,
|
12097 |
+
"rewards/margins": 0.8587576150894165,
|
12098 |
+
"rewards/rejected": -1.2358046770095825,
|
12099 |
+
"step": 1580
|
12100 |
+
},
|
12101 |
+
{
|
12102 |
+
"epoch": 1.8040054165775783,
|
12103 |
+
"grad_norm": 52.220335591750064,
|
12104 |
+
"learning_rate": 5.180380887605252e-09,
|
12105 |
+
"logits/chosen": -1.292273998260498,
|
12106 |
+
"logits/rejected": -1.3601034879684448,
|
12107 |
+
"logps/chosen": -198.21466064453125,
|
12108 |
+
"logps/rejected": -246.2793731689453,
|
12109 |
+
"loss": 0.3947,
|
12110 |
+
"rewards/accuracies": 0.90625,
|
12111 |
+
"rewards/chosen": -0.4539499282836914,
|
12112 |
+
"rewards/margins": 1.7145434617996216,
|
12113 |
+
"rewards/rejected": -2.1684935092926025,
|
12114 |
+
"step": 1582
|
12115 |
+
},
|
12116 |
+
{
|
12117 |
+
"epoch": 1.8062860808210393,
|
12118 |
+
"grad_norm": 53.485899864483656,
|
12119 |
+
"learning_rate": 5.060239153161872e-09,
|
12120 |
+
"logits/chosen": -1.1770296096801758,
|
12121 |
+
"logits/rejected": -1.2744455337524414,
|
12122 |
+
"logps/chosen": -188.80868530273438,
|
12123 |
+
"logps/rejected": -251.5846405029297,
|
12124 |
+
"loss": 0.4114,
|
12125 |
+
"rewards/accuracies": 0.84375,
|
12126 |
+
"rewards/chosen": -0.5241818428039551,
|
12127 |
+
"rewards/margins": 1.110929250717163,
|
12128 |
+
"rewards/rejected": -1.6351109743118286,
|
12129 |
+
"step": 1584
|
12130 |
+
},
|
12131 |
+
{
|
12132 |
+
"epoch": 1.8085667450645,
|
12133 |
+
"grad_norm": 71.54349228655005,
|
12134 |
+
"learning_rate": 4.941470787783131e-09,
|
12135 |
+
"logits/chosen": -1.2420802116394043,
|
12136 |
+
"logits/rejected": -1.2955926656723022,
|
12137 |
+
"logps/chosen": -153.46408081054688,
|
12138 |
+
"logps/rejected": -185.44371032714844,
|
12139 |
+
"loss": 0.4323,
|
12140 |
+
"rewards/accuracies": 0.8125,
|
12141 |
+
"rewards/chosen": -0.4371834695339203,
|
12142 |
+
"rewards/margins": 1.0911630392074585,
|
12143 |
+
"rewards/rejected": -1.5283464193344116,
|
12144 |
+
"step": 1586
|
12145 |
+
},
|
12146 |
+
{
|
12147 |
+
"epoch": 1.810847409307961,
|
12148 |
+
"grad_norm": 43.02051086457159,
|
12149 |
+
"learning_rate": 4.8240775095352515e-09,
|
12150 |
+
"logits/chosen": -1.2829195261001587,
|
12151 |
+
"logits/rejected": -1.3175885677337646,
|
12152 |
+
"logps/chosen": -147.54412841796875,
|
12153 |
+
"logps/rejected": -172.52725219726562,
|
12154 |
+
"loss": 0.3904,
|
12155 |
+
"rewards/accuracies": 0.84375,
|
12156 |
+
"rewards/chosen": -0.4066880941390991,
|
12157 |
+
"rewards/margins": 0.901046633720398,
|
12158 |
+
"rewards/rejected": -1.307734727859497,
|
12159 |
+
"step": 1588
|
12160 |
+
},
|
12161 |
+
{
|
12162 |
+
"epoch": 1.8131280735514217,
|
12163 |
+
"grad_norm": 56.502783078071204,
|
12164 |
+
"learning_rate": 4.708061016592923e-09,
|
12165 |
+
"logits/chosen": -1.327852725982666,
|
12166 |
+
"logits/rejected": -1.3697575330734253,
|
12167 |
+
"logps/chosen": -178.9647216796875,
|
12168 |
+
"logps/rejected": -206.71951293945312,
|
12169 |
+
"loss": 0.4688,
|
12170 |
+
"rewards/accuracies": 0.78125,
|
12171 |
+
"rewards/chosen": -0.5031104683876038,
|
12172 |
+
"rewards/margins": 1.0050235986709595,
|
12173 |
+
"rewards/rejected": -1.5081340074539185,
|
12174 |
+
"step": 1590
|
12175 |
+
},
|
12176 |
+
{
|
12177 |
+
"epoch": 1.8154087377948827,
|
12178 |
+
"grad_norm": 52.124874452064724,
|
12179 |
+
"learning_rate": 4.593422987214668e-09,
|
12180 |
+
"logits/chosen": -1.2299569845199585,
|
12181 |
+
"logits/rejected": -1.276241421699524,
|
12182 |
+
"logps/chosen": -146.35475158691406,
|
12183 |
+
"logps/rejected": -174.0493621826172,
|
12184 |
+
"loss": 0.4263,
|
12185 |
+
"rewards/accuracies": 0.96875,
|
12186 |
+
"rewards/chosen": 0.01635855622589588,
|
12187 |
+
"rewards/margins": 1.0491864681243896,
|
12188 |
+
"rewards/rejected": -1.0328278541564941,
|
12189 |
+
"step": 1592
|
12190 |
+
},
|
12191 |
+
{
|
12192 |
+
"epoch": 1.8176894020383436,
|
12193 |
+
"grad_norm": 52.42542929704426,
|
12194 |
+
"learning_rate": 4.480165079718568e-09,
|
12195 |
+
"logits/chosen": -1.2522296905517578,
|
12196 |
+
"logits/rejected": -1.2631944417953491,
|
12197 |
+
"logps/chosen": -195.42518615722656,
|
12198 |
+
"logps/rejected": -221.43161010742188,
|
12199 |
+
"loss": 0.4216,
|
12200 |
+
"rewards/accuracies": 0.90625,
|
12201 |
+
"rewards/chosen": -0.45845019817352295,
|
12202 |
+
"rewards/margins": 1.135830044746399,
|
12203 |
+
"rewards/rejected": -1.5942802429199219,
|
12204 |
+
"step": 1594
|
12205 |
+
},
|
12206 |
+
{
|
12207 |
+
"epoch": 1.8199700662818046,
|
12208 |
+
"grad_norm": 64.83240480126588,
|
12209 |
+
"learning_rate": 4.368288932458308e-09,
|
12210 |
+
"logits/chosen": -1.3056426048278809,
|
12211 |
+
"logits/rejected": -1.2827690839767456,
|
12212 |
+
"logps/chosen": -172.3594207763672,
|
12213 |
+
"logps/rejected": -187.4986114501953,
|
12214 |
+
"loss": 0.4437,
|
12215 |
+
"rewards/accuracies": 0.8125,
|
12216 |
+
"rewards/chosen": -0.4361146092414856,
|
12217 |
+
"rewards/margins": 0.8268535733222961,
|
12218 |
+
"rewards/rejected": -1.2629680633544922,
|
12219 |
+
"step": 1596
|
12220 |
+
},
|
12221 |
+
{
|
12222 |
+
"epoch": 1.8222507305252655,
|
12223 |
+
"grad_norm": 52.10599638998205,
|
12224 |
+
"learning_rate": 4.257796163799454e-09,
|
12225 |
+
"logits/chosen": -1.1653319597244263,
|
12226 |
+
"logits/rejected": -1.2118003368377686,
|
12227 |
+
"logps/chosen": -109.62616729736328,
|
12228 |
+
"logps/rejected": -133.0435028076172,
|
12229 |
+
"loss": 0.4041,
|
12230 |
+
"rewards/accuracies": 0.75,
|
12231 |
+
"rewards/chosen": -0.14802826941013336,
|
12232 |
+
"rewards/margins": 0.935161828994751,
|
12233 |
+
"rewards/rejected": -1.0831902027130127,
|
12234 |
+
"step": 1598
|
12235 |
+
},
|
12236 |
+
{
|
12237 |
+
"epoch": 1.8245313947687265,
|
12238 |
+
"grad_norm": 55.52964788045627,
|
12239 |
+
"learning_rate": 4.1486883720960435e-09,
|
12240 |
+
"logits/chosen": -1.1933330297470093,
|
12241 |
+
"logits/rejected": -1.228639006614685,
|
12242 |
+
"logps/chosen": -149.2159423828125,
|
12243 |
+
"logps/rejected": -200.82742309570312,
|
12244 |
+
"loss": 0.3988,
|
12245 |
+
"rewards/accuracies": 0.875,
|
12246 |
+
"rewards/chosen": -0.32056179642677307,
|
12247 |
+
"rewards/margins": 1.0430424213409424,
|
12248 |
+
"rewards/rejected": -1.3636044263839722,
|
12249 |
+
"step": 1600
|
12250 |
+
},
|
12251 |
+
{
|
12252 |
+
"epoch": 1.8245313947687265,
|
12253 |
+
"eval_logits/chosen": -1.3431406021118164,
|
12254 |
+
"eval_logits/rejected": -1.3253653049468994,
|
12255 |
+
"eval_logps/chosen": -132.28553771972656,
|
12256 |
+
"eval_logps/rejected": -139.2881317138672,
|
12257 |
+
"eval_loss": 0.5448750853538513,
|
12258 |
+
"eval_rewards/accuracies": 0.7200000286102295,
|
12259 |
+
"eval_rewards/chosen": -0.23875679075717926,
|
12260 |
+
"eval_rewards/margins": 0.48378121852874756,
|
12261 |
+
"eval_rewards/rejected": -0.7225379943847656,
|
12262 |
+
"eval_runtime": 20.9101,
|
12263 |
+
"eval_samples_per_second": 4.782,
|
12264 |
+
"eval_steps_per_second": 1.196,
|
12265 |
+
"step": 1600
|
12266 |
+
},
|
12267 |
+
{
|
12268 |
+
"epoch": 1.8268120590121875,
|
12269 |
+
"grad_norm": 52.449005735628376,
|
12270 |
+
"learning_rate": 4.040967135667472e-09,
|
12271 |
+
"logits/chosen": -1.3316993713378906,
|
12272 |
+
"logits/rejected": -1.4035625457763672,
|
12273 |
+
"logps/chosen": -134.9228057861328,
|
12274 |
+
"logps/rejected": -165.58567810058594,
|
12275 |
+
"loss": 0.408,
|
12276 |
+
"rewards/accuracies": 0.75,
|
12277 |
+
"rewards/chosen": -0.16615627706050873,
|
12278 |
+
"rewards/margins": 0.7779840230941772,
|
12279 |
+
"rewards/rejected": -0.9441402554512024,
|
12280 |
+
"step": 1602
|
12281 |
+
},
|
12282 |
+
{
|
12283 |
+
"epoch": 1.8290927232556482,
|
12284 |
+
"grad_norm": 58.59433738136844,
|
12285 |
+
"learning_rate": 3.9346340127756616e-09,
|
12286 |
+
"logits/chosen": -1.3039021492004395,
|
12287 |
+
"logits/rejected": -1.3089298009872437,
|
12288 |
+
"logps/chosen": -196.00987243652344,
|
12289 |
+
"logps/rejected": -218.62310791015625,
|
12290 |
+
"loss": 0.4161,
|
12291 |
+
"rewards/accuracies": 0.875,
|
12292 |
+
"rewards/chosen": -0.46055513620376587,
|
12293 |
+
"rewards/margins": 0.8772752285003662,
|
12294 |
+
"rewards/rejected": -1.3378304243087769,
|
12295 |
+
"step": 1604
|
12296 |
+
},
|
12297 |
+
{
|
12298 |
+
"epoch": 1.8313733874991092,
|
12299 |
+
"grad_norm": 63.586529609086426,
|
12300 |
+
"learning_rate": 3.829690541602504e-09,
|
12301 |
+
"logits/chosen": -1.173471450805664,
|
12302 |
+
"logits/rejected": -1.27016282081604,
|
12303 |
+
"logps/chosen": -162.02359008789062,
|
12304 |
+
"logps/rejected": -195.39901733398438,
|
12305 |
+
"loss": 0.3966,
|
12306 |
+
"rewards/accuracies": 0.84375,
|
12307 |
+
"rewards/chosen": -0.4807528853416443,
|
12308 |
+
"rewards/margins": 1.1842964887619019,
|
12309 |
+
"rewards/rejected": -1.665049433708191,
|
12310 |
+
"step": 1606
|
12311 |
+
},
|
12312 |
+
{
|
12313 |
+
"epoch": 1.83365405174257,
|
12314 |
+
"grad_norm": 59.32975592376087,
|
12315 |
+
"learning_rate": 3.726138240227628e-09,
|
12316 |
+
"logits/chosen": -1.255246639251709,
|
12317 |
+
"logits/rejected": -1.3199628591537476,
|
12318 |
+
"logps/chosen": -132.19796752929688,
|
12319 |
+
"logps/rejected": -187.007568359375,
|
12320 |
+
"loss": 0.433,
|
12321 |
+
"rewards/accuracies": 0.875,
|
12322 |
+
"rewards/chosen": -0.2542805075645447,
|
12323 |
+
"rewards/margins": 1.1002681255340576,
|
12324 |
+
"rewards/rejected": -1.3545485734939575,
|
12325 |
+
"step": 1608
|
12326 |
+
},
|
12327 |
+
{
|
12328 |
+
"epoch": 1.8359347159860309,
|
12329 |
+
"grad_norm": 53.62231825347675,
|
12330 |
+
"learning_rate": 3.623978606606426e-09,
|
12331 |
+
"logits/chosen": -1.165490984916687,
|
12332 |
+
"logits/rejected": -1.1475247144699097,
|
12333 |
+
"logps/chosen": -173.6094970703125,
|
12334 |
+
"logps/rejected": -202.46153259277344,
|
12335 |
+
"loss": 0.3824,
|
12336 |
+
"rewards/accuracies": 0.75,
|
12337 |
+
"rewards/chosen": -0.47014373540878296,
|
12338 |
+
"rewards/margins": 0.9737652540206909,
|
12339 |
+
"rewards/rejected": -1.4439090490341187,
|
12340 |
+
"step": 1610
|
12341 |
+
},
|
12342 |
+
{
|
12343 |
+
"epoch": 1.8382153802294918,
|
12344 |
+
"grad_norm": 54.19282245857111,
|
12345 |
+
"learning_rate": 3.523213118548407e-09,
|
12346 |
+
"logits/chosen": -1.2416139841079712,
|
12347 |
+
"logits/rejected": -1.278630256652832,
|
12348 |
+
"logps/chosen": -152.09185791015625,
|
12349 |
+
"logps/rejected": -186.2200164794922,
|
12350 |
+
"loss": 0.4421,
|
12351 |
+
"rewards/accuracies": 0.78125,
|
12352 |
+
"rewards/chosen": -0.47047847509384155,
|
12353 |
+
"rewards/margins": 0.9149271845817566,
|
12354 |
+
"rewards/rejected": -1.3854056596755981,
|
12355 |
+
"step": 1612
|
12356 |
+
},
|
12357 |
+
{
|
12358 |
+
"epoch": 1.8404960444729528,
|
12359 |
+
"grad_norm": 66.35928406172948,
|
12360 |
+
"learning_rate": 3.423843233695789e-09,
|
12361 |
+
"logits/chosen": -1.2137271165847778,
|
12362 |
+
"logits/rejected": -1.209067940711975,
|
12363 |
+
"logps/chosen": -162.6099853515625,
|
12364 |
+
"logps/rejected": -168.80848693847656,
|
12365 |
+
"loss": 0.5097,
|
12366 |
+
"rewards/accuracies": 0.8125,
|
12367 |
+
"rewards/chosen": -0.4765735864639282,
|
12368 |
+
"rewards/margins": 0.7680255174636841,
|
12369 |
+
"rewards/rejected": -1.2445989847183228,
|
12370 |
+
"step": 1614
|
12371 |
+
},
|
12372 |
+
{
|
12373 |
+
"epoch": 1.8427767087164137,
|
12374 |
+
"grad_norm": 76.5553780804754,
|
12375 |
+
"learning_rate": 3.3258703895024386e-09,
|
12376 |
+
"logits/chosen": -1.2277370691299438,
|
12377 |
+
"logits/rejected": -1.2694729566574097,
|
12378 |
+
"logps/chosen": -167.89581298828125,
|
12379 |
+
"logps/rejected": -190.85592651367188,
|
12380 |
+
"loss": 0.448,
|
12381 |
+
"rewards/accuracies": 0.6875,
|
12382 |
+
"rewards/chosen": -0.586859941482544,
|
12383 |
+
"rewards/margins": 0.8319023847579956,
|
12384 |
+
"rewards/rejected": -1.4187625646591187,
|
12385 |
+
"step": 1616
|
12386 |
+
},
|
12387 |
+
{
|
12388 |
+
"epoch": 1.8450573729598747,
|
12389 |
+
"grad_norm": 62.481075662440986,
|
12390 |
+
"learning_rate": 3.2292960032130557e-09,
|
12391 |
+
"logits/chosen": -1.3022971153259277,
|
12392 |
+
"logits/rejected": -1.3909885883331299,
|
12393 |
+
"logps/chosen": -138.24668884277344,
|
12394 |
+
"logps/rejected": -144.76327514648438,
|
12395 |
+
"loss": 0.5064,
|
12396 |
+
"rewards/accuracies": 0.75,
|
12397 |
+
"rewards/chosen": -0.29410520195961,
|
12398 |
+
"rewards/margins": 0.5049476623535156,
|
12399 |
+
"rewards/rejected": -0.7990528345108032,
|
12400 |
+
"step": 1618
|
12401 |
+
},
|
12402 |
+
{
|
12403 |
+
"epoch": 1.8473380372033354,
|
12404 |
+
"grad_norm": 49.40037975448036,
|
12405 |
+
"learning_rate": 3.134121471842688e-09,
|
12406 |
+
"logits/chosen": -1.3413515090942383,
|
12407 |
+
"logits/rejected": -1.3459126949310303,
|
12408 |
+
"logps/chosen": -168.7082977294922,
|
12409 |
+
"logps/rejected": -210.79649353027344,
|
12410 |
+
"loss": 0.3703,
|
12411 |
+
"rewards/accuracies": 0.8125,
|
12412 |
+
"rewards/chosen": -0.37254247069358826,
|
12413 |
+
"rewards/margins": 1.4622108936309814,
|
12414 |
+
"rewards/rejected": -1.8347532749176025,
|
12415 |
+
"step": 1620
|
12416 |
+
},
|
12417 |
+
{
|
12418 |
+
"epoch": 1.8496187014467964,
|
12419 |
+
"grad_norm": 64.64467682395964,
|
12420 |
+
"learning_rate": 3.0403481721565373e-09,
|
12421 |
+
"logits/chosen": -1.165281891822815,
|
12422 |
+
"logits/rejected": -1.2723007202148438,
|
12423 |
+
"logps/chosen": -136.62118530273438,
|
12424 |
+
"logps/rejected": -162.6814422607422,
|
12425 |
+
"loss": 0.4646,
|
12426 |
+
"rewards/accuracies": 0.78125,
|
12427 |
+
"rewards/chosen": -0.21550993621349335,
|
12428 |
+
"rewards/margins": 0.7950283288955688,
|
12429 |
+
"rewards/rejected": -1.0105382204055786,
|
12430 |
+
"step": 1622
|
12431 |
+
},
|
12432 |
+
{
|
12433 |
+
"epoch": 1.8518993656902571,
|
12434 |
+
"grad_norm": 52.728229173624364,
|
12435 |
+
"learning_rate": 2.947977460649975e-09,
|
12436 |
+
"logits/chosen": -1.2271634340286255,
|
12437 |
+
"logits/rejected": -1.2183120250701904,
|
12438 |
+
"logps/chosen": -120.3056640625,
|
12439 |
+
"logps/rejected": -152.6262664794922,
|
12440 |
+
"loss": 0.4505,
|
12441 |
+
"rewards/accuracies": 0.8125,
|
12442 |
+
"rewards/chosen": -0.4195891320705414,
|
12443 |
+
"rewards/margins": 0.9882142543792725,
|
12444 |
+
"rewards/rejected": -1.4078034162521362,
|
12445 |
+
"step": 1624
|
12446 |
+
},
|
12447 |
+
{
|
12448 |
+
"epoch": 1.854180029933718,
|
12449 |
+
"grad_norm": 69.89183140204983,
|
12450 |
+
"learning_rate": 2.8570106735290144e-09,
|
12451 |
+
"logits/chosen": -1.2754062414169312,
|
12452 |
+
"logits/rejected": -1.285915732383728,
|
12453 |
+
"logps/chosen": -148.9326171875,
|
12454 |
+
"logps/rejected": -176.26077270507812,
|
12455 |
+
"loss": 0.4184,
|
12456 |
+
"rewards/accuracies": 0.75,
|
12457 |
+
"rewards/chosen": -0.42058467864990234,
|
12458 |
+
"rewards/margins": 1.0723397731781006,
|
12459 |
+
"rewards/rejected": -1.492924451828003,
|
12460 |
+
"step": 1626
|
12461 |
+
},
|
12462 |
+
{
|
12463 |
+
"epoch": 1.856460694177179,
|
12464 |
+
"grad_norm": 78.54847866654863,
|
12465 |
+
"learning_rate": 2.7674491266909016e-09,
|
12466 |
+
"logits/chosen": -1.2117140293121338,
|
12467 |
+
"logits/rejected": -1.2298004627227783,
|
12468 |
+
"logps/chosen": -187.1058807373047,
|
12469 |
+
"logps/rejected": -215.0411376953125,
|
12470 |
+
"loss": 0.4243,
|
12471 |
+
"rewards/accuracies": 0.875,
|
12472 |
+
"rewards/chosen": -0.9483327865600586,
|
12473 |
+
"rewards/margins": 1.1213573217391968,
|
12474 |
+
"rewards/rejected": -2.069690227508545,
|
12475 |
+
"step": 1628
|
12476 |
+
},
|
12477 |
+
{
|
12478 |
+
"epoch": 1.85874135842064,
|
12479 |
+
"grad_norm": 55.582996135186875,
|
12480 |
+
"learning_rate": 2.679294115705144e-09,
|
12481 |
+
"logits/chosen": -1.2989763021469116,
|
12482 |
+
"logits/rejected": -1.425642967224121,
|
12483 |
+
"logps/chosen": -191.18502807617188,
|
12484 |
+
"logps/rejected": -235.31173706054688,
|
12485 |
+
"loss": 0.3977,
|
12486 |
+
"rewards/accuracies": 0.84375,
|
12487 |
+
"rewards/chosen": -0.30440258979797363,
|
12488 |
+
"rewards/margins": 1.0316964387893677,
|
12489 |
+
"rewards/rejected": -1.3360989093780518,
|
12490 |
+
"step": 1630
|
12491 |
+
},
|
12492 |
+
{
|
12493 |
+
"epoch": 1.861022022664101,
|
12494 |
+
"grad_norm": 51.611066982113556,
|
12495 |
+
"learning_rate": 2.5925469157947135e-09,
|
12496 |
+
"logits/chosen": -1.2190608978271484,
|
12497 |
+
"logits/rejected": -1.2017196416854858,
|
12498 |
+
"logps/chosen": -209.8916778564453,
|
12499 |
+
"logps/rejected": -251.41043090820312,
|
12500 |
+
"loss": 0.3837,
|
12501 |
+
"rewards/accuracies": 0.875,
|
12502 |
+
"rewards/chosen": -0.5788516402244568,
|
12503 |
+
"rewards/margins": 1.5164175033569336,
|
12504 |
+
"rewards/rejected": -2.095268964767456,
|
12505 |
+
"step": 1632
|
12506 |
+
},
|
12507 |
+
{
|
12508 |
+
"epoch": 1.863302686907562,
|
12509 |
+
"grad_norm": 61.654874361770986,
|
12510 |
+
"learning_rate": 2.507208781817638e-09,
|
12511 |
+
"logits/chosen": -1.2814161777496338,
|
12512 |
+
"logits/rejected": -1.4046311378479004,
|
12513 |
+
"logps/chosen": -190.91163635253906,
|
12514 |
+
"logps/rejected": -240.64810180664062,
|
12515 |
+
"loss": 0.4435,
|
12516 |
+
"rewards/accuracies": 0.84375,
|
12517 |
+
"rewards/chosen": -0.6178247928619385,
|
12518 |
+
"rewards/margins": 1.0759724378585815,
|
12519 |
+
"rewards/rejected": -1.6937971115112305,
|
12520 |
+
"step": 1634
|
12521 |
+
},
|
12522 |
+
{
|
12523 |
+
"epoch": 1.8655833511510227,
|
12524 |
+
"grad_norm": 65.59685553528632,
|
12525 |
+
"learning_rate": 2.4232809482488403e-09,
|
12526 |
+
"logits/chosen": -1.204872965812683,
|
12527 |
+
"logits/rejected": -1.2012598514556885,
|
12528 |
+
"logps/chosen": -180.18150329589844,
|
12529 |
+
"logps/rejected": -206.04872131347656,
|
12530 |
+
"loss": 0.4425,
|
12531 |
+
"rewards/accuracies": 0.75,
|
12532 |
+
"rewards/chosen": -0.6783896684646606,
|
12533 |
+
"rewards/margins": 1.0068707466125488,
|
12534 |
+
"rewards/rejected": -1.68526029586792,
|
12535 |
+
"step": 1636
|
12536 |
+
},
|
12537 |
+
{
|
12538 |
+
"epoch": 1.8678640153944837,
|
12539 |
+
"grad_norm": 60.54718272304763,
|
12540 |
+
"learning_rate": 2.340764629162284e-09,
|
12541 |
+
"logits/chosen": -1.1621663570404053,
|
12542 |
+
"logits/rejected": -1.2637563943862915,
|
12543 |
+
"logps/chosen": -178.81527709960938,
|
12544 |
+
"logps/rejected": -228.64007568359375,
|
12545 |
+
"loss": 0.3834,
|
12546 |
+
"rewards/accuracies": 0.8125,
|
12547 |
+
"rewards/chosen": -0.3456554114818573,
|
12548 |
+
"rewards/margins": 0.9540256261825562,
|
12549 |
+
"rewards/rejected": -1.2996809482574463,
|
12550 |
+
"step": 1638
|
12551 |
+
},
|
12552 |
+
{
|
12553 |
+
"epoch": 1.8701446796379444,
|
12554 |
+
"grad_norm": 55.533431370620015,
|
12555 |
+
"learning_rate": 2.2596610182133325e-09,
|
12556 |
+
"logits/chosen": -1.2759058475494385,
|
12557 |
+
"logits/rejected": -1.3463444709777832,
|
12558 |
+
"logps/chosen": -156.00009155273438,
|
12559 |
+
"logps/rejected": -192.7170867919922,
|
12560 |
+
"loss": 0.4157,
|
12561 |
+
"rewards/accuracies": 0.90625,
|
12562 |
+
"rewards/chosen": -0.003083046991378069,
|
12563 |
+
"rewards/margins": 1.3031508922576904,
|
12564 |
+
"rewards/rejected": -1.3062340021133423,
|
12565 |
+
"step": 1640
|
12566 |
+
},
|
12567 |
+
{
|
12568 |
+
"epoch": 1.8724253438814054,
|
12569 |
+
"grad_norm": 62.10012065390903,
|
12570 |
+
"learning_rate": 2.1799712886216628e-09,
|
12571 |
+
"logits/chosen": -1.1718287467956543,
|
12572 |
+
"logits/rejected": -1.1970500946044922,
|
12573 |
+
"logps/chosen": -146.75628662109375,
|
12574 |
+
"logps/rejected": -208.56558227539062,
|
12575 |
+
"loss": 0.3858,
|
12576 |
+
"rewards/accuracies": 0.875,
|
12577 |
+
"rewards/chosen": -0.1122959554195404,
|
12578 |
+
"rewards/margins": 1.5379095077514648,
|
12579 |
+
"rewards/rejected": -1.650205373764038,
|
12580 |
+
"step": 1642
|
12581 |
+
},
|
12582 |
+
{
|
12583 |
+
"epoch": 1.8747060081248663,
|
12584 |
+
"grad_norm": 56.04768532231553,
|
12585 |
+
"learning_rate": 2.1016965931541007e-09,
|
12586 |
+
"logits/chosen": -1.253339171409607,
|
12587 |
+
"logits/rejected": -1.3260321617126465,
|
12588 |
+
"logps/chosen": -207.69583129882812,
|
12589 |
+
"logps/rejected": -243.12586975097656,
|
12590 |
+
"loss": 0.4223,
|
12591 |
+
"rewards/accuracies": 0.84375,
|
12592 |
+
"rewards/chosen": -0.5657081604003906,
|
12593 |
+
"rewards/margins": 1.0796537399291992,
|
12594 |
+
"rewards/rejected": -1.6453620195388794,
|
12595 |
+
"step": 1644
|
12596 |
+
},
|
12597 |
+
{
|
12598 |
+
"epoch": 1.8769866723683273,
|
12599 |
+
"grad_norm": 60.14746373631622,
|
12600 |
+
"learning_rate": 2.02483806410807e-09,
|
12601 |
+
"logits/chosen": -1.114426851272583,
|
12602 |
+
"logits/rejected": -1.2585283517837524,
|
12603 |
+
"logps/chosen": -144.21237182617188,
|
12604 |
+
"logps/rejected": -216.4067840576172,
|
12605 |
+
"loss": 0.4165,
|
12606 |
+
"rewards/accuracies": 0.875,
|
12607 |
+
"rewards/chosen": -0.6168836951255798,
|
12608 |
+
"rewards/margins": 1.211737036705017,
|
12609 |
+
"rewards/rejected": -1.8286206722259521,
|
12610 |
+
"step": 1646
|
12611 |
+
},
|
12612 |
+
{
|
12613 |
+
"epoch": 1.8792673366117882,
|
12614 |
+
"grad_norm": 58.35041588601004,
|
12615 |
+
"learning_rate": 1.9493968132951455e-09,
|
12616 |
+
"logits/chosen": -1.2065017223358154,
|
12617 |
+
"logits/rejected": -1.2185966968536377,
|
12618 |
+
"logps/chosen": -144.52651977539062,
|
12619 |
+
"logps/rejected": -182.16015625,
|
12620 |
+
"loss": 0.4119,
|
12621 |
+
"rewards/accuracies": 0.875,
|
12622 |
+
"rewards/chosen": -0.5021862387657166,
|
12623 |
+
"rewards/margins": 1.0205962657928467,
|
12624 |
+
"rewards/rejected": -1.522782564163208,
|
12625 |
+
"step": 1648
|
12626 |
+
},
|
12627 |
+
{
|
12628 |
+
"epoch": 1.8815480008552492,
|
12629 |
+
"grad_norm": 61.66500286457481,
|
12630 |
+
"learning_rate": 1.875373932025015e-09,
|
12631 |
+
"logits/chosen": -1.113441824913025,
|
12632 |
+
"logits/rejected": -1.2277448177337646,
|
12633 |
+
"logps/chosen": -88.60232543945312,
|
12634 |
+
"logps/rejected": -123.64103698730469,
|
12635 |
+
"loss": 0.5025,
|
12636 |
+
"rewards/accuracies": 0.6875,
|
12637 |
+
"rewards/chosen": -0.3603760600090027,
|
12638 |
+
"rewards/margins": 0.6354212760925293,
|
12639 |
+
"rewards/rejected": -0.9957974553108215,
|
12640 |
+
"step": 1650
|
12641 |
+
},
|
12642 |
+
{
|
12643 |
+
"epoch": 1.8838286650987102,
|
12644 |
+
"grad_norm": 66.53236233989348,
|
12645 |
+
"learning_rate": 1.8027704910896668e-09,
|
12646 |
+
"logits/chosen": -1.2696802616119385,
|
12647 |
+
"logits/rejected": -1.293856143951416,
|
12648 |
+
"logps/chosen": -178.1632843017578,
|
12649 |
+
"logps/rejected": -208.1569061279297,
|
12650 |
+
"loss": 0.4141,
|
12651 |
+
"rewards/accuracies": 0.84375,
|
12652 |
+
"rewards/chosen": -0.4311864674091339,
|
12653 |
+
"rewards/margins": 1.18040931224823,
|
12654 |
+
"rewards/rejected": -1.611595869064331,
|
12655 |
+
"step": 1652
|
12656 |
+
},
|
12657 |
+
{
|
12658 |
+
"epoch": 1.886109329342171,
|
12659 |
+
"grad_norm": 73.576105680566,
|
12660 |
+
"learning_rate": 1.731587540747903e-09,
|
12661 |
+
"logits/chosen": -1.3243728876113892,
|
12662 |
+
"logits/rejected": -1.3624733686447144,
|
12663 |
+
"logps/chosen": -178.9629669189453,
|
12664 |
+
"logps/rejected": -203.73611450195312,
|
12665 |
+
"loss": 0.4112,
|
12666 |
+
"rewards/accuracies": 0.84375,
|
12667 |
+
"rewards/chosen": -0.35923391580581665,
|
12668 |
+
"rewards/margins": 1.0950078964233398,
|
12669 |
+
"rewards/rejected": -1.4542417526245117,
|
12670 |
+
"step": 1654
|
12671 |
+
},
|
12672 |
+
{
|
12673 |
+
"epoch": 1.8883899935856319,
|
12674 |
+
"grad_norm": 60.917071878292,
|
12675 |
+
"learning_rate": 1.6618261107101628e-09,
|
12676 |
+
"logits/chosen": -1.2426798343658447,
|
12677 |
+
"logits/rejected": -1.299846887588501,
|
12678 |
+
"logps/chosen": -166.9674530029297,
|
12679 |
+
"logps/rejected": -200.53256225585938,
|
12680 |
+
"loss": 0.4225,
|
12681 |
+
"rewards/accuracies": 0.875,
|
12682 |
+
"rewards/chosen": -0.36187708377838135,
|
12683 |
+
"rewards/margins": 1.0805463790893555,
|
12684 |
+
"rewards/rejected": -1.4424233436584473,
|
12685 |
+
"step": 1656
|
12686 |
+
},
|
12687 |
+
{
|
12688 |
+
"epoch": 1.8906706578290926,
|
12689 |
+
"grad_norm": 58.315768026639354,
|
12690 |
+
"learning_rate": 1.5934872101235785e-09,
|
12691 |
+
"logits/chosen": -1.1988379955291748,
|
12692 |
+
"logits/rejected": -1.276864767074585,
|
12693 |
+
"logps/chosen": -117.3199691772461,
|
12694 |
+
"logps/rejected": -166.17037963867188,
|
12695 |
+
"loss": 0.4669,
|
12696 |
+
"rewards/accuracies": 0.6875,
|
12697 |
+
"rewards/chosen": -0.37138426303863525,
|
12698 |
+
"rewards/margins": 0.7784909009933472,
|
12699 |
+
"rewards/rejected": -1.1498751640319824,
|
12700 |
+
"step": 1658
|
12701 |
+
},
|
12702 |
+
{
|
12703 |
+
"epoch": 1.8929513220725536,
|
12704 |
+
"grad_norm": 73.00783189642374,
|
12705 |
+
"learning_rate": 1.5265718275574656e-09,
|
12706 |
+
"logits/chosen": -1.199881672859192,
|
12707 |
+
"logits/rejected": -1.294306755065918,
|
12708 |
+
"logps/chosen": -163.18499755859375,
|
12709 |
+
"logps/rejected": -240.10389709472656,
|
12710 |
+
"loss": 0.4091,
|
12711 |
+
"rewards/accuracies": 0.90625,
|
12712 |
+
"rewards/chosen": -0.5089311599731445,
|
12713 |
+
"rewards/margins": 1.5384039878845215,
|
12714 |
+
"rewards/rejected": -2.047335147857666,
|
12715 |
+
"step": 1660
|
12716 |
+
},
|
12717 |
+
{
|
12718 |
+
"epoch": 1.8952319863160145,
|
12719 |
+
"grad_norm": 59.87965880073438,
|
12720 |
+
"learning_rate": 1.4610809309889338e-09,
|
12721 |
+
"logits/chosen": -1.1384081840515137,
|
12722 |
+
"logits/rejected": -1.2086718082427979,
|
12723 |
+
"logps/chosen": -228.3030548095703,
|
12724 |
+
"logps/rejected": -291.9572448730469,
|
12725 |
+
"loss": 0.3708,
|
12726 |
+
"rewards/accuracies": 0.875,
|
12727 |
+
"rewards/chosen": -0.6747040748596191,
|
12728 |
+
"rewards/margins": 1.962480068206787,
|
12729 |
+
"rewards/rejected": -2.6371843814849854,
|
12730 |
+
"step": 1662
|
12731 |
+
},
|
12732 |
+
{
|
12733 |
+
"epoch": 1.8975126505594755,
|
12734 |
+
"grad_norm": 65.04049810626964,
|
12735 |
+
"learning_rate": 1.3970154677889312e-09,
|
12736 |
+
"logits/chosen": -1.2462139129638672,
|
12737 |
+
"logits/rejected": -1.3531625270843506,
|
12738 |
+
"logps/chosen": -166.90621948242188,
|
12739 |
+
"logps/rejected": -219.62258911132812,
|
12740 |
+
"loss": 0.4477,
|
12741 |
+
"rewards/accuracies": 0.78125,
|
12742 |
+
"rewards/chosen": -0.6883362531661987,
|
12743 |
+
"rewards/margins": 1.3010790348052979,
|
12744 |
+
"rewards/rejected": -1.989415168762207,
|
12745 |
+
"step": 1664
|
12746 |
+
},
|
12747 |
+
{
|
12748 |
+
"epoch": 1.8997933148029365,
|
12749 |
+
"grad_norm": 67.17708620355747,
|
12750 |
+
"learning_rate": 1.3343763647085337e-09,
|
12751 |
+
"logits/chosen": -1.149139404296875,
|
12752 |
+
"logits/rejected": -1.2173185348510742,
|
12753 |
+
"logps/chosen": -155.94863891601562,
|
12754 |
+
"logps/rejected": -230.28384399414062,
|
12755 |
+
"loss": 0.4712,
|
12756 |
+
"rewards/accuracies": 0.84375,
|
12757 |
+
"rewards/chosen": -0.5960929989814758,
|
12758 |
+
"rewards/margins": 1.4794337749481201,
|
12759 |
+
"rewards/rejected": -2.075526714324951,
|
12760 |
+
"step": 1666
|
12761 |
+
},
|
12762 |
+
{
|
12763 |
+
"epoch": 1.9020739790463974,
|
12764 |
+
"grad_norm": 56.97928801975319,
|
12765 |
+
"learning_rate": 1.2731645278655445e-09,
|
12766 |
+
"logits/chosen": -1.2667725086212158,
|
12767 |
+
"logits/rejected": -1.3436360359191895,
|
12768 |
+
"logps/chosen": -160.5927734375,
|
12769 |
+
"logps/rejected": -186.63018798828125,
|
12770 |
+
"loss": 0.4321,
|
12771 |
+
"rewards/accuracies": 0.8125,
|
12772 |
+
"rewards/chosen": -0.48528626561164856,
|
12773 |
+
"rewards/margins": 0.931164562702179,
|
12774 |
+
"rewards/rejected": -1.4164507389068604,
|
12775 |
+
"step": 1668
|
12776 |
+
},
|
12777 |
+
{
|
12778 |
+
"epoch": 1.9043546432898582,
|
12779 |
+
"grad_norm": 63.8162468661416,
|
12780 |
+
"learning_rate": 1.2133808427313485e-09,
|
12781 |
+
"logits/chosen": -1.2804149389266968,
|
12782 |
+
"logits/rejected": -1.3368322849273682,
|
12783 |
+
"logps/chosen": -131.45181274414062,
|
12784 |
+
"logps/rejected": -180.07037353515625,
|
12785 |
+
"loss": 0.4551,
|
12786 |
+
"rewards/accuracies": 0.65625,
|
12787 |
+
"rewards/chosen": -0.5746269226074219,
|
12788 |
+
"rewards/margins": 0.8660170435905457,
|
12789 |
+
"rewards/rejected": -1.4406440258026123,
|
12790 |
+
"step": 1670
|
12791 |
+
},
|
12792 |
+
{
|
12793 |
+
"epoch": 1.9066353075333191,
|
12794 |
+
"grad_norm": 52.46121205171607,
|
12795 |
+
"learning_rate": 1.1550261741181565e-09,
|
12796 |
+
"logits/chosen": -1.2648401260375977,
|
12797 |
+
"logits/rejected": -1.239923119544983,
|
12798 |
+
"logps/chosen": -163.7249298095703,
|
12799 |
+
"logps/rejected": -190.42611694335938,
|
12800 |
+
"loss": 0.3885,
|
12801 |
+
"rewards/accuracies": 0.84375,
|
12802 |
+
"rewards/chosen": -0.3933155834674835,
|
12803 |
+
"rewards/margins": 1.0268510580062866,
|
12804 |
+
"rewards/rejected": -1.4201666116714478,
|
12805 |
+
"step": 1672
|
12806 |
+
},
|
12807 |
+
{
|
12808 |
+
"epoch": 1.9089159717767799,
|
12809 |
+
"grad_norm": 64.22449427271324,
|
12810 |
+
"learning_rate": 1.0981013661664706e-09,
|
12811 |
+
"logits/chosen": -1.293068289756775,
|
12812 |
+
"logits/rejected": -1.4030743837356567,
|
12813 |
+
"logps/chosen": -174.76348876953125,
|
12814 |
+
"logps/rejected": -219.83175659179688,
|
12815 |
+
"loss": 0.4548,
|
12816 |
+
"rewards/accuracies": 0.75,
|
12817 |
+
"rewards/chosen": -0.3287160098552704,
|
12818 |
+
"rewards/margins": 0.9871301651000977,
|
12819 |
+
"rewards/rejected": -1.3158462047576904,
|
12820 |
+
"step": 1674
|
12821 |
+
},
|
12822 |
+
{
|
12823 |
+
"epoch": 1.9111966360202408,
|
12824 |
+
"grad_norm": 55.77569297347896,
|
12825 |
+
"learning_rate": 1.042607242332838e-09,
|
12826 |
+
"logits/chosen": -1.2760488986968994,
|
12827 |
+
"logits/rejected": -1.31367027759552,
|
12828 |
+
"logps/chosen": -148.86257934570312,
|
12829 |
+
"logps/rejected": -173.52359008789062,
|
12830 |
+
"loss": 0.4349,
|
12831 |
+
"rewards/accuracies": 0.78125,
|
12832 |
+
"rewards/chosen": -0.24598023295402527,
|
12833 |
+
"rewards/margins": 0.7863931655883789,
|
12834 |
+
"rewards/rejected": -1.0323734283447266,
|
12835 |
+
"step": 1676
|
12836 |
+
},
|
12837 |
+
{
|
12838 |
+
"epoch": 1.9134773002637018,
|
12839 |
+
"grad_norm": 57.697067279665426,
|
12840 |
+
"learning_rate": 9.885446053780277e-10,
|
12841 |
+
"logits/chosen": -1.3217543363571167,
|
12842 |
+
"logits/rejected": -1.3588594198226929,
|
12843 |
+
"logps/chosen": -234.18218994140625,
|
12844 |
+
"logps/rejected": -244.6886444091797,
|
12845 |
+
"loss": 0.4099,
|
12846 |
+
"rewards/accuracies": 0.78125,
|
12847 |
+
"rewards/chosen": -0.6677453517913818,
|
12848 |
+
"rewards/margins": 0.864335834980011,
|
12849 |
+
"rewards/rejected": -1.5320810079574585,
|
12850 |
+
"step": 1678
|
12851 |
+
},
|
12852 |
+
{
|
12853 |
+
"epoch": 1.9157579645071627,
|
12854 |
+
"grad_norm": 52.93198869808111,
|
12855 |
+
"learning_rate": 9.359142373553286e-10,
|
12856 |
+
"logits/chosen": -1.2812589406967163,
|
12857 |
+
"logits/rejected": -1.374354362487793,
|
12858 |
+
"logps/chosen": -181.33377075195312,
|
12859 |
+
"logps/rejected": -226.33584594726562,
|
12860 |
+
"loss": 0.3969,
|
12861 |
+
"rewards/accuracies": 0.84375,
|
12862 |
+
"rewards/chosen": -0.40316393971443176,
|
12863 |
+
"rewards/margins": 1.0113019943237305,
|
12864 |
+
"rewards/rejected": -1.4144660234451294,
|
12865 |
+
"step": 1680
|
12866 |
+
},
|
12867 |
+
{
|
12868 |
+
"epoch": 1.9180386287506237,
|
12869 |
+
"grad_norm": 56.81221018582624,
|
12870 |
+
"learning_rate": 8.847168995992915e-10,
|
12871 |
+
"logits/chosen": -1.3222296237945557,
|
12872 |
+
"logits/rejected": -1.3470890522003174,
|
12873 |
+
"logps/chosen": -144.92401123046875,
|
12874 |
+
"logps/rejected": -160.66360473632812,
|
12875 |
+
"loss": 0.4557,
|
12876 |
+
"rewards/accuracies": 0.8125,
|
12877 |
+
"rewards/chosen": -0.2944653630256653,
|
12878 |
+
"rewards/margins": 0.691783607006073,
|
12879 |
+
"rewards/rejected": -0.9862489104270935,
|
12880 |
+
"step": 1682
|
12881 |
+
},
|
12882 |
+
{
|
12883 |
+
"epoch": 1.9203192929940847,
|
12884 |
+
"grad_norm": 59.96217669380799,
|
12885 |
+
"learning_rate": 8.349533327146719e-10,
|
12886 |
+
"logits/chosen": -1.1826375722885132,
|
12887 |
+
"logits/rejected": -1.2797571420669556,
|
12888 |
+
"logps/chosen": -173.0347442626953,
|
12889 |
+
"logps/rejected": -217.1268768310547,
|
12890 |
+
"loss": 0.421,
|
12891 |
+
"rewards/accuracies": 0.875,
|
12892 |
+
"rewards/chosen": -0.6681329011917114,
|
12893 |
+
"rewards/margins": 1.3352155685424805,
|
12894 |
+
"rewards/rejected": -2.0033483505249023,
|
12895 |
+
"step": 1684
|
12896 |
+
},
|
12897 |
+
{
|
12898 |
+
"epoch": 1.9225999572375454,
|
12899 |
+
"grad_norm": 53.71192157200429,
|
12900 |
+
"learning_rate": 7.866242565657599e-10,
|
12901 |
+
"logits/chosen": -1.133514642715454,
|
12902 |
+
"logits/rejected": -1.2035727500915527,
|
12903 |
+
"logps/chosen": -123.30015563964844,
|
12904 |
+
"logps/rejected": -165.7233123779297,
|
12905 |
+
"loss": 0.4164,
|
12906 |
+
"rewards/accuracies": 0.90625,
|
12907 |
+
"rewards/chosen": -0.38910388946533203,
|
12908 |
+
"rewards/margins": 1.0756738185882568,
|
12909 |
+
"rewards/rejected": -1.4647778272628784,
|
12910 |
+
"step": 1686
|
12911 |
+
},
|
12912 |
+
{
|
12913 |
+
"epoch": 1.9248806214810064,
|
12914 |
+
"grad_norm": 84.94214636470907,
|
12915 |
+
"learning_rate": 7.397303702659674e-10,
|
12916 |
+
"logits/chosen": -1.2931269407272339,
|
12917 |
+
"logits/rejected": -1.3728893995285034,
|
12918 |
+
"logps/chosen": -153.67669677734375,
|
12919 |
+
"logps/rejected": -198.1190185546875,
|
12920 |
+
"loss": 0.4512,
|
12921 |
+
"rewards/accuracies": 0.78125,
|
12922 |
+
"rewards/chosen": -0.3215530216693878,
|
12923 |
+
"rewards/margins": 1.3663368225097656,
|
12924 |
+
"rewards/rejected": -1.6878899335861206,
|
12925 |
+
"step": 1688
|
12926 |
+
},
|
12927 |
+
{
|
12928 |
+
"epoch": 1.927161285724467,
|
12929 |
+
"grad_norm": 60.620511783810954,
|
12930 |
+
"learning_rate": 6.942723521676464e-10,
|
12931 |
+
"logits/chosen": -1.2509236335754395,
|
12932 |
+
"logits/rejected": -1.3288919925689697,
|
12933 |
+
"logps/chosen": -171.9304962158203,
|
12934 |
+
"logps/rejected": -200.7103729248047,
|
12935 |
+
"loss": 0.406,
|
12936 |
+
"rewards/accuracies": 0.90625,
|
12937 |
+
"rewards/chosen": -0.2758704423904419,
|
12938 |
+
"rewards/margins": 0.8382859230041504,
|
12939 |
+
"rewards/rejected": -1.1141563653945923,
|
12940 |
+
"step": 1690
|
12941 |
+
},
|
12942 |
+
{
|
12943 |
+
"epoch": 1.929441949967928,
|
12944 |
+
"grad_norm": 61.96268516778484,
|
12945 |
+
"learning_rate": 6.502508598523748e-10,
|
12946 |
+
"logits/chosen": -1.1139042377471924,
|
12947 |
+
"logits/rejected": -1.113561987876892,
|
12948 |
+
"logps/chosen": -145.94715881347656,
|
12949 |
+
"logps/rejected": -177.67091369628906,
|
12950 |
+
"loss": 0.4153,
|
12951 |
+
"rewards/accuracies": 0.84375,
|
12952 |
+
"rewards/chosen": -0.3293991684913635,
|
12953 |
+
"rewards/margins": 0.9399027824401855,
|
12954 |
+
"rewards/rejected": -1.2693020105361938,
|
12955 |
+
"step": 1692
|
12956 |
+
},
|
12957 |
+
{
|
12958 |
+
"epoch": 1.931722614211389,
|
12959 |
+
"grad_norm": 64.38956995843415,
|
12960 |
+
"learning_rate": 6.076665301213646e-10,
|
12961 |
+
"logits/chosen": -1.2710590362548828,
|
12962 |
+
"logits/rejected": -1.3398025035858154,
|
12963 |
+
"logps/chosen": -258.7908935546875,
|
12964 |
+
"logps/rejected": -270.1191711425781,
|
12965 |
+
"loss": 0.4663,
|
12966 |
+
"rewards/accuracies": 0.75,
|
12967 |
+
"rewards/chosen": -1.183610200881958,
|
12968 |
+
"rewards/margins": 1.2962732315063477,
|
12969 |
+
"rewards/rejected": -2.4798836708068848,
|
12970 |
+
"step": 1694
|
12971 |
+
},
|
12972 |
+
{
|
12973 |
+
"epoch": 1.93400327845485,
|
12974 |
+
"grad_norm": 70.46099460182324,
|
12975 |
+
"learning_rate": 5.665199789862907e-10,
|
12976 |
+
"logits/chosen": -1.3359112739562988,
|
12977 |
+
"logits/rejected": -1.3275290727615356,
|
12978 |
+
"logps/chosen": -183.69839477539062,
|
12979 |
+
"logps/rejected": -183.45762634277344,
|
12980 |
+
"loss": 0.5587,
|
12981 |
+
"rewards/accuracies": 0.6875,
|
12982 |
+
"rewards/chosen": -0.5863617658615112,
|
12983 |
+
"rewards/margins": 0.39982593059539795,
|
12984 |
+
"rewards/rejected": -0.9861876368522644,
|
12985 |
+
"step": 1696
|
12986 |
+
},
|
12987 |
+
{
|
12988 |
+
"epoch": 1.936283942698311,
|
12989 |
+
"grad_norm": 59.14480772543764,
|
12990 |
+
"learning_rate": 5.268118016603651e-10,
|
12991 |
+
"logits/chosen": -1.2939709424972534,
|
12992 |
+
"logits/rejected": -1.2879596948623657,
|
12993 |
+
"logps/chosen": -242.49627685546875,
|
12994 |
+
"logps/rejected": -255.47325134277344,
|
12995 |
+
"loss": 0.5263,
|
12996 |
+
"rewards/accuracies": 0.875,
|
12997 |
+
"rewards/chosen": -0.6082537174224854,
|
12998 |
+
"rewards/margins": 0.8644734621047974,
|
12999 |
+
"rewards/rejected": -1.4727270603179932,
|
13000 |
+
"step": 1698
|
13001 |
+
},
|
13002 |
+
{
|
13003 |
+
"epoch": 1.938564606941772,
|
13004 |
+
"grad_norm": 65.14033380217037,
|
13005 |
+
"learning_rate": 4.88542572549755e-10,
|
13006 |
+
"logits/chosen": -1.258131742477417,
|
13007 |
+
"logits/rejected": -1.341509461402893,
|
13008 |
+
"logps/chosen": -280.3218688964844,
|
13009 |
+
"logps/rejected": -349.66766357421875,
|
13010 |
+
"loss": 0.4044,
|
13011 |
+
"rewards/accuracies": 0.875,
|
13012 |
+
"rewards/chosen": -0.9879501461982727,
|
13013 |
+
"rewards/margins": 1.8015985488891602,
|
13014 |
+
"rewards/rejected": -2.789548873901367,
|
13015 |
+
"step": 1700
|
13016 |
+
},
|
13017 |
+
{
|
13018 |
+
"epoch": 1.938564606941772,
|
13019 |
+
"eval_logits/chosen": -1.3368662595748901,
|
13020 |
+
"eval_logits/rejected": -1.3193824291229248,
|
13021 |
+
"eval_logps/chosen": -132.35743713378906,
|
13022 |
+
"eval_logps/rejected": -139.43797302246094,
|
13023 |
+
"eval_loss": 0.5376756191253662,
|
13024 |
+
"eval_rewards/accuracies": 0.7200000286102295,
|
13025 |
+
"eval_rewards/chosen": -0.2459474503993988,
|
13026 |
+
"eval_rewards/margins": 0.49157509207725525,
|
13027 |
+
"eval_rewards/rejected": -0.7375224828720093,
|
13028 |
+
"eval_runtime": 20.9786,
|
13029 |
+
"eval_samples_per_second": 4.767,
|
13030 |
+
"eval_steps_per_second": 1.192,
|
13031 |
+
"step": 1700
|
13032 |
+
},
|
13033 |
+
{
|
13034 |
+
"epoch": 1.9408452711852326,
|
13035 |
+
"grad_norm": 67.5831820584933,
|
13036 |
+
"learning_rate": 4.5171284524521127e-10,
|
13037 |
+
"logits/chosen": -1.1273996829986572,
|
13038 |
+
"logits/rejected": -1.200268030166626,
|
13039 |
+
"logps/chosen": -141.1074981689453,
|
13040 |
+
"logps/rejected": -161.90756225585938,
|
13041 |
+
"loss": 0.3986,
|
13042 |
+
"rewards/accuracies": 0.875,
|
13043 |
+
"rewards/chosen": -0.13401609659194946,
|
13044 |
+
"rewards/margins": 0.9844987392425537,
|
13045 |
+
"rewards/rejected": -1.118514895439148,
|
13046 |
+
"step": 1702
|
13047 |
+
},
|
13048 |
+
{
|
13049 |
+
"epoch": 1.9431259354286936,
|
13050 |
+
"grad_norm": 64.49222478839803,
|
13051 |
+
"learning_rate": 4.163231525141309e-10,
|
13052 |
+
"logits/chosen": -1.4064816236495972,
|
13053 |
+
"logits/rejected": -1.4361658096313477,
|
13054 |
+
"logps/chosen": -240.72976684570312,
|
13055 |
+
"logps/rejected": -262.8630676269531,
|
13056 |
+
"loss": 0.4409,
|
13057 |
+
"rewards/accuracies": 0.9375,
|
13058 |
+
"rewards/chosen": -0.6487561464309692,
|
13059 |
+
"rewards/margins": 1.2385480403900146,
|
13060 |
+
"rewards/rejected": -1.8873043060302734,
|
13061 |
+
"step": 1704
|
13062 |
+
},
|
13063 |
+
{
|
13064 |
+
"epoch": 1.9454065996721546,
|
13065 |
+
"grad_norm": 50.83672179405874,
|
13066 |
+
"learning_rate": 3.8237400629280714e-10,
|
13067 |
+
"logits/chosen": -1.0193315744400024,
|
13068 |
+
"logits/rejected": -0.9887692928314209,
|
13069 |
+
"logps/chosen": -120.17151641845703,
|
13070 |
+
"logps/rejected": -136.94674682617188,
|
13071 |
+
"loss": 0.4543,
|
13072 |
+
"rewards/accuracies": 0.78125,
|
13073 |
+
"rewards/chosen": -0.27739959955215454,
|
13074 |
+
"rewards/margins": 0.6725433468818665,
|
13075 |
+
"rewards/rejected": -0.949942946434021,
|
13076 |
+
"step": 1706
|
13077 |
+
},
|
13078 |
+
{
|
13079 |
+
"epoch": 1.9476872639156153,
|
13080 |
+
"grad_norm": 53.5967901694209,
|
13081 |
+
"learning_rate": 3.4986589767902476e-10,
|
13082 |
+
"logits/chosen": -1.2411226034164429,
|
13083 |
+
"logits/rejected": -1.3345215320587158,
|
13084 |
+
"logps/chosen": -90.0027084350586,
|
13085 |
+
"logps/rejected": -116.62909698486328,
|
13086 |
+
"loss": 0.4495,
|
13087 |
+
"rewards/accuracies": 0.78125,
|
13088 |
+
"rewards/chosen": -0.078713558614254,
|
13089 |
+
"rewards/margins": 0.6798267364501953,
|
13090 |
+
"rewards/rejected": -0.7585403323173523,
|
13091 |
+
"step": 1708
|
13092 |
+
},
|
13093 |
+
{
|
13094 |
+
"epoch": 1.9499679281590763,
|
13095 |
+
"grad_norm": 59.266256355757655,
|
13096 |
+
"learning_rate": 3.187992969249875e-10,
|
13097 |
+
"logits/chosen": -1.2389315366744995,
|
13098 |
+
"logits/rejected": -1.2326477766036987,
|
13099 |
+
"logps/chosen": -127.89494323730469,
|
13100 |
+
"logps/rejected": -154.4901580810547,
|
13101 |
+
"loss": 0.4366,
|
13102 |
+
"rewards/accuracies": 0.78125,
|
13103 |
+
"rewards/chosen": -0.4514698088169098,
|
13104 |
+
"rewards/margins": 0.6477700471878052,
|
13105 |
+
"rewards/rejected": -1.0992399454116821,
|
13106 |
+
"step": 1710
|
13107 |
+
},
|
13108 |
+
{
|
13109 |
+
"epoch": 1.9522485924025372,
|
13110 |
+
"grad_norm": 62.77597760270175,
|
13111 |
+
"learning_rate": 2.8917465343047954e-10,
|
13112 |
+
"logits/chosen": -1.3058414459228516,
|
13113 |
+
"logits/rejected": -1.374163269996643,
|
13114 |
+
"logps/chosen": -171.95448303222656,
|
13115 |
+
"logps/rejected": -207.97914123535156,
|
13116 |
+
"loss": 0.3709,
|
13117 |
+
"rewards/accuracies": 0.84375,
|
13118 |
+
"rewards/chosen": -0.32011568546295166,
|
13119 |
+
"rewards/margins": 1.2897684574127197,
|
13120 |
+
"rewards/rejected": -1.6098840236663818,
|
13121 |
+
"step": 1712
|
13122 |
+
},
|
13123 |
+
{
|
13124 |
+
"epoch": 1.9545292566459982,
|
13125 |
+
"grad_norm": 56.41697287299428,
|
13126 |
+
"learning_rate": 2.609923957363702e-10,
|
13127 |
+
"logits/chosen": -1.2662739753723145,
|
13128 |
+
"logits/rejected": -1.3142364025115967,
|
13129 |
+
"logps/chosen": -149.61746215820312,
|
13130 |
+
"logps/rejected": -184.44931030273438,
|
13131 |
+
"loss": 0.4197,
|
13132 |
+
"rewards/accuracies": 0.84375,
|
13133 |
+
"rewards/chosen": -0.28848132491111755,
|
13134 |
+
"rewards/margins": 1.1091067790985107,
|
13135 |
+
"rewards/rejected": -1.3975881338119507,
|
13136 |
+
"step": 1714
|
13137 |
+
},
|
13138 |
+
{
|
13139 |
+
"epoch": 1.9568099208894592,
|
13140 |
+
"grad_norm": 58.90785637715505,
|
13141 |
+
"learning_rate": 2.3425293151845273e-10,
|
13142 |
+
"logits/chosen": -1.2464115619659424,
|
13143 |
+
"logits/rejected": -1.2890043258666992,
|
13144 |
+
"logps/chosen": -146.8938446044922,
|
13145 |
+
"logps/rejected": -162.07830810546875,
|
13146 |
+
"loss": 0.4128,
|
13147 |
+
"rewards/accuracies": 0.8125,
|
13148 |
+
"rewards/chosen": -0.2892056405544281,
|
13149 |
+
"rewards/margins": 0.7288376092910767,
|
13150 |
+
"rewards/rejected": -1.0180431604385376,
|
13151 |
+
"step": 1716
|
13152 |
+
},
|
13153 |
+
{
|
13154 |
+
"epoch": 1.9590905851329201,
|
13155 |
+
"grad_norm": 66.05988715134325,
|
13156 |
+
"learning_rate": 2.0895664758151521e-10,
|
13157 |
+
"logits/chosen": -1.1935899257659912,
|
13158 |
+
"logits/rejected": -1.254826545715332,
|
13159 |
+
"logps/chosen": -194.64492797851562,
|
13160 |
+
"logps/rejected": -225.4056854248047,
|
13161 |
+
"loss": 0.4007,
|
13162 |
+
"rewards/accuracies": 0.8125,
|
13163 |
+
"rewards/chosen": -0.525715172290802,
|
13164 |
+
"rewards/margins": 1.1838706731796265,
|
13165 |
+
"rewards/rejected": -1.7095859050750732,
|
13166 |
+
"step": 1718
|
13167 |
+
},
|
13168 |
+
{
|
13169 |
+
"epoch": 1.9613712493763809,
|
13170 |
+
"grad_norm": 64.18176403648182,
|
13171 |
+
"learning_rate": 1.8510390985371216e-10,
|
13172 |
+
"logits/chosen": -1.1969249248504639,
|
13173 |
+
"logits/rejected": -1.2043269872665405,
|
13174 |
+
"logps/chosen": -201.9025115966797,
|
13175 |
+
"logps/rejected": -233.6451873779297,
|
13176 |
+
"loss": 0.4386,
|
13177 |
+
"rewards/accuracies": 0.84375,
|
13178 |
+
"rewards/chosen": -0.5428147315979004,
|
13179 |
+
"rewards/margins": 0.8293758630752563,
|
13180 |
+
"rewards/rejected": -1.3721905946731567,
|
13181 |
+
"step": 1720
|
13182 |
+
},
|
13183 |
+
{
|
13184 |
+
"epoch": 1.9636519136198418,
|
13185 |
+
"grad_norm": 58.86675845104934,
|
13186 |
+
"learning_rate": 1.626950633813351e-10,
|
13187 |
+
"logits/chosen": -1.2230490446090698,
|
13188 |
+
"logits/rejected": -1.2853928804397583,
|
13189 |
+
"logps/chosen": -160.97621154785156,
|
13190 |
+
"logps/rejected": -223.6080780029297,
|
13191 |
+
"loss": 0.4377,
|
13192 |
+
"rewards/accuracies": 0.8125,
|
13193 |
+
"rewards/chosen": -0.3992640972137451,
|
13194 |
+
"rewards/margins": 1.2427630424499512,
|
13195 |
+
"rewards/rejected": -1.6420272588729858,
|
13196 |
+
"step": 1722
|
13197 |
+
},
|
13198 |
+
{
|
13199 |
+
"epoch": 1.9659325778633026,
|
13200 |
+
"grad_norm": 63.84847943868757,
|
13201 |
+
"learning_rate": 1.4173043232380554e-10,
|
13202 |
+
"logits/chosen": -1.1713950634002686,
|
13203 |
+
"logits/rejected": -1.2190814018249512,
|
13204 |
+
"logps/chosen": -160.047119140625,
|
13205 |
+
"logps/rejected": -184.2767791748047,
|
13206 |
+
"loss": 0.4372,
|
13207 |
+
"rewards/accuracies": 0.84375,
|
13208 |
+
"rewards/chosen": -0.4601638913154602,
|
13209 |
+
"rewards/margins": 0.9584896564483643,
|
13210 |
+
"rewards/rejected": -1.4186536073684692,
|
13211 |
+
"step": 1724
|
13212 |
+
},
|
13213 |
+
{
|
13214 |
+
"epoch": 1.9682132421067635,
|
13215 |
+
"grad_norm": 64.3589769132475,
|
13216 |
+
"learning_rate": 1.222103199489455e-10,
|
13217 |
+
"logits/chosen": -1.3236342668533325,
|
13218 |
+
"logits/rejected": -1.3482894897460938,
|
13219 |
+
"logps/chosen": -182.65269470214844,
|
13220 |
+
"logps/rejected": -217.88861083984375,
|
13221 |
+
"loss": 0.4358,
|
13222 |
+
"rewards/accuracies": 0.875,
|
13223 |
+
"rewards/chosen": -0.43886712193489075,
|
13224 |
+
"rewards/margins": 1.474854588508606,
|
13225 |
+
"rewards/rejected": -1.9137215614318848,
|
13226 |
+
"step": 1726
|
13227 |
+
},
|
13228 |
+
{
|
13229 |
+
"epoch": 1.9704939063502245,
|
13230 |
+
"grad_norm": 71.97812711207425,
|
13231 |
+
"learning_rate": 1.0413500862864743e-10,
|
13232 |
+
"logits/chosen": -1.377000093460083,
|
13233 |
+
"logits/rejected": -1.4007121324539185,
|
13234 |
+
"logps/chosen": -237.48316955566406,
|
13235 |
+
"logps/rejected": -259.9788818359375,
|
13236 |
+
"loss": 0.4143,
|
13237 |
+
"rewards/accuracies": 0.90625,
|
13238 |
+
"rewards/chosen": -0.6071898937225342,
|
13239 |
+
"rewards/margins": 1.190570592880249,
|
13240 |
+
"rewards/rejected": -1.7977604866027832,
|
13241 |
+
"step": 1728
|
13242 |
+
},
|
13243 |
+
{
|
13244 |
+
"epoch": 1.9727745705936854,
|
13245 |
+
"grad_norm": 61.7108383512953,
|
13246 |
+
"learning_rate": 8.750475983472227e-11,
|
13247 |
+
"logits/chosen": -1.2676740884780884,
|
13248 |
+
"logits/rejected": -1.2757924795150757,
|
13249 |
+
"logps/chosen": -190.92140197753906,
|
13250 |
+
"logps/rejected": -197.59115600585938,
|
13251 |
+
"loss": 0.4226,
|
13252 |
+
"rewards/accuracies": 0.90625,
|
13253 |
+
"rewards/chosen": -0.5921288132667542,
|
13254 |
+
"rewards/margins": 1.0576057434082031,
|
13255 |
+
"rewards/rejected": -1.6497344970703125,
|
13256 |
+
"step": 1730
|
13257 |
+
},
|
13258 |
+
{
|
13259 |
+
"epoch": 1.9750552348371464,
|
13260 |
+
"grad_norm": 188.64844039216806,
|
13261 |
+
"learning_rate": 7.231981413520217e-11,
|
13262 |
+
"logits/chosen": -1.263267993927002,
|
13263 |
+
"logits/rejected": -1.2889572381973267,
|
13264 |
+
"logps/chosen": -157.240478515625,
|
13265 |
+
"logps/rejected": -197.0875701904297,
|
13266 |
+
"loss": 0.4582,
|
13267 |
+
"rewards/accuracies": 0.875,
|
13268 |
+
"rewards/chosen": -0.24919547140598297,
|
13269 |
+
"rewards/margins": 0.7916581630706787,
|
13270 |
+
"rewards/rejected": -1.0408536195755005,
|
13271 |
+
"step": 1732
|
13272 |
+
},
|
13273 |
+
{
|
13274 |
+
"epoch": 1.9773358990806074,
|
13275 |
+
"grad_norm": 65.4504778217422,
|
13276 |
+
"learning_rate": 5.858039119077673e-11,
|
13277 |
+
"logits/chosen": -1.2087372541427612,
|
13278 |
+
"logits/rejected": -1.2781308889389038,
|
13279 |
+
"logps/chosen": -176.43057250976562,
|
13280 |
+
"logps/rejected": -216.8165740966797,
|
13281 |
+
"loss": 0.4563,
|
13282 |
+
"rewards/accuracies": 0.84375,
|
13283 |
+
"rewards/chosen": -0.41199278831481934,
|
13284 |
+
"rewards/margins": 0.922531008720398,
|
13285 |
+
"rewards/rejected": -1.3345237970352173,
|
13286 |
+
"step": 1734
|
13287 |
+
},
|
13288 |
+
{
|
13289 |
+
"epoch": 1.979616563324068,
|
13290 |
+
"grad_norm": 61.72345732533114,
|
13291 |
+
"learning_rate": 4.628668975166228e-11,
|
13292 |
+
"logits/chosen": -1.4069479703903198,
|
13293 |
+
"logits/rejected": -1.425885558128357,
|
13294 |
+
"logps/chosen": -155.35098266601562,
|
13295 |
+
"logps/rejected": -172.41546630859375,
|
13296 |
+
"loss": 0.3891,
|
13297 |
+
"rewards/accuracies": 0.84375,
|
13298 |
+
"rewards/chosen": -0.3574288785457611,
|
13299 |
+
"rewards/margins": 1.1354269981384277,
|
13300 |
+
"rewards/rejected": -1.4928559064865112,
|
13301 |
+
"step": 1736
|
13302 |
+
},
|
13303 |
+
{
|
13304 |
+
"epoch": 1.981897227567529,
|
13305 |
+
"grad_norm": 50.29362647543594,
|
13306 |
+
"learning_rate": 3.5438887654737346e-11,
|
13307 |
+
"logits/chosen": -1.1932240724563599,
|
13308 |
+
"logits/rejected": -1.2252675294876099,
|
13309 |
+
"logps/chosen": -104.49500274658203,
|
13310 |
+
"logps/rejected": -128.2619171142578,
|
13311 |
+
"loss": 0.3998,
|
13312 |
+
"rewards/accuracies": 0.78125,
|
13313 |
+
"rewards/chosen": -0.0990162342786789,
|
13314 |
+
"rewards/margins": 0.8109432458877563,
|
13315 |
+
"rewards/rejected": -0.9099595546722412,
|
13316 |
+
"step": 1738
|
13317 |
+
},
|
13318 |
+
{
|
13319 |
+
"epoch": 1.9841778918109898,
|
13320 |
+
"grad_norm": 63.67796104250234,
|
13321 |
+
"learning_rate": 2.603714182093375e-11,
|
13322 |
+
"logits/chosen": -1.2760851383209229,
|
13323 |
+
"logits/rejected": -1.3577792644500732,
|
13324 |
+
"logps/chosen": -172.22714233398438,
|
13325 |
+
"logps/rejected": -215.27749633789062,
|
13326 |
+
"loss": 0.4565,
|
13327 |
+
"rewards/accuracies": 0.84375,
|
13328 |
+
"rewards/chosen": -0.560702919960022,
|
13329 |
+
"rewards/margins": 1.0820647478103638,
|
13330 |
+
"rewards/rejected": -1.6427676677703857,
|
13331 |
+
"step": 1740
|
13332 |
+
},
|
13333 |
+
{
|
13334 |
+
"epoch": 1.9864585560544508,
|
13335 |
+
"grad_norm": 58.508228872135426,
|
13336 |
+
"learning_rate": 1.808158825297168e-11,
|
13337 |
+
"logits/chosen": -1.3208973407745361,
|
13338 |
+
"logits/rejected": -1.3905422687530518,
|
13339 |
+
"logps/chosen": -147.84127807617188,
|
13340 |
+
"logps/rejected": -178.2074737548828,
|
13341 |
+
"loss": 0.4758,
|
13342 |
+
"rewards/accuracies": 0.6875,
|
13343 |
+
"rewards/chosen": -0.48498719930648804,
|
13344 |
+
"rewards/margins": 0.888887882232666,
|
13345 |
+
"rewards/rejected": -1.3738751411437988,
|
13346 |
+
"step": 1742
|
13347 |
+
},
|
13348 |
+
{
|
13349 |
+
"epoch": 1.9887392202979117,
|
13350 |
+
"grad_norm": 58.05411220405763,
|
13351 |
+
"learning_rate": 1.1572342033416838e-11,
|
13352 |
+
"logits/chosen": -1.3570505380630493,
|
13353 |
+
"logits/rejected": -1.3865540027618408,
|
13354 |
+
"logps/chosen": -171.88287353515625,
|
13355 |
+
"logps/rejected": -210.6277618408203,
|
13356 |
+
"loss": 0.4492,
|
13357 |
+
"rewards/accuracies": 0.90625,
|
13358 |
+
"rewards/chosen": -0.28449732065200806,
|
13359 |
+
"rewards/margins": 1.1635254621505737,
|
13360 |
+
"rewards/rejected": -1.448022723197937,
|
13361 |
+
"step": 1744
|
13362 |
+
},
|
13363 |
+
{
|
13364 |
+
"epoch": 1.9910198845413727,
|
13365 |
+
"grad_norm": 60.292954042523704,
|
13366 |
+
"learning_rate": 6.50949732301509e-12,
|
13367 |
+
"logits/chosen": -1.2242615222930908,
|
13368 |
+
"logits/rejected": -1.3328139781951904,
|
13369 |
+
"logps/chosen": -164.70863342285156,
|
13370 |
+
"logps/rejected": -213.20814514160156,
|
13371 |
+
"loss": 0.3912,
|
13372 |
+
"rewards/accuracies": 0.78125,
|
13373 |
+
"rewards/chosen": -0.497256338596344,
|
13374 |
+
"rewards/margins": 1.1804652214050293,
|
13375 |
+
"rewards/rejected": -1.677721619606018,
|
13376 |
+
"step": 1746
|
13377 |
+
},
|
13378 |
+
{
|
13379 |
+
"epoch": 1.9933005487848336,
|
13380 |
+
"grad_norm": 68.33201893652561,
|
13381 |
+
"learning_rate": 2.893127359282488e-12,
|
13382 |
+
"logits/chosen": -1.2822688817977905,
|
13383 |
+
"logits/rejected": -1.3241004943847656,
|
13384 |
+
"logps/chosen": -207.72021484375,
|
13385 |
+
"logps/rejected": -262.7379455566406,
|
13386 |
+
"loss": 0.3881,
|
13387 |
+
"rewards/accuracies": 0.875,
|
13388 |
+
"rewards/chosen": -0.34897035360336304,
|
13389 |
+
"rewards/margins": 1.5757369995117188,
|
13390 |
+
"rewards/rejected": -1.924707293510437,
|
13391 |
+
"step": 1748
|
13392 |
+
},
|
13393 |
+
{
|
13394 |
+
"epoch": 1.9955812130282946,
|
13395 |
+
"grad_norm": 62.482127853658426,
|
13396 |
+
"learning_rate": 7.232844555282725e-13,
|
13397 |
+
"logits/chosen": -1.3659021854400635,
|
13398 |
+
"logits/rejected": -1.3771145343780518,
|
13399 |
+
"logps/chosen": -201.26531982421875,
|
13400 |
+
"logps/rejected": -219.556640625,
|
13401 |
+
"loss": 0.3859,
|
13402 |
+
"rewards/accuracies": 0.875,
|
13403 |
+
"rewards/chosen": -0.7029599547386169,
|
13404 |
+
"rewards/margins": 1.120865821838379,
|
13405 |
+
"rewards/rejected": -1.823825716972351,
|
13406 |
+
"step": 1750
|
13407 |
+
},
|
13408 |
+
{
|
13409 |
+
"epoch": 1.9978618772717553,
|
13410 |
+
"grad_norm": 67.65957613657208,
|
13411 |
+
"learning_rate": 0.0,
|
13412 |
+
"logits/chosen": -1.171149492263794,
|
13413 |
+
"logits/rejected": -1.2374571561813354,
|
13414 |
+
"logps/chosen": -129.96214294433594,
|
13415 |
+
"logps/rejected": -161.25244140625,
|
13416 |
+
"loss": 0.4188,
|
13417 |
+
"rewards/accuracies": 0.90625,
|
13418 |
+
"rewards/chosen": -0.34549519419670105,
|
13419 |
+
"rewards/margins": 1.0673820972442627,
|
13420 |
+
"rewards/rejected": -1.4128773212432861,
|
13421 |
+
"step": 1752
|
13422 |
}
|
13423 |
],
|
13424 |
"logging_steps": 2,
|
|
|
13433 |
"should_evaluate": false,
|
13434 |
"should_log": false,
|
13435 |
"should_save": true,
|
13436 |
+
"should_training_stop": true
|
13437 |
},
|
13438 |
"attributes": {}
|
13439 |
}
|