Training in progress, step 1726, checkpoint
Browse files- last-checkpoint/global_step1726/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1726/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1726/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1726/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1726/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1726/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1726/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1726/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00004.safetensors +1 -1
- last-checkpoint/model-00002-of-00004.safetensors +1 -1
- last-checkpoint/model-00003-of-00004.safetensors +1 -1
- last-checkpoint/model-00004-of-00004.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +1730 -3
last-checkpoint/global_step1726/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab8153b1f364ce8143dd8ca6a91c02d9b732a7f246e193af8418363fb39c323e
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1726/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c33f67b21fac802a7b9f40879e4213877fdadc621faf76516815fbfb899b0182
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1726/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e7ad1f2e6b5d52de51c9142c253e4504b14cb96ba450e1d677982e176eb40e6
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1726/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3d4e1f3390a9b43c817745dc7aacca53a7f480440f86642baacca43ef2b6cbf
|
3 |
+
size 24090788996
|
last-checkpoint/global_step1726/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db3e44efac1a79db42ada5e189c9066027261b7fb0012fcb570344c3f83140e7
|
3 |
+
size 150693
|
last-checkpoint/global_step1726/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0503a2119d3a7637d504d6bee881b7a97f77742d4a2e4dcc7a27a1dd1b027a5
|
3 |
+
size 150693
|
last-checkpoint/global_step1726/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:361b7e8e42f99ecbc4792177905ee85e8dffc028b9a1b65f86bf348be14d0039
|
3 |
+
size 150693
|
last-checkpoint/global_step1726/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd50bc52fa7d9aa5ccff56846315efd88d3dbf255e3f23d479db9461ebdfd302
|
3 |
+
size 150693
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step1726
|
last-checkpoint/model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4976698672
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d06d89588d2c5a6b7c30a35a96b4705ea4a256222a1999601f01382cead91d16
|
3 |
size 4976698672
|
last-checkpoint/model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999802720
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:195f937f5574012744471b0d6769d312a0af52820de2cb76093f28e08f193898
|
3 |
size 4999802720
|
last-checkpoint/model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4915916176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:590d0c8a9ae18db231102e3d95cf9330b9873b37d3e0d44bb2af348b5368d23d
|
3 |
size 4915916176
|
last-checkpoint/model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1168138808
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b71c6b037e8207917d33c1313e1bd133b2aed3cfcf9087dc27c9683c5ff38c99
|
3 |
size 1168138808
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8f22ced19e790cc864cefe3b7c711d9ae631c44f95d42fb4829688cc3de0153
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e0407513eba77d34cbf3adf0e59a58bd80716f4f00f414854253637e82be43d
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6060636c023258ce9b965e244b8a58b4c99d5784dde4405b39737550ef50cd4f
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c24ccdfdcde39cb2265c82c50c36ffdfcc670f757aba4bcf4bb0fdc6d1373c4c
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b56a0f1c3322e3323ab0de90511453e2d705194cbbc2d4c04dd46c593fd07065
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -11497,6 +11497,1733 @@
|
|
11497 |
"eval_samples_per_second": 4.338,
|
11498 |
"eval_steps_per_second": 1.085,
|
11499 |
"step": 1500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11500 |
}
|
11501 |
],
|
11502 |
"logging_steps": 2,
|
@@ -11511,7 +13238,7 @@
|
|
11511 |
"should_evaluate": false,
|
11512 |
"should_log": false,
|
11513 |
"should_save": true,
|
11514 |
-
"should_training_stop":
|
11515 |
},
|
11516 |
"attributes": {}
|
11517 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.9989866087585957,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 1726,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
11497 |
"eval_samples_per_second": 4.338,
|
11498 |
"eval_steps_per_second": 1.085,
|
11499 |
"step": 1500
|
11500 |
+
},
|
11501 |
+
{
|
11502 |
+
"epoch": 1.7395584509591024,
|
11503 |
+
"grad_norm": 55.81693817141226,
|
11504 |
+
"learning_rate": 9.220091139554887e-09,
|
11505 |
+
"logits/chosen": -1.1932331323623657,
|
11506 |
+
"logits/rejected": -1.1756948232650757,
|
11507 |
+
"logps/chosen": -103.76750183105469,
|
11508 |
+
"logps/rejected": -129.5191650390625,
|
11509 |
+
"loss": 0.4117,
|
11510 |
+
"rewards/accuracies": 0.8125,
|
11511 |
+
"rewards/chosen": -0.2089885175228119,
|
11512 |
+
"rewards/margins": 0.9426325559616089,
|
11513 |
+
"rewards/rejected": -1.1516211032867432,
|
11514 |
+
"step": 1502
|
11515 |
+
},
|
11516 |
+
{
|
11517 |
+
"epoch": 1.741874773796598,
|
11518 |
+
"grad_norm": 62.56378010964504,
|
11519 |
+
"learning_rate": 9.05870280610117e-09,
|
11520 |
+
"logits/chosen": -1.2296499013900757,
|
11521 |
+
"logits/rejected": -1.1992714405059814,
|
11522 |
+
"logps/chosen": -123.05607604980469,
|
11523 |
+
"logps/rejected": -130.20932006835938,
|
11524 |
+
"loss": 0.4251,
|
11525 |
+
"rewards/accuracies": 0.8125,
|
11526 |
+
"rewards/chosen": -0.41981515288352966,
|
11527 |
+
"rewards/margins": 0.8822442293167114,
|
11528 |
+
"rewards/rejected": -1.3020594120025635,
|
11529 |
+
"step": 1504
|
11530 |
+
},
|
11531 |
+
{
|
11532 |
+
"epoch": 1.7441910966340934,
|
11533 |
+
"grad_norm": 81.02596584931305,
|
11534 |
+
"learning_rate": 8.898672408511553e-09,
|
11535 |
+
"logits/chosen": -1.2401373386383057,
|
11536 |
+
"logits/rejected": -1.17184317111969,
|
11537 |
+
"logps/chosen": -168.21986389160156,
|
11538 |
+
"logps/rejected": -162.71383666992188,
|
11539 |
+
"loss": 0.4402,
|
11540 |
+
"rewards/accuracies": 0.78125,
|
11541 |
+
"rewards/chosen": -0.5058491230010986,
|
11542 |
+
"rewards/margins": 1.3156105279922485,
|
11543 |
+
"rewards/rejected": -1.8214595317840576,
|
11544 |
+
"step": 1506
|
11545 |
+
},
|
11546 |
+
{
|
11547 |
+
"epoch": 1.7465074194715888,
|
11548 |
+
"grad_norm": 56.282435640432055,
|
11549 |
+
"learning_rate": 8.740002336360686e-09,
|
11550 |
+
"logits/chosen": -1.351073980331421,
|
11551 |
+
"logits/rejected": -1.4128607511520386,
|
11552 |
+
"logps/chosen": -152.2001190185547,
|
11553 |
+
"logps/rejected": -191.74932861328125,
|
11554 |
+
"loss": 0.4226,
|
11555 |
+
"rewards/accuracies": 0.90625,
|
11556 |
+
"rewards/chosen": -0.2529861629009247,
|
11557 |
+
"rewards/margins": 1.520397424697876,
|
11558 |
+
"rewards/rejected": -1.773383617401123,
|
11559 |
+
"step": 1508
|
11560 |
+
},
|
11561 |
+
{
|
11562 |
+
"epoch": 1.7488237423090842,
|
11563 |
+
"grad_norm": 49.81866700382731,
|
11564 |
+
"learning_rate": 8.582694958910807e-09,
|
11565 |
+
"logits/chosen": -1.2254369258880615,
|
11566 |
+
"logits/rejected": -1.2749468088150024,
|
11567 |
+
"logps/chosen": -182.1556854248047,
|
11568 |
+
"logps/rejected": -216.18411254882812,
|
11569 |
+
"loss": 0.3902,
|
11570 |
+
"rewards/accuracies": 0.84375,
|
11571 |
+
"rewards/chosen": -0.6539211869239807,
|
11572 |
+
"rewards/margins": 1.5710985660552979,
|
11573 |
+
"rewards/rejected": -2.225019693374634,
|
11574 |
+
"step": 1510
|
11575 |
+
},
|
11576 |
+
{
|
11577 |
+
"epoch": 1.7511400651465798,
|
11578 |
+
"grad_norm": 52.34711654194476,
|
11579 |
+
"learning_rate": 8.426752625076373e-09,
|
11580 |
+
"logits/chosen": -1.2552261352539062,
|
11581 |
+
"logits/rejected": -1.3420953750610352,
|
11582 |
+
"logps/chosen": -186.13934326171875,
|
11583 |
+
"logps/rejected": -285.68804931640625,
|
11584 |
+
"loss": 0.3499,
|
11585 |
+
"rewards/accuracies": 0.9375,
|
11586 |
+
"rewards/chosen": -0.5289927124977112,
|
11587 |
+
"rewards/margins": 3.851708173751831,
|
11588 |
+
"rewards/rejected": -4.380701065063477,
|
11589 |
+
"step": 1512
|
11590 |
+
},
|
11591 |
+
{
|
11592 |
+
"epoch": 1.7534563879840754,
|
11593 |
+
"grad_norm": 52.389521034020916,
|
11594 |
+
"learning_rate": 8.272177663389046e-09,
|
11595 |
+
"logits/chosen": -1.1967260837554932,
|
11596 |
+
"logits/rejected": -1.1967551708221436,
|
11597 |
+
"logps/chosen": -217.95095825195312,
|
11598 |
+
"logps/rejected": -236.05380249023438,
|
11599 |
+
"loss": 0.3782,
|
11600 |
+
"rewards/accuracies": 0.9375,
|
11601 |
+
"rewards/chosen": -0.3343973159790039,
|
11602 |
+
"rewards/margins": 1.9475483894348145,
|
11603 |
+
"rewards/rejected": -2.2819457054138184,
|
11604 |
+
"step": 1514
|
11605 |
+
},
|
11606 |
+
{
|
11607 |
+
"epoch": 1.7557727108215708,
|
11608 |
+
"grad_norm": 55.99063639865476,
|
11609 |
+
"learning_rate": 8.118972381962851e-09,
|
11610 |
+
"logits/chosen": -1.1716736555099487,
|
11611 |
+
"logits/rejected": -1.2387813329696655,
|
11612 |
+
"logps/chosen": -161.53382873535156,
|
11613 |
+
"logps/rejected": -189.86782836914062,
|
11614 |
+
"loss": 0.3571,
|
11615 |
+
"rewards/accuracies": 0.875,
|
11616 |
+
"rewards/chosen": -0.7027201652526855,
|
11617 |
+
"rewards/margins": 1.415562629699707,
|
11618 |
+
"rewards/rejected": -2.1182827949523926,
|
11619 |
+
"step": 1516
|
11620 |
+
},
|
11621 |
+
{
|
11622 |
+
"epoch": 1.7580890336590662,
|
11623 |
+
"grad_norm": 60.98846930904024,
|
11624 |
+
"learning_rate": 7.967139068459726e-09,
|
11625 |
+
"logits/chosen": -1.1493229866027832,
|
11626 |
+
"logits/rejected": -1.146936297416687,
|
11627 |
+
"logps/chosen": -128.60189819335938,
|
11628 |
+
"logps/rejected": -160.15321350097656,
|
11629 |
+
"loss": 0.3883,
|
11630 |
+
"rewards/accuracies": 0.90625,
|
11631 |
+
"rewards/chosen": -0.36255598068237305,
|
11632 |
+
"rewards/margins": 1.3770678043365479,
|
11633 |
+
"rewards/rejected": -1.7396236658096313,
|
11634 |
+
"step": 1518
|
11635 |
+
},
|
11636 |
+
{
|
11637 |
+
"epoch": 1.7604053564965616,
|
11638 |
+
"grad_norm": 176.68417193498476,
|
11639 |
+
"learning_rate": 7.81667999005543e-09,
|
11640 |
+
"logits/chosen": -1.3078656196594238,
|
11641 |
+
"logits/rejected": -1.3120546340942383,
|
11642 |
+
"logps/chosen": -179.33438110351562,
|
11643 |
+
"logps/rejected": -186.1118621826172,
|
11644 |
+
"loss": 0.4993,
|
11645 |
+
"rewards/accuracies": 0.78125,
|
11646 |
+
"rewards/chosen": -0.22156819701194763,
|
11647 |
+
"rewards/margins": 0.9435240030288696,
|
11648 |
+
"rewards/rejected": -1.16509211063385,
|
11649 |
+
"step": 1520
|
11650 |
+
},
|
11651 |
+
{
|
11652 |
+
"epoch": 1.7627216793340572,
|
11653 |
+
"grad_norm": 51.91509618352603,
|
11654 |
+
"learning_rate": 7.6675973934056e-09,
|
11655 |
+
"logits/chosen": -1.0760035514831543,
|
11656 |
+
"logits/rejected": -1.1664559841156006,
|
11657 |
+
"logps/chosen": -136.77081298828125,
|
11658 |
+
"logps/rejected": -175.25926208496094,
|
11659 |
+
"loss": 0.4397,
|
11660 |
+
"rewards/accuracies": 0.875,
|
11661 |
+
"rewards/chosen": -0.2903676927089691,
|
11662 |
+
"rewards/margins": 1.1137360334396362,
|
11663 |
+
"rewards/rejected": -1.4041036367416382,
|
11664 |
+
"step": 1522
|
11665 |
+
},
|
11666 |
+
{
|
11667 |
+
"epoch": 1.7650380021715528,
|
11668 |
+
"grad_norm": 59.67977888278656,
|
11669 |
+
"learning_rate": 7.51989350461224e-09,
|
11670 |
+
"logits/chosen": -1.2789033651351929,
|
11671 |
+
"logits/rejected": -1.3050099611282349,
|
11672 |
+
"logps/chosen": -145.60400390625,
|
11673 |
+
"logps/rejected": -156.1199951171875,
|
11674 |
+
"loss": 0.4456,
|
11675 |
+
"rewards/accuracies": 0.84375,
|
11676 |
+
"rewards/chosen": -0.3298056125640869,
|
11677 |
+
"rewards/margins": 0.7980384826660156,
|
11678 |
+
"rewards/rejected": -1.1278440952301025,
|
11679 |
+
"step": 1524
|
11680 |
+
},
|
11681 |
+
{
|
11682 |
+
"epoch": 1.7673543250090482,
|
11683 |
+
"grad_norm": 50.021597145613505,
|
11684 |
+
"learning_rate": 7.373570529190498e-09,
|
11685 |
+
"logits/chosen": -1.2145639657974243,
|
11686 |
+
"logits/rejected": -1.2063783407211304,
|
11687 |
+
"logps/chosen": -162.4467315673828,
|
11688 |
+
"logps/rejected": -196.65577697753906,
|
11689 |
+
"loss": 0.3705,
|
11690 |
+
"rewards/accuracies": 0.75,
|
11691 |
+
"rewards/chosen": -0.2843925654888153,
|
11692 |
+
"rewards/margins": 1.6666440963745117,
|
11693 |
+
"rewards/rejected": -1.9510366916656494,
|
11694 |
+
"step": 1526
|
11695 |
+
},
|
11696 |
+
{
|
11697 |
+
"epoch": 1.7696706478465436,
|
11698 |
+
"grad_norm": 62.64810414938643,
|
11699 |
+
"learning_rate": 7.228630652035717e-09,
|
11700 |
+
"logits/chosen": -1.2518867254257202,
|
11701 |
+
"logits/rejected": -1.1622406244277954,
|
11702 |
+
"logps/chosen": -169.1246337890625,
|
11703 |
+
"logps/rejected": -189.02169799804688,
|
11704 |
+
"loss": 0.3845,
|
11705 |
+
"rewards/accuracies": 0.9375,
|
11706 |
+
"rewards/chosen": -0.23387570679187775,
|
11707 |
+
"rewards/margins": 1.9417215585708618,
|
11708 |
+
"rewards/rejected": -2.1755971908569336,
|
11709 |
+
"step": 1528
|
11710 |
+
},
|
11711 |
+
{
|
11712 |
+
"epoch": 1.771986970684039,
|
11713 |
+
"grad_norm": 56.92633456964183,
|
11714 |
+
"learning_rate": 7.08507603739078e-09,
|
11715 |
+
"logits/chosen": -1.2512166500091553,
|
11716 |
+
"logits/rejected": -1.2901430130004883,
|
11717 |
+
"logps/chosen": -162.3385772705078,
|
11718 |
+
"logps/rejected": -193.41940307617188,
|
11719 |
+
"loss": 0.4376,
|
11720 |
+
"rewards/accuracies": 0.875,
|
11721 |
+
"rewards/chosen": -0.40012550354003906,
|
11722 |
+
"rewards/margins": 1.2529363632202148,
|
11723 |
+
"rewards/rejected": -1.653061866760254,
|
11724 |
+
"step": 1530
|
11725 |
+
},
|
11726 |
+
{
|
11727 |
+
"epoch": 1.7743032935215346,
|
11728 |
+
"grad_norm": 76.55205063599432,
|
11729 |
+
"learning_rate": 6.942908828813876e-09,
|
11730 |
+
"logits/chosen": -1.323652982711792,
|
11731 |
+
"logits/rejected": -1.2494463920593262,
|
11732 |
+
"logps/chosen": -192.47853088378906,
|
11733 |
+
"logps/rejected": -190.863037109375,
|
11734 |
+
"loss": 0.3828,
|
11735 |
+
"rewards/accuracies": 0.78125,
|
11736 |
+
"rewards/chosen": -0.46822619438171387,
|
11737 |
+
"rewards/margins": 1.0589056015014648,
|
11738 |
+
"rewards/rejected": -1.5271317958831787,
|
11739 |
+
"step": 1532
|
11740 |
+
},
|
11741 |
+
{
|
11742 |
+
"epoch": 1.77661961635903,
|
11743 |
+
"grad_norm": 58.83800639947019,
|
11744 |
+
"learning_rate": 6.802131149146373e-09,
|
11745 |
+
"logits/chosen": -1.254701018333435,
|
11746 |
+
"logits/rejected": -1.2828840017318726,
|
11747 |
+
"logps/chosen": -144.4114990234375,
|
11748 |
+
"logps/rejected": -155.79037475585938,
|
11749 |
+
"loss": 0.374,
|
11750 |
+
"rewards/accuracies": 0.84375,
|
11751 |
+
"rewards/chosen": -0.3679081201553345,
|
11752 |
+
"rewards/margins": 1.2586240768432617,
|
11753 |
+
"rewards/rejected": -1.6265323162078857,
|
11754 |
+
"step": 1534
|
11755 |
+
},
|
11756 |
+
{
|
11757 |
+
"epoch": 1.7789359391965256,
|
11758 |
+
"grad_norm": 68.11680500501693,
|
11759 |
+
"learning_rate": 6.662745100481271e-09,
|
11760 |
+
"logits/chosen": -1.1942329406738281,
|
11761 |
+
"logits/rejected": -1.289471983909607,
|
11762 |
+
"logps/chosen": -103.48456573486328,
|
11763 |
+
"logps/rejected": -110.4488754272461,
|
11764 |
+
"loss": 0.3996,
|
11765 |
+
"rewards/accuracies": 0.71875,
|
11766 |
+
"rewards/chosen": -0.294292151927948,
|
11767 |
+
"rewards/margins": 0.796977162361145,
|
11768 |
+
"rewards/rejected": -1.0912692546844482,
|
11769 |
+
"step": 1536
|
11770 |
+
},
|
11771 |
+
{
|
11772 |
+
"epoch": 1.781252262034021,
|
11773 |
+
"grad_norm": 60.49404420207855,
|
11774 |
+
"learning_rate": 6.5247527641316465e-09,
|
11775 |
+
"logits/chosen": -1.2070562839508057,
|
11776 |
+
"logits/rejected": -1.2548003196716309,
|
11777 |
+
"logps/chosen": -156.2920379638672,
|
11778 |
+
"logps/rejected": -177.38890075683594,
|
11779 |
+
"loss": 0.4381,
|
11780 |
+
"rewards/accuracies": 0.90625,
|
11781 |
+
"rewards/chosen": -0.03995545208454132,
|
11782 |
+
"rewards/margins": 1.5764446258544922,
|
11783 |
+
"rewards/rejected": -1.616400122642517,
|
11784 |
+
"step": 1538
|
11785 |
+
},
|
11786 |
+
{
|
11787 |
+
"epoch": 1.7835685848715164,
|
11788 |
+
"grad_norm": 58.839873216052546,
|
11789 |
+
"learning_rate": 6.388156200599726e-09,
|
11790 |
+
"logits/chosen": -1.1764907836914062,
|
11791 |
+
"logits/rejected": -1.2363911867141724,
|
11792 |
+
"logps/chosen": -138.08792114257812,
|
11793 |
+
"logps/rejected": -156.8811492919922,
|
11794 |
+
"loss": 0.4437,
|
11795 |
+
"rewards/accuracies": 0.8125,
|
11796 |
+
"rewards/chosen": -0.5578911304473877,
|
11797 |
+
"rewards/margins": 0.9130861163139343,
|
11798 |
+
"rewards/rejected": -1.4709770679473877,
|
11799 |
+
"step": 1540
|
11800 |
+
},
|
11801 |
+
{
|
11802 |
+
"epoch": 1.785884907709012,
|
11803 |
+
"grad_norm": 54.830012236677426,
|
11804 |
+
"learning_rate": 6.2529574495459815e-09,
|
11805 |
+
"logits/chosen": -1.2464194297790527,
|
11806 |
+
"logits/rejected": -1.2431282997131348,
|
11807 |
+
"logps/chosen": -146.96051025390625,
|
11808 |
+
"logps/rejected": -175.07481384277344,
|
11809 |
+
"loss": 0.386,
|
11810 |
+
"rewards/accuracies": 0.8125,
|
11811 |
+
"rewards/chosen": -0.2085748016834259,
|
11812 |
+
"rewards/margins": 1.4820951223373413,
|
11813 |
+
"rewards/rejected": -1.6906698942184448,
|
11814 |
+
"step": 1542
|
11815 |
+
},
|
11816 |
+
{
|
11817 |
+
"epoch": 1.7882012305465074,
|
11818 |
+
"grad_norm": 53.964612199236846,
|
11819 |
+
"learning_rate": 6.119158529758817e-09,
|
11820 |
+
"logits/chosen": -1.2010880708694458,
|
11821 |
+
"logits/rejected": -1.252152681350708,
|
11822 |
+
"logps/chosen": -125.40618896484375,
|
11823 |
+
"logps/rejected": -147.0822296142578,
|
11824 |
+
"loss": 0.4294,
|
11825 |
+
"rewards/accuracies": 0.84375,
|
11826 |
+
"rewards/chosen": -0.6274422407150269,
|
11827 |
+
"rewards/margins": 1.2626943588256836,
|
11828 |
+
"rewards/rejected": -1.8901365995407104,
|
11829 |
+
"step": 1544
|
11830 |
+
},
|
11831 |
+
{
|
11832 |
+
"epoch": 1.790517553384003,
|
11833 |
+
"grad_norm": 61.93429459296764,
|
11834 |
+
"learning_rate": 5.986761439124288e-09,
|
11835 |
+
"logits/chosen": -1.0499889850616455,
|
11836 |
+
"logits/rejected": -1.0637288093566895,
|
11837 |
+
"logps/chosen": -145.4034881591797,
|
11838 |
+
"logps/rejected": -170.8926239013672,
|
11839 |
+
"loss": 0.4097,
|
11840 |
+
"rewards/accuracies": 0.90625,
|
11841 |
+
"rewards/chosen": -1.0535945892333984,
|
11842 |
+
"rewards/margins": 1.4541335105895996,
|
11843 |
+
"rewards/rejected": -2.507727861404419,
|
11844 |
+
"step": 1546
|
11845 |
+
},
|
11846 |
+
{
|
11847 |
+
"epoch": 1.7928338762214984,
|
11848 |
+
"grad_norm": 63.39263653013202,
|
11849 |
+
"learning_rate": 5.855768154596363e-09,
|
11850 |
+
"logits/chosen": -1.2247127294540405,
|
11851 |
+
"logits/rejected": -1.287811040878296,
|
11852 |
+
"logps/chosen": -128.2520751953125,
|
11853 |
+
"logps/rejected": -145.6575927734375,
|
11854 |
+
"loss": 0.4182,
|
11855 |
+
"rewards/accuracies": 0.8125,
|
11856 |
+
"rewards/chosen": -0.20227603614330292,
|
11857 |
+
"rewards/margins": 0.8022910356521606,
|
11858 |
+
"rewards/rejected": -1.0045669078826904,
|
11859 |
+
"step": 1548
|
11860 |
+
},
|
11861 |
+
{
|
11862 |
+
"epoch": 1.7951501990589938,
|
11863 |
+
"grad_norm": 53.1979380263347,
|
11864 |
+
"learning_rate": 5.726180632167354e-09,
|
11865 |
+
"logits/chosen": -1.2052092552185059,
|
11866 |
+
"logits/rejected": -1.2159252166748047,
|
11867 |
+
"logps/chosen": -151.31918334960938,
|
11868 |
+
"logps/rejected": -184.94479370117188,
|
11869 |
+
"loss": 0.4075,
|
11870 |
+
"rewards/accuracies": 0.8125,
|
11871 |
+
"rewards/chosen": -0.06491108983755112,
|
11872 |
+
"rewards/margins": 1.7111616134643555,
|
11873 |
+
"rewards/rejected": -1.7760728597640991,
|
11874 |
+
"step": 1550
|
11875 |
+
},
|
11876 |
+
{
|
11877 |
+
"epoch": 1.7974665218964894,
|
11878 |
+
"grad_norm": 53.36509041975899,
|
11879 |
+
"learning_rate": 5.5980008068387655e-09,
|
11880 |
+
"logits/chosen": -1.192318081855774,
|
11881 |
+
"logits/rejected": -1.2422665357589722,
|
11882 |
+
"logps/chosen": -158.2563934326172,
|
11883 |
+
"logps/rejected": -208.37709045410156,
|
11884 |
+
"loss": 0.3783,
|
11885 |
+
"rewards/accuracies": 0.8125,
|
11886 |
+
"rewards/chosen": -0.3992021381855011,
|
11887 |
+
"rewards/margins": 1.9673078060150146,
|
11888 |
+
"rewards/rejected": -2.3665099143981934,
|
11889 |
+
"step": 1552
|
11890 |
+
},
|
11891 |
+
{
|
11892 |
+
"epoch": 1.7997828447339848,
|
11893 |
+
"grad_norm": 57.43435085106451,
|
11894 |
+
"learning_rate": 5.471230592592313e-09,
|
11895 |
+
"logits/chosen": -1.2281129360198975,
|
11896 |
+
"logits/rejected": -1.1943424940109253,
|
11897 |
+
"logps/chosen": -132.93118286132812,
|
11898 |
+
"logps/rejected": -142.4124755859375,
|
11899 |
+
"loss": 0.3999,
|
11900 |
+
"rewards/accuracies": 0.84375,
|
11901 |
+
"rewards/chosen": -0.2500740587711334,
|
11902 |
+
"rewards/margins": 0.9634323120117188,
|
11903 |
+
"rewards/rejected": -1.2135063409805298,
|
11904 |
+
"step": 1554
|
11905 |
+
},
|
11906 |
+
{
|
11907 |
+
"epoch": 1.8020991675714804,
|
11908 |
+
"grad_norm": 116.95703791110742,
|
11909 |
+
"learning_rate": 5.345871882361397e-09,
|
11910 |
+
"logits/chosen": -1.222663402557373,
|
11911 |
+
"logits/rejected": -1.2307226657867432,
|
11912 |
+
"logps/chosen": -195.47381591796875,
|
11913 |
+
"logps/rejected": -213.84588623046875,
|
11914 |
+
"loss": 0.5455,
|
11915 |
+
"rewards/accuracies": 0.71875,
|
11916 |
+
"rewards/chosen": -1.06570303440094,
|
11917 |
+
"rewards/margins": 1.080770492553711,
|
11918 |
+
"rewards/rejected": -2.1464734077453613,
|
11919 |
+
"step": 1556
|
11920 |
+
},
|
11921 |
+
{
|
11922 |
+
"epoch": 1.8044154904089758,
|
11923 |
+
"grad_norm": 54.00119490171407,
|
11924 |
+
"learning_rate": 5.221926548002875e-09,
|
11925 |
+
"logits/chosen": -1.1924062967300415,
|
11926 |
+
"logits/rejected": -1.269582748413086,
|
11927 |
+
"logps/chosen": -165.26943969726562,
|
11928 |
+
"logps/rejected": -179.38568115234375,
|
11929 |
+
"loss": 0.4258,
|
11930 |
+
"rewards/accuracies": 0.78125,
|
11931 |
+
"rewards/chosen": 0.21030552685260773,
|
11932 |
+
"rewards/margins": 1.1077656745910645,
|
11933 |
+
"rewards/rejected": -0.8974601030349731,
|
11934 |
+
"step": 1558
|
11935 |
+
},
|
11936 |
+
{
|
11937 |
+
"epoch": 1.8067318132464711,
|
11938 |
+
"grad_norm": 47.637201993987425,
|
11939 |
+
"learning_rate": 5.099396440269033e-09,
|
11940 |
+
"logits/chosen": -1.1668461561203003,
|
11941 |
+
"logits/rejected": -1.1675832271575928,
|
11942 |
+
"logps/chosen": -132.747314453125,
|
11943 |
+
"logps/rejected": -197.4693603515625,
|
11944 |
+
"loss": 0.3714,
|
11945 |
+
"rewards/accuracies": 0.96875,
|
11946 |
+
"rewards/chosen": -0.2991000711917877,
|
11947 |
+
"rewards/margins": 2.7344629764556885,
|
11948 |
+
"rewards/rejected": -3.0335628986358643,
|
11949 |
+
"step": 1560
|
11950 |
+
},
|
11951 |
+
{
|
11952 |
+
"epoch": 1.8090481360839668,
|
11953 |
+
"grad_norm": 73.3924537450436,
|
11954 |
+
"learning_rate": 4.978283388780002e-09,
|
11955 |
+
"logits/chosen": -1.2106759548187256,
|
11956 |
+
"logits/rejected": -1.3471499681472778,
|
11957 |
+
"logps/chosen": -172.1467742919922,
|
11958 |
+
"logps/rejected": -206.36143493652344,
|
11959 |
+
"loss": 0.3895,
|
11960 |
+
"rewards/accuracies": 0.8125,
|
11961 |
+
"rewards/chosen": -0.703016996383667,
|
11962 |
+
"rewards/margins": 1.2060117721557617,
|
11963 |
+
"rewards/rejected": -1.9090288877487183,
|
11964 |
+
"step": 1562
|
11965 |
+
},
|
11966 |
+
{
|
11967 |
+
"epoch": 1.8113644589214621,
|
11968 |
+
"grad_norm": 90.51012356320436,
|
11969 |
+
"learning_rate": 4.858589201996432e-09,
|
11970 |
+
"logits/chosen": -1.0378146171569824,
|
11971 |
+
"logits/rejected": -1.1732603311538696,
|
11972 |
+
"logps/chosen": -141.2643280029297,
|
11973 |
+
"logps/rejected": -164.4271697998047,
|
11974 |
+
"loss": 0.4848,
|
11975 |
+
"rewards/accuracies": 0.71875,
|
11976 |
+
"rewards/chosen": -0.8795535564422607,
|
11977 |
+
"rewards/margins": 0.9949630498886108,
|
11978 |
+
"rewards/rejected": -1.874516487121582,
|
11979 |
+
"step": 1564
|
11980 |
+
},
|
11981 |
+
{
|
11982 |
+
"epoch": 1.8136807817589577,
|
11983 |
+
"grad_norm": 56.01168000132923,
|
11984 |
+
"learning_rate": 4.740315667192441e-09,
|
11985 |
+
"logits/chosen": -1.1176464557647705,
|
11986 |
+
"logits/rejected": -1.2511212825775146,
|
11987 |
+
"logps/chosen": -103.09518432617188,
|
11988 |
+
"logps/rejected": -135.39122009277344,
|
11989 |
+
"loss": 0.4393,
|
11990 |
+
"rewards/accuracies": 0.78125,
|
11991 |
+
"rewards/chosen": -0.2941249907016754,
|
11992 |
+
"rewards/margins": 0.9569557905197144,
|
11993 |
+
"rewards/rejected": -1.2510807514190674,
|
11994 |
+
"step": 1566
|
11995 |
+
},
|
11996 |
+
{
|
11997 |
+
"epoch": 1.8159971045964531,
|
11998 |
+
"grad_norm": 48.16666589487942,
|
11999 |
+
"learning_rate": 4.623464550429002e-09,
|
12000 |
+
"logits/chosen": -1.102777361869812,
|
12001 |
+
"logits/rejected": -1.1394641399383545,
|
12002 |
+
"logps/chosen": -111.80138397216797,
|
12003 |
+
"logps/rejected": -145.2130126953125,
|
12004 |
+
"loss": 0.4561,
|
12005 |
+
"rewards/accuracies": 0.84375,
|
12006 |
+
"rewards/chosen": -0.7176414728164673,
|
12007 |
+
"rewards/margins": 1.2812902927398682,
|
12008 |
+
"rewards/rejected": -1.998931646347046,
|
12009 |
+
"step": 1568
|
12010 |
+
},
|
12011 |
+
{
|
12012 |
+
"epoch": 1.8183134274339485,
|
12013 |
+
"grad_norm": 50.97749085021057,
|
12014 |
+
"learning_rate": 4.508037596527525e-09,
|
12015 |
+
"logits/chosen": -1.1966917514801025,
|
12016 |
+
"logits/rejected": -1.2247413396835327,
|
12017 |
+
"logps/chosen": -114.48523712158203,
|
12018 |
+
"logps/rejected": -128.622802734375,
|
12019 |
+
"loss": 0.3797,
|
12020 |
+
"rewards/accuracies": 0.78125,
|
12021 |
+
"rewards/chosen": -0.19608543813228607,
|
12022 |
+
"rewards/margins": 0.9697508215904236,
|
12023 |
+
"rewards/rejected": -1.1658360958099365,
|
12024 |
+
"step": 1570
|
12025 |
+
},
|
12026 |
+
{
|
12027 |
+
"epoch": 1.8206297502714441,
|
12028 |
+
"grad_norm": 54.387837657286084,
|
12029 |
+
"learning_rate": 4.39403652904381e-09,
|
12030 |
+
"logits/chosen": -1.1147388219833374,
|
12031 |
+
"logits/rejected": -1.1594665050506592,
|
12032 |
+
"logps/chosen": -114.78770446777344,
|
12033 |
+
"logps/rejected": -155.918701171875,
|
12034 |
+
"loss": 0.3961,
|
12035 |
+
"rewards/accuracies": 0.9375,
|
12036 |
+
"rewards/chosen": -0.18746113777160645,
|
12037 |
+
"rewards/margins": 1.8321788311004639,
|
12038 |
+
"rewards/rejected": -2.0196399688720703,
|
12039 |
+
"step": 1572
|
12040 |
+
},
|
12041 |
+
{
|
12042 |
+
"epoch": 1.8229460731089395,
|
12043 |
+
"grad_norm": 62.271451803387365,
|
12044 |
+
"learning_rate": 4.2814630502422845e-09,
|
12045 |
+
"logits/chosen": -1.1847018003463745,
|
12046 |
+
"logits/rejected": -1.1410635709762573,
|
12047 |
+
"logps/chosen": -178.85458374023438,
|
12048 |
+
"logps/rejected": -211.72219848632812,
|
12049 |
+
"loss": 0.4279,
|
12050 |
+
"rewards/accuracies": 0.75,
|
12051 |
+
"rewards/chosen": -0.22784435749053955,
|
12052 |
+
"rewards/margins": 2.5258147716522217,
|
12053 |
+
"rewards/rejected": -2.7536590099334717,
|
12054 |
+
"step": 1574
|
12055 |
+
},
|
12056 |
+
{
|
12057 |
+
"epoch": 1.8252623959464351,
|
12058 |
+
"grad_norm": 57.174518848316346,
|
12059 |
+
"learning_rate": 4.170318841070708e-09,
|
12060 |
+
"logits/chosen": -1.120819330215454,
|
12061 |
+
"logits/rejected": -1.1634063720703125,
|
12062 |
+
"logps/chosen": -140.26319885253906,
|
12063 |
+
"logps/rejected": -207.86880493164062,
|
12064 |
+
"loss": 0.4442,
|
12065 |
+
"rewards/accuracies": 0.84375,
|
12066 |
+
"rewards/chosen": -0.25584009289741516,
|
12067 |
+
"rewards/margins": 1.6964097023010254,
|
12068 |
+
"rewards/rejected": -1.9522497653961182,
|
12069 |
+
"step": 1576
|
12070 |
+
},
|
12071 |
+
{
|
12072 |
+
"epoch": 1.8275787187839305,
|
12073 |
+
"grad_norm": 57.29425789262467,
|
12074 |
+
"learning_rate": 4.060605561134889e-09,
|
12075 |
+
"logits/chosen": -1.3027273416519165,
|
12076 |
+
"logits/rejected": -1.2673333883285522,
|
12077 |
+
"logps/chosen": -170.17152404785156,
|
12078 |
+
"logps/rejected": -188.33880615234375,
|
12079 |
+
"loss": 0.4311,
|
12080 |
+
"rewards/accuracies": 0.84375,
|
12081 |
+
"rewards/chosen": -0.42000892758369446,
|
12082 |
+
"rewards/margins": 1.3922333717346191,
|
12083 |
+
"rewards/rejected": -1.8122422695159912,
|
12084 |
+
"step": 1578
|
12085 |
+
},
|
12086 |
+
{
|
12087 |
+
"epoch": 1.829895041621426,
|
12088 |
+
"grad_norm": 65.96165236575662,
|
12089 |
+
"learning_rate": 3.952324848674004e-09,
|
12090 |
+
"logits/chosen": -1.1435868740081787,
|
12091 |
+
"logits/rejected": -1.2456907033920288,
|
12092 |
+
"logps/chosen": -118.90472412109375,
|
12093 |
+
"logps/rejected": -160.82818603515625,
|
12094 |
+
"loss": 0.3963,
|
12095 |
+
"rewards/accuracies": 0.875,
|
12096 |
+
"rewards/chosen": -0.47032859921455383,
|
12097 |
+
"rewards/margins": 1.457180142402649,
|
12098 |
+
"rewards/rejected": -1.9275087118148804,
|
12099 |
+
"step": 1580
|
12100 |
+
},
|
12101 |
+
{
|
12102 |
+
"epoch": 1.8322113644589213,
|
12103 |
+
"grad_norm": 62.96392877654251,
|
12104 |
+
"learning_rate": 3.8454783205361774e-09,
|
12105 |
+
"logits/chosen": -1.2181570529937744,
|
12106 |
+
"logits/rejected": -1.2495853900909424,
|
12107 |
+
"logps/chosen": -195.74876403808594,
|
12108 |
+
"logps/rejected": -292.0626220703125,
|
12109 |
+
"loss": 0.4112,
|
12110 |
+
"rewards/accuracies": 0.8125,
|
12111 |
+
"rewards/chosen": -0.3267236649990082,
|
12112 |
+
"rewards/margins": 4.259873390197754,
|
12113 |
+
"rewards/rejected": -4.586597442626953,
|
12114 |
+
"step": 1582
|
12115 |
+
},
|
12116 |
+
{
|
12117 |
+
"epoch": 1.834527687296417,
|
12118 |
+
"grad_norm": 58.298377548314235,
|
12119 |
+
"learning_rate": 3.740067572154238e-09,
|
12120 |
+
"logits/chosen": -1.292594075202942,
|
12121 |
+
"logits/rejected": -1.3315826654434204,
|
12122 |
+
"logps/chosen": -154.32740783691406,
|
12123 |
+
"logps/rejected": -175.98606872558594,
|
12124 |
+
"loss": 0.4088,
|
12125 |
+
"rewards/accuracies": 0.8125,
|
12126 |
+
"rewards/chosen": -0.2856728136539459,
|
12127 |
+
"rewards/margins": 1.166110634803772,
|
12128 |
+
"rewards/rejected": -1.4517834186553955,
|
12129 |
+
"step": 1584
|
12130 |
+
},
|
12131 |
+
{
|
12132 |
+
"epoch": 1.8368440101339125,
|
12133 |
+
"grad_norm": 59.34894292485851,
|
12134 |
+
"learning_rate": 3.6360941775219534e-09,
|
12135 |
+
"logits/chosen": -1.2552549839019775,
|
12136 |
+
"logits/rejected": -1.3246078491210938,
|
12137 |
+
"logps/chosen": -165.2515869140625,
|
12138 |
+
"logps/rejected": -189.0300750732422,
|
12139 |
+
"loss": 0.3893,
|
12140 |
+
"rewards/accuracies": 0.84375,
|
12141 |
+
"rewards/chosen": -0.31748124957084656,
|
12142 |
+
"rewards/margins": 1.6454672813415527,
|
12143 |
+
"rewards/rejected": -1.9629485607147217,
|
12144 |
+
"step": 1586
|
12145 |
+
},
|
12146 |
+
{
|
12147 |
+
"epoch": 1.839160332971408,
|
12148 |
+
"grad_norm": 72.01635850106565,
|
12149 |
+
"learning_rate": 3.53355968917054e-09,
|
12150 |
+
"logits/chosen": -1.1828457117080688,
|
12151 |
+
"logits/rejected": -1.1595231294631958,
|
12152 |
+
"logps/chosen": -188.94688415527344,
|
12153 |
+
"logps/rejected": -214.75,
|
12154 |
+
"loss": 0.3615,
|
12155 |
+
"rewards/accuracies": 0.84375,
|
12156 |
+
"rewards/chosen": -0.9493909478187561,
|
12157 |
+
"rewards/margins": 1.7555177211761475,
|
12158 |
+
"rewards/rejected": -2.704908609390259,
|
12159 |
+
"step": 1588
|
12160 |
+
},
|
12161 |
+
{
|
12162 |
+
"epoch": 1.8414766558089033,
|
12163 |
+
"grad_norm": 62.6493622965171,
|
12164 |
+
"learning_rate": 3.432465638145443e-09,
|
12165 |
+
"logits/chosen": -1.2264246940612793,
|
12166 |
+
"logits/rejected": -1.2353841066360474,
|
12167 |
+
"logps/chosen": -195.12002563476562,
|
12168 |
+
"logps/rejected": -207.8896942138672,
|
12169 |
+
"loss": 0.3766,
|
12170 |
+
"rewards/accuracies": 0.78125,
|
12171 |
+
"rewards/chosen": -0.44329333305358887,
|
12172 |
+
"rewards/margins": 1.4592864513397217,
|
12173 |
+
"rewards/rejected": -1.9025800228118896,
|
12174 |
+
"step": 1590
|
12175 |
+
},
|
12176 |
+
{
|
12177 |
+
"epoch": 1.8437929786463987,
|
12178 |
+
"grad_norm": 90.93634487708707,
|
12179 |
+
"learning_rate": 3.3328135339834917e-09,
|
12180 |
+
"logits/chosen": -1.2629611492156982,
|
12181 |
+
"logits/rejected": -1.249568223953247,
|
12182 |
+
"logps/chosen": -190.23126220703125,
|
12183 |
+
"logps/rejected": -229.70721435546875,
|
12184 |
+
"loss": 0.4445,
|
12185 |
+
"rewards/accuracies": 0.84375,
|
12186 |
+
"rewards/chosen": -0.7300775647163391,
|
12187 |
+
"rewards/margins": 1.7166606187820435,
|
12188 |
+
"rewards/rejected": -2.4467382431030273,
|
12189 |
+
"step": 1592
|
12190 |
+
},
|
12191 |
+
{
|
12192 |
+
"epoch": 1.8461093014838943,
|
12193 |
+
"grad_norm": 70.40594487298823,
|
12194 |
+
"learning_rate": 3.234604864690349e-09,
|
12195 |
+
"logits/chosen": -1.1563414335250854,
|
12196 |
+
"logits/rejected": -1.1132121086120605,
|
12197 |
+
"logps/chosen": -112.93782806396484,
|
12198 |
+
"logps/rejected": -115.19876861572266,
|
12199 |
+
"loss": 0.4635,
|
12200 |
+
"rewards/accuracies": 0.75,
|
12201 |
+
"rewards/chosen": -0.16091413795948029,
|
12202 |
+
"rewards/margins": 1.0079346895217896,
|
12203 |
+
"rewards/rejected": -1.1688487529754639,
|
12204 |
+
"step": 1594
|
12205 |
+
},
|
12206 |
+
{
|
12207 |
+
"epoch": 1.84842562432139,
|
12208 |
+
"grad_norm": 70.46959430003976,
|
12209 |
+
"learning_rate": 3.13784109671833e-09,
|
12210 |
+
"logits/chosen": -1.1287944316864014,
|
12211 |
+
"logits/rejected": -1.1954846382141113,
|
12212 |
+
"logps/chosen": -145.52749633789062,
|
12213 |
+
"logps/rejected": -168.68896484375,
|
12214 |
+
"loss": 0.4428,
|
12215 |
+
"rewards/accuracies": 0.84375,
|
12216 |
+
"rewards/chosen": -0.435101717710495,
|
12217 |
+
"rewards/margins": 1.1136534214019775,
|
12218 |
+
"rewards/rejected": -1.548755168914795,
|
12219 |
+
"step": 1596
|
12220 |
+
},
|
12221 |
+
{
|
12222 |
+
"epoch": 1.8507419471588853,
|
12223 |
+
"grad_norm": 63.53152924846159,
|
12224 |
+
"learning_rate": 3.0425236749444307e-09,
|
12225 |
+
"logits/chosen": -1.1079940795898438,
|
12226 |
+
"logits/rejected": -1.1838057041168213,
|
12227 |
+
"logps/chosen": -104.38517761230469,
|
12228 |
+
"logps/rejected": -126.13815307617188,
|
12229 |
+
"loss": 0.4119,
|
12230 |
+
"rewards/accuracies": 0.8125,
|
12231 |
+
"rewards/chosen": -0.20333430171012878,
|
12232 |
+
"rewards/margins": 1.2202249765396118,
|
12233 |
+
"rewards/rejected": -1.4235591888427734,
|
12234 |
+
"step": 1598
|
12235 |
+
},
|
12236 |
+
{
|
12237 |
+
"epoch": 1.8530582699963807,
|
12238 |
+
"grad_norm": 66.75495278471351,
|
12239 |
+
"learning_rate": 2.9486540226488555e-09,
|
12240 |
+
"logits/chosen": -1.1984293460845947,
|
12241 |
+
"logits/rejected": -1.1811829805374146,
|
12242 |
+
"logps/chosen": -107.63018035888672,
|
12243 |
+
"logps/rejected": -130.51141357421875,
|
12244 |
+
"loss": 0.4185,
|
12245 |
+
"rewards/accuracies": 0.75,
|
12246 |
+
"rewards/chosen": -0.44386693835258484,
|
12247 |
+
"rewards/margins": 1.2538187503814697,
|
12248 |
+
"rewards/rejected": -1.697685718536377,
|
12249 |
+
"step": 1600
|
12250 |
+
},
|
12251 |
+
{
|
12252 |
+
"epoch": 1.8530582699963807,
|
12253 |
+
"eval_logits/chosen": -1.2096275091171265,
|
12254 |
+
"eval_logits/rejected": -1.2041908502578735,
|
12255 |
+
"eval_logps/chosen": -144.02456665039062,
|
12256 |
+
"eval_logps/rejected": -149.35797119140625,
|
12257 |
+
"eval_loss": 0.59078049659729,
|
12258 |
+
"eval_rewards/accuracies": 0.7599999904632568,
|
12259 |
+
"eval_rewards/chosen": -0.8393388986587524,
|
12260 |
+
"eval_rewards/margins": 0.7011021375656128,
|
12261 |
+
"eval_rewards/rejected": -1.5404411554336548,
|
12262 |
+
"eval_runtime": 26.1157,
|
12263 |
+
"eval_samples_per_second": 3.829,
|
12264 |
+
"eval_steps_per_second": 0.957,
|
12265 |
+
"step": 1600
|
12266 |
+
},
|
12267 |
+
{
|
12268 |
+
"epoch": 1.855374592833876,
|
12269 |
+
"grad_norm": 51.25806551926982,
|
12270 |
+
"learning_rate": 2.856233541493691e-09,
|
12271 |
+
"logits/chosen": -1.1180177927017212,
|
12272 |
+
"logits/rejected": -1.153393268585205,
|
12273 |
+
"logps/chosen": -144.27127075195312,
|
12274 |
+
"logps/rejected": -172.8219757080078,
|
12275 |
+
"loss": 0.4103,
|
12276 |
+
"rewards/accuracies": 0.875,
|
12277 |
+
"rewards/chosen": -0.5550628900527954,
|
12278 |
+
"rewards/margins": 1.737399697303772,
|
12279 |
+
"rewards/rejected": -2.2924625873565674,
|
12280 |
+
"step": 1602
|
12281 |
+
},
|
12282 |
+
{
|
12283 |
+
"epoch": 1.8576909156713717,
|
12284 |
+
"grad_norm": 44.574245908096394,
|
12285 |
+
"learning_rate": 2.7652636115019554e-09,
|
12286 |
+
"logits/chosen": -1.2830660343170166,
|
12287 |
+
"logits/rejected": -1.3382513523101807,
|
12288 |
+
"logps/chosen": -186.8584747314453,
|
12289 |
+
"logps/rejected": -214.39341735839844,
|
12290 |
+
"loss": 0.336,
|
12291 |
+
"rewards/accuracies": 0.90625,
|
12292 |
+
"rewards/chosen": 0.011630617082118988,
|
12293 |
+
"rewards/margins": 2.068033218383789,
|
12294 |
+
"rewards/rejected": -2.0564029216766357,
|
12295 |
+
"step": 1604
|
12296 |
+
},
|
12297 |
+
{
|
12298 |
+
"epoch": 1.8600072385088673,
|
12299 |
+
"grad_norm": 88.24929156836725,
|
12300 |
+
"learning_rate": 2.6757455910370487e-09,
|
12301 |
+
"logits/chosen": -1.1889640092849731,
|
12302 |
+
"logits/rejected": -1.178146481513977,
|
12303 |
+
"logps/chosen": -169.42417907714844,
|
12304 |
+
"logps/rejected": -202.3689422607422,
|
12305 |
+
"loss": 0.453,
|
12306 |
+
"rewards/accuracies": 0.8125,
|
12307 |
+
"rewards/chosen": -0.4166257083415985,
|
12308 |
+
"rewards/margins": 1.3977904319763184,
|
12309 |
+
"rewards/rejected": -1.8144161701202393,
|
12310 |
+
"step": 1606
|
12311 |
+
},
|
12312 |
+
{
|
12313 |
+
"epoch": 1.8623235613463627,
|
12314 |
+
"grad_norm": 54.823245822280576,
|
12315 |
+
"learning_rate": 2.5876808167825005e-09,
|
12316 |
+
"logits/chosen": -1.2597419023513794,
|
12317 |
+
"logits/rejected": -1.193768858909607,
|
12318 |
+
"logps/chosen": -93.74658203125,
|
12319 |
+
"logps/rejected": -92.6616439819336,
|
12320 |
+
"loss": 0.3924,
|
12321 |
+
"rewards/accuracies": 0.6875,
|
12322 |
+
"rewards/chosen": -0.25879502296447754,
|
12323 |
+
"rewards/margins": 0.4778652489185333,
|
12324 |
+
"rewards/rejected": -0.7366602420806885,
|
12325 |
+
"step": 1608
|
12326 |
+
},
|
12327 |
+
{
|
12328 |
+
"epoch": 1.864639884183858,
|
12329 |
+
"grad_norm": 74.02926136780609,
|
12330 |
+
"learning_rate": 2.5010706037218885e-09,
|
12331 |
+
"logits/chosen": -1.2314317226409912,
|
12332 |
+
"logits/rejected": -1.2886399030685425,
|
12333 |
+
"logps/chosen": -163.94842529296875,
|
12334 |
+
"logps/rejected": -195.11390686035156,
|
12335 |
+
"loss": 0.3971,
|
12336 |
+
"rewards/accuracies": 0.78125,
|
12337 |
+
"rewards/chosen": -0.2574860155582428,
|
12338 |
+
"rewards/margins": 1.3789194822311401,
|
12339 |
+
"rewards/rejected": -1.6364054679870605,
|
12340 |
+
"step": 1610
|
12341 |
+
},
|
12342 |
+
{
|
12343 |
+
"epoch": 1.8669562070213535,
|
12344 |
+
"grad_norm": 54.30860117915708,
|
12345 |
+
"learning_rate": 2.4159162451193094e-09,
|
12346 |
+
"logits/chosen": -1.0902681350708008,
|
12347 |
+
"logits/rejected": -1.112775206565857,
|
12348 |
+
"logps/chosen": -140.3288116455078,
|
12349 |
+
"logps/rejected": -188.05210876464844,
|
12350 |
+
"loss": 0.4003,
|
12351 |
+
"rewards/accuracies": 0.90625,
|
12352 |
+
"rewards/chosen": -0.1892092376947403,
|
12353 |
+
"rewards/margins": 1.8315831422805786,
|
12354 |
+
"rewards/rejected": -2.0207924842834473,
|
12355 |
+
"step": 1612
|
12356 |
+
},
|
12357 |
+
{
|
12358 |
+
"epoch": 1.869272529858849,
|
12359 |
+
"grad_norm": 62.18685455386846,
|
12360 |
+
"learning_rate": 2.3322190125000475e-09,
|
12361 |
+
"logits/chosen": -1.0966382026672363,
|
12362 |
+
"logits/rejected": -1.1658515930175781,
|
12363 |
+
"logps/chosen": -116.89921569824219,
|
12364 |
+
"logps/rejected": -151.35015869140625,
|
12365 |
+
"loss": 0.4112,
|
12366 |
+
"rewards/accuracies": 0.75,
|
12367 |
+
"rewards/chosen": -0.4243711233139038,
|
12368 |
+
"rewards/margins": 1.4443333148956299,
|
12369 |
+
"rewards/rejected": -1.8687043190002441,
|
12370 |
+
"step": 1614
|
12371 |
+
},
|
12372 |
+
{
|
12373 |
+
"epoch": 1.8715888526963447,
|
12374 |
+
"grad_norm": 55.591911669551806,
|
12375 |
+
"learning_rate": 2.24998015563157e-09,
|
12376 |
+
"logits/chosen": -1.1172374486923218,
|
12377 |
+
"logits/rejected": -1.1134474277496338,
|
12378 |
+
"logps/chosen": -106.15010070800781,
|
12379 |
+
"logps/rejected": -122.0849609375,
|
12380 |
+
"loss": 0.42,
|
12381 |
+
"rewards/accuracies": 0.90625,
|
12382 |
+
"rewards/chosen": -0.5767372250556946,
|
12383 |
+
"rewards/margins": 0.9947463274002075,
|
12384 |
+
"rewards/rejected": -1.5714833736419678,
|
12385 |
+
"step": 1616
|
12386 |
+
},
|
12387 |
+
{
|
12388 |
+
"epoch": 1.87390517553384,
|
12389 |
+
"grad_norm": 69.67114883544231,
|
12390 |
+
"learning_rate": 2.169200902504842e-09,
|
12391 |
+
"logits/chosen": -1.291989803314209,
|
12392 |
+
"logits/rejected": -1.3668596744537354,
|
12393 |
+
"logps/chosen": -140.99050903320312,
|
12394 |
+
"logps/rejected": -164.55636596679688,
|
12395 |
+
"loss": 0.4102,
|
12396 |
+
"rewards/accuracies": 0.8125,
|
12397 |
+
"rewards/chosen": -0.1543090045452118,
|
12398 |
+
"rewards/margins": 1.0028785467147827,
|
12399 |
+
"rewards/rejected": -1.1571874618530273,
|
12400 |
+
"step": 1618
|
12401 |
+
},
|
12402 |
+
{
|
12403 |
+
"epoch": 1.8762214983713354,
|
12404 |
+
"grad_norm": 49.47929313355962,
|
12405 |
+
"learning_rate": 2.0898824593160503e-09,
|
12406 |
+
"logits/chosen": -1.1244778633117676,
|
12407 |
+
"logits/rejected": -1.1484088897705078,
|
12408 |
+
"logps/chosen": -115.74507904052734,
|
12409 |
+
"logps/rejected": -133.63916015625,
|
12410 |
+
"loss": 0.3943,
|
12411 |
+
"rewards/accuracies": 0.71875,
|
12412 |
+
"rewards/chosen": -0.4605577886104584,
|
12413 |
+
"rewards/margins": 0.8462937474250793,
|
12414 |
+
"rewards/rejected": -1.3068513870239258,
|
12415 |
+
"step": 1620
|
12416 |
+
},
|
12417 |
+
{
|
12418 |
+
"epoch": 1.8785378212088308,
|
12419 |
+
"grad_norm": 51.80324368762919,
|
12420 |
+
"learning_rate": 2.012026010448542e-09,
|
12421 |
+
"logits/chosen": -1.0954941511154175,
|
12422 |
+
"logits/rejected": -1.160184383392334,
|
12423 |
+
"logps/chosen": -131.16983032226562,
|
12424 |
+
"logps/rejected": -178.2694549560547,
|
12425 |
+
"loss": 0.4011,
|
12426 |
+
"rewards/accuracies": 0.84375,
|
12427 |
+
"rewards/chosen": -0.1698540300130844,
|
12428 |
+
"rewards/margins": 1.7133314609527588,
|
12429 |
+
"rewards/rejected": -1.8831853866577148,
|
12430 |
+
"step": 1622
|
12431 |
+
},
|
12432 |
+
{
|
12433 |
+
"epoch": 1.8808541440463264,
|
12434 |
+
"grad_norm": 61.62870922068943,
|
12435 |
+
"learning_rate": 1.935632718455171e-09,
|
12436 |
+
"logits/chosen": -1.167246699333191,
|
12437 |
+
"logits/rejected": -1.2080024480819702,
|
12438 |
+
"logps/chosen": -154.55758666992188,
|
12439 |
+
"logps/rejected": -188.28201293945312,
|
12440 |
+
"loss": 0.3673,
|
12441 |
+
"rewards/accuracies": 0.84375,
|
12442 |
+
"rewards/chosen": -0.5035812854766846,
|
12443 |
+
"rewards/margins": 1.6042046546936035,
|
12444 |
+
"rewards/rejected": -2.107785701751709,
|
12445 |
+
"step": 1624
|
12446 |
+
},
|
12447 |
+
{
|
12448 |
+
"epoch": 1.883170466883822,
|
12449 |
+
"grad_norm": 66.67930280370663,
|
12450 |
+
"learning_rate": 1.860703724040935e-09,
|
12451 |
+
"logits/chosen": -1.1037479639053345,
|
12452 |
+
"logits/rejected": -1.0890851020812988,
|
12453 |
+
"logps/chosen": -151.0048370361328,
|
12454 |
+
"logps/rejected": -170.82943725585938,
|
12455 |
+
"loss": 0.4514,
|
12456 |
+
"rewards/accuracies": 0.6875,
|
12457 |
+
"rewards/chosen": -0.4693964123725891,
|
12458 |
+
"rewards/margins": 0.7652498483657837,
|
12459 |
+
"rewards/rejected": -1.2346463203430176,
|
12460 |
+
"step": 1626
|
12461 |
+
},
|
12462 |
+
{
|
12463 |
+
"epoch": 1.8854867897213174,
|
12464 |
+
"grad_norm": 64.45068485207041,
|
12465 |
+
"learning_rate": 1.7872401460458874e-09,
|
12466 |
+
"logits/chosen": -1.1636077165603638,
|
12467 |
+
"logits/rejected": -1.2113772630691528,
|
12468 |
+
"logps/chosen": -128.23899841308594,
|
12469 |
+
"logps/rejected": -151.98800659179688,
|
12470 |
+
"loss": 0.4074,
|
12471 |
+
"rewards/accuracies": 0.75,
|
12472 |
+
"rewards/chosen": -0.15583369135856628,
|
12473 |
+
"rewards/margins": 1.3744523525238037,
|
12474 |
+
"rewards/rejected": -1.5302859544754028,
|
12475 |
+
"step": 1628
|
12476 |
+
},
|
12477 |
+
{
|
12478 |
+
"epoch": 1.8878031125588128,
|
12479 |
+
"grad_norm": 60.44416949725557,
|
12480 |
+
"learning_rate": 1.7152430814285302e-09,
|
12481 |
+
"logits/chosen": -1.2152669429779053,
|
12482 |
+
"logits/rejected": -1.225684404373169,
|
12483 |
+
"logps/chosen": -145.33447265625,
|
12484 |
+
"logps/rejected": -170.64059448242188,
|
12485 |
+
"loss": 0.4299,
|
12486 |
+
"rewards/accuracies": 0.875,
|
12487 |
+
"rewards/chosen": -0.09620651602745056,
|
12488 |
+
"rewards/margins": 1.3490362167358398,
|
12489 |
+
"rewards/rejected": -1.4452428817749023,
|
12490 |
+
"step": 1630
|
12491 |
+
},
|
12492 |
+
{
|
12493 |
+
"epoch": 1.8901194353963082,
|
12494 |
+
"grad_norm": 46.81333988402518,
|
12495 |
+
"learning_rate": 1.6447136052493704e-09,
|
12496 |
+
"logits/chosen": -1.169386863708496,
|
12497 |
+
"logits/rejected": -1.248286247253418,
|
12498 |
+
"logps/chosen": -163.55990600585938,
|
12499 |
+
"logps/rejected": -216.13929748535156,
|
12500 |
+
"loss": 0.3547,
|
12501 |
+
"rewards/accuracies": 1.0,
|
12502 |
+
"rewards/chosen": -0.5737725496292114,
|
12503 |
+
"rewards/margins": 1.9429514408111572,
|
12504 |
+
"rewards/rejected": -2.516724109649658,
|
12505 |
+
"step": 1632
|
12506 |
+
},
|
12507 |
+
{
|
12508 |
+
"epoch": 1.8924357582338038,
|
12509 |
+
"grad_norm": 76.34596117355929,
|
12510 |
+
"learning_rate": 1.5756527706548561e-09,
|
12511 |
+
"logits/chosen": -1.2912683486938477,
|
12512 |
+
"logits/rejected": -1.2929950952529907,
|
12513 |
+
"logps/chosen": -192.79061889648438,
|
12514 |
+
"logps/rejected": -216.2474365234375,
|
12515 |
+
"loss": 0.4647,
|
12516 |
+
"rewards/accuracies": 0.8125,
|
12517 |
+
"rewards/chosen": -0.6607008576393127,
|
12518 |
+
"rewards/margins": 1.0471830368041992,
|
12519 |
+
"rewards/rejected": -1.7078838348388672,
|
12520 |
+
"step": 1634
|
12521 |
+
},
|
12522 |
+
{
|
12523 |
+
"epoch": 1.8947520810712994,
|
12524 |
+
"grad_norm": 50.49082900430503,
|
12525 |
+
"learning_rate": 1.5080616088616882e-09,
|
12526 |
+
"logits/chosen": -1.1574630737304688,
|
12527 |
+
"logits/rejected": -1.2174662351608276,
|
12528 |
+
"logps/chosen": -105.63571166992188,
|
12529 |
+
"logps/rejected": -138.35476684570312,
|
12530 |
+
"loss": 0.4099,
|
12531 |
+
"rewards/accuracies": 0.84375,
|
12532 |
+
"rewards/chosen": -0.4468805193901062,
|
12533 |
+
"rewards/margins": 1.2918894290924072,
|
12534 |
+
"rewards/rejected": -1.7387701272964478,
|
12535 |
+
"step": 1636
|
12536 |
+
},
|
12537 |
+
{
|
12538 |
+
"epoch": 1.8970684039087948,
|
12539 |
+
"grad_norm": 75.9093198815468,
|
12540 |
+
"learning_rate": 1.4419411291413885e-09,
|
12541 |
+
"logits/chosen": -1.1982598304748535,
|
12542 |
+
"logits/rejected": -1.1942592859268188,
|
12543 |
+
"logps/chosen": -191.63914489746094,
|
12544 |
+
"logps/rejected": -217.94439697265625,
|
12545 |
+
"loss": 0.4268,
|
12546 |
+
"rewards/accuracies": 0.71875,
|
12547 |
+
"rewards/chosen": -0.6322917938232422,
|
12548 |
+
"rewards/margins": 1.844104528427124,
|
12549 |
+
"rewards/rejected": -2.4763965606689453,
|
12550 |
+
"step": 1638
|
12551 |
+
},
|
12552 |
+
{
|
12553 |
+
"epoch": 1.8993847267462902,
|
12554 |
+
"grad_norm": 58.34682402089141,
|
12555 |
+
"learning_rate": 1.3772923188052787e-09,
|
12556 |
+
"logits/chosen": -1.0590007305145264,
|
12557 |
+
"logits/rejected": -1.0922439098358154,
|
12558 |
+
"logps/chosen": -90.13923645019531,
|
12559 |
+
"logps/rejected": -109.63917541503906,
|
12560 |
+
"loss": 0.4401,
|
12561 |
+
"rewards/accuracies": 0.84375,
|
12562 |
+
"rewards/chosen": -0.1902827024459839,
|
12563 |
+
"rewards/margins": 1.1345347166061401,
|
12564 |
+
"rewards/rejected": -1.3248172998428345,
|
12565 |
+
"step": 1640
|
12566 |
+
},
|
12567 |
+
{
|
12568 |
+
"epoch": 1.9017010495837856,
|
12569 |
+
"grad_norm": 60.17667883042894,
|
12570 |
+
"learning_rate": 1.3141161431896808e-09,
|
12571 |
+
"logits/chosen": -1.292588472366333,
|
12572 |
+
"logits/rejected": -1.2948338985443115,
|
12573 |
+
"logps/chosen": -141.24307250976562,
|
12574 |
+
"logps/rejected": -183.1881866455078,
|
12575 |
+
"loss": 0.4093,
|
12576 |
+
"rewards/accuracies": 0.84375,
|
12577 |
+
"rewards/chosen": -0.06378068029880524,
|
12578 |
+
"rewards/margins": 1.4755961894989014,
|
12579 |
+
"rewards/rejected": -1.539376974105835,
|
12580 |
+
"step": 1642
|
12581 |
+
},
|
12582 |
+
{
|
12583 |
+
"epoch": 1.9040173724212812,
|
12584 |
+
"grad_norm": 46.419475472935424,
|
12585 |
+
"learning_rate": 1.2524135456415286e-09,
|
12586 |
+
"logits/chosen": -1.3303675651550293,
|
12587 |
+
"logits/rejected": -1.3957011699676514,
|
12588 |
+
"logps/chosen": -197.4314727783203,
|
12589 |
+
"logps/rejected": -230.6079559326172,
|
12590 |
+
"loss": 0.3864,
|
12591 |
+
"rewards/accuracies": 0.84375,
|
12592 |
+
"rewards/chosen": -0.5181460380554199,
|
12593 |
+
"rewards/margins": 1.6645830869674683,
|
12594 |
+
"rewards/rejected": -2.1827290058135986,
|
12595 |
+
"step": 1644
|
12596 |
+
},
|
12597 |
+
{
|
12598 |
+
"epoch": 1.9063336952587768,
|
12599 |
+
"grad_norm": 63.51262338219536,
|
12600 |
+
"learning_rate": 1.1921854475043125e-09,
|
12601 |
+
"logits/chosen": -1.3057444095611572,
|
12602 |
+
"logits/rejected": -1.3598231077194214,
|
12603 |
+
"logps/chosen": -140.8748779296875,
|
12604 |
+
"logps/rejected": -162.47158813476562,
|
12605 |
+
"loss": 0.438,
|
12606 |
+
"rewards/accuracies": 0.8125,
|
12607 |
+
"rewards/chosen": -0.24470748007297516,
|
12608 |
+
"rewards/margins": 1.0919952392578125,
|
12609 |
+
"rewards/rejected": -1.3367027044296265,
|
12610 |
+
"step": 1646
|
12611 |
+
},
|
12612 |
+
{
|
12613 |
+
"epoch": 1.9086500180962722,
|
12614 |
+
"grad_norm": 59.16346811149136,
|
12615 |
+
"learning_rate": 1.133432748104257e-09,
|
12616 |
+
"logits/chosen": -1.2664871215820312,
|
12617 |
+
"logits/rejected": -1.2415859699249268,
|
12618 |
+
"logps/chosen": -168.35910034179688,
|
12619 |
+
"logps/rejected": -186.53665161132812,
|
12620 |
+
"loss": 0.3824,
|
12621 |
+
"rewards/accuracies": 0.75,
|
12622 |
+
"rewards/chosen": -0.3128247559070587,
|
12623 |
+
"rewards/margins": 1.2026112079620361,
|
12624 |
+
"rewards/rejected": -1.5154359340667725,
|
12625 |
+
"step": 1648
|
12626 |
+
},
|
12627 |
+
{
|
12628 |
+
"epoch": 1.9109663409337676,
|
12629 |
+
"grad_norm": 72.05923858294557,
|
12630 |
+
"learning_rate": 1.0761563247369322e-09,
|
12631 |
+
"logits/chosen": -1.2224782705307007,
|
12632 |
+
"logits/rejected": -1.2964767217636108,
|
12633 |
+
"logps/chosen": -149.58824157714844,
|
12634 |
+
"logps/rejected": -172.06092834472656,
|
12635 |
+
"loss": 0.4327,
|
12636 |
+
"rewards/accuracies": 0.8125,
|
12637 |
+
"rewards/chosen": -0.10623270273208618,
|
12638 |
+
"rewards/margins": 1.0352814197540283,
|
12639 |
+
"rewards/rejected": -1.1415140628814697,
|
12640 |
+
"step": 1650
|
12641 |
+
},
|
12642 |
+
{
|
12643 |
+
"epoch": 1.913282663771263,
|
12644 |
+
"grad_norm": 72.25189940099223,
|
12645 |
+
"learning_rate": 1.0203570326541622e-09,
|
12646 |
+
"logits/chosen": -1.0865113735198975,
|
12647 |
+
"logits/rejected": -1.1271172761917114,
|
12648 |
+
"logps/chosen": -103.49948120117188,
|
12649 |
+
"logps/rejected": -108.95231628417969,
|
12650 |
+
"loss": 0.4442,
|
12651 |
+
"rewards/accuracies": 0.8125,
|
12652 |
+
"rewards/chosen": -0.29463884234428406,
|
12653 |
+
"rewards/margins": 0.5380735397338867,
|
12654 |
+
"rewards/rejected": -0.8327123522758484,
|
12655 |
+
"step": 1652
|
12656 |
+
},
|
12657 |
+
{
|
12658 |
+
"epoch": 1.9155989866087586,
|
12659 |
+
"grad_norm": 58.87468402600475,
|
12660 |
+
"learning_rate": 9.660357050512158e-10,
|
12661 |
+
"logits/chosen": -1.1705281734466553,
|
12662 |
+
"logits/rejected": -1.213219165802002,
|
12663 |
+
"logps/chosen": -151.6047821044922,
|
12664 |
+
"logps/rejected": -167.16241455078125,
|
12665 |
+
"loss": 0.4755,
|
12666 |
+
"rewards/accuracies": 0.8125,
|
12667 |
+
"rewards/chosen": -1.2385808229446411,
|
12668 |
+
"rewards/margins": 0.9532268047332764,
|
12669 |
+
"rewards/rejected": -2.191807746887207,
|
12670 |
+
"step": 1654
|
12671 |
+
},
|
12672 |
+
{
|
12673 |
+
"epoch": 1.9179153094462542,
|
12674 |
+
"grad_norm": 61.13297680975046,
|
12675 |
+
"learning_rate": 9.131931530544146e-10,
|
12676 |
+
"logits/chosen": -1.1430917978286743,
|
12677 |
+
"logits/rejected": -1.1427133083343506,
|
12678 |
+
"logps/chosen": -152.1170654296875,
|
12679 |
+
"logps/rejected": -184.9088592529297,
|
12680 |
+
"loss": 0.4595,
|
12681 |
+
"rewards/accuracies": 0.8125,
|
12682 |
+
"rewards/chosen": -0.958507239818573,
|
12683 |
+
"rewards/margins": 1.5942294597625732,
|
12684 |
+
"rewards/rejected": -2.552736759185791,
|
12685 |
+
"step": 1656
|
12686 |
+
},
|
12687 |
+
{
|
12688 |
+
"epoch": 1.9202316322837496,
|
12689 |
+
"grad_norm": 52.83655360025748,
|
12690 |
+
"learning_rate": 8.618301657089877e-10,
|
12691 |
+
"logits/chosen": -1.190130352973938,
|
12692 |
+
"logits/rejected": -1.2142915725708008,
|
12693 |
+
"logps/chosen": -161.54466247558594,
|
12694 |
+
"logps/rejected": -182.72946166992188,
|
12695 |
+
"loss": 0.3894,
|
12696 |
+
"rewards/accuracies": 0.84375,
|
12697 |
+
"rewards/chosen": -0.6212272047996521,
|
12698 |
+
"rewards/margins": 1.291230320930481,
|
12699 |
+
"rewards/rejected": -1.9124574661254883,
|
12700 |
+
"step": 1658
|
12701 |
+
},
|
12702 |
+
{
|
12703 |
+
"epoch": 1.922547955121245,
|
12704 |
+
"grad_norm": 110.57140073148696,
|
12705 |
+
"learning_rate": 8.119475099673035e-10,
|
12706 |
+
"logits/chosen": -1.1131267547607422,
|
12707 |
+
"logits/rejected": -1.2174605131149292,
|
12708 |
+
"logps/chosen": -157.0382843017578,
|
12709 |
+
"logps/rejected": -187.213134765625,
|
12710 |
+
"loss": 0.4364,
|
12711 |
+
"rewards/accuracies": 0.78125,
|
12712 |
+
"rewards/chosen": -0.5017825961112976,
|
12713 |
+
"rewards/margins": 0.9430520534515381,
|
12714 |
+
"rewards/rejected": -1.444834589958191,
|
12715 |
+
"step": 1660
|
12716 |
+
},
|
12717 |
+
{
|
12718 |
+
"epoch": 1.9248642779587404,
|
12719 |
+
"grad_norm": 53.02147974570211,
|
12720 |
+
"learning_rate": 7.635459306773784e-10,
|
12721 |
+
"logits/chosen": -1.1875760555267334,
|
12722 |
+
"logits/rejected": -1.2432739734649658,
|
12723 |
+
"logps/chosen": -147.28131103515625,
|
12724 |
+
"logps/rejected": -169.49566650390625,
|
12725 |
+
"loss": 0.43,
|
12726 |
+
"rewards/accuracies": 0.84375,
|
12727 |
+
"rewards/chosen": -0.40067267417907715,
|
12728 |
+
"rewards/margins": 1.1304292678833008,
|
12729 |
+
"rewards/rejected": -1.531101942062378,
|
12730 |
+
"step": 1662
|
12731 |
+
},
|
12732 |
+
{
|
12733 |
+
"epoch": 1.927180600796236,
|
12734 |
+
"grad_norm": 64.80466315551176,
|
12735 |
+
"learning_rate": 7.166261505718418e-10,
|
12736 |
+
"logits/chosen": -1.222117304801941,
|
12737 |
+
"logits/rejected": -1.2081456184387207,
|
12738 |
+
"logps/chosen": -157.27780151367188,
|
12739 |
+
"logps/rejected": -173.7355499267578,
|
12740 |
+
"loss": 0.3734,
|
12741 |
+
"rewards/accuracies": 0.84375,
|
12742 |
+
"rewards/chosen": -0.33614563941955566,
|
12743 |
+
"rewards/margins": 1.0949398279190063,
|
12744 |
+
"rewards/rejected": -1.431085467338562,
|
12745 |
+
"step": 1664
|
12746 |
+
},
|
12747 |
+
{
|
12748 |
+
"epoch": 1.9294969236337316,
|
12749 |
+
"grad_norm": 53.26534606993647,
|
12750 |
+
"learning_rate": 6.711888702570556e-10,
|
12751 |
+
"logits/chosen": -1.2844552993774414,
|
12752 |
+
"logits/rejected": -1.3053499460220337,
|
12753 |
+
"logps/chosen": -167.5966033935547,
|
12754 |
+
"logps/rejected": -168.5198974609375,
|
12755 |
+
"loss": 0.4155,
|
12756 |
+
"rewards/accuracies": 0.71875,
|
12757 |
+
"rewards/chosen": -0.15978145599365234,
|
12758 |
+
"rewards/margins": 1.0049140453338623,
|
12759 |
+
"rewards/rejected": -1.1646955013275146,
|
12760 |
+
"step": 1666
|
12761 |
+
},
|
12762 |
+
{
|
12763 |
+
"epoch": 1.931813246471227,
|
12764 |
+
"grad_norm": 44.85242610407527,
|
12765 |
+
"learning_rate": 6.272347682026779e-10,
|
12766 |
+
"logits/chosen": -1.0309640169143677,
|
12767 |
+
"logits/rejected": -1.134310007095337,
|
12768 |
+
"logps/chosen": -111.43970489501953,
|
12769 |
+
"logps/rejected": -144.99891662597656,
|
12770 |
+
"loss": 0.3869,
|
12771 |
+
"rewards/accuracies": 0.78125,
|
12772 |
+
"rewards/chosen": -0.24206304550170898,
|
12773 |
+
"rewards/margins": 1.2624578475952148,
|
12774 |
+
"rewards/rejected": -1.5045208930969238,
|
12775 |
+
"step": 1668
|
12776 |
+
},
|
12777 |
+
{
|
12778 |
+
"epoch": 1.9341295693087224,
|
12779 |
+
"grad_norm": 47.25828490220509,
|
12780 |
+
"learning_rate": 5.847645007315937e-10,
|
12781 |
+
"logits/chosen": -1.1614665985107422,
|
12782 |
+
"logits/rejected": -1.254847764968872,
|
12783 |
+
"logps/chosen": -137.72381591796875,
|
12784 |
+
"logps/rejected": -143.80300903320312,
|
12785 |
+
"loss": 0.4304,
|
12786 |
+
"rewards/accuracies": 0.6875,
|
12787 |
+
"rewards/chosen": -0.6075265407562256,
|
12788 |
+
"rewards/margins": 0.8250117301940918,
|
12789 |
+
"rewards/rejected": -1.4325382709503174,
|
12790 |
+
"step": 1670
|
12791 |
+
},
|
12792 |
+
{
|
12793 |
+
"epoch": 1.9364458921462178,
|
12794 |
+
"grad_norm": 47.60965224972502,
|
12795 |
+
"learning_rate": 5.437787020100115e-10,
|
12796 |
+
"logits/chosen": -1.2086517810821533,
|
12797 |
+
"logits/rejected": -1.1928253173828125,
|
12798 |
+
"logps/chosen": -162.096923828125,
|
12799 |
+
"logps/rejected": -178.24951171875,
|
12800 |
+
"loss": 0.3871,
|
12801 |
+
"rewards/accuracies": 0.875,
|
12802 |
+
"rewards/chosen": -0.37640607357025146,
|
12803 |
+
"rewards/margins": 1.857001781463623,
|
12804 |
+
"rewards/rejected": -2.233407974243164,
|
12805 |
+
"step": 1672
|
12806 |
+
},
|
12807 |
+
{
|
12808 |
+
"epoch": 1.9387622149837134,
|
12809 |
+
"grad_norm": 51.270284022735154,
|
12810 |
+
"learning_rate": 5.042779840380595e-10,
|
12811 |
+
"logits/chosen": -1.2238942384719849,
|
12812 |
+
"logits/rejected": -1.195109486579895,
|
12813 |
+
"logps/chosen": -119.4543685913086,
|
12814 |
+
"logps/rejected": -133.65127563476562,
|
12815 |
+
"loss": 0.4054,
|
12816 |
+
"rewards/accuracies": 0.65625,
|
12817 |
+
"rewards/chosen": -0.3657826781272888,
|
12818 |
+
"rewards/margins": 0.8839918375015259,
|
12819 |
+
"rewards/rejected": -1.249774694442749,
|
12820 |
+
"step": 1674
|
12821 |
+
},
|
12822 |
+
{
|
12823 |
+
"epoch": 1.941078537821209,
|
12824 |
+
"grad_norm": 58.278892064452975,
|
12825 |
+
"learning_rate": 4.662629366406601e-10,
|
12826 |
+
"logits/chosen": -1.2001346349716187,
|
12827 |
+
"logits/rejected": -1.1202467679977417,
|
12828 |
+
"logps/chosen": -128.72256469726562,
|
12829 |
+
"logps/rejected": -139.1036376953125,
|
12830 |
+
"loss": 0.4051,
|
12831 |
+
"rewards/accuracies": 0.8125,
|
12832 |
+
"rewards/chosen": -0.27061766386032104,
|
12833 |
+
"rewards/margins": 1.1671736240386963,
|
12834 |
+
"rewards/rejected": -1.437791347503662,
|
12835 |
+
"step": 1676
|
12836 |
+
},
|
12837 |
+
{
|
12838 |
+
"epoch": 1.9433948606587044,
|
12839 |
+
"grad_norm": 46.614126047623216,
|
12840 |
+
"learning_rate": 4.2973412745864744e-10,
|
12841 |
+
"logits/chosen": -1.167816162109375,
|
12842 |
+
"logits/rejected": -1.1992610692977905,
|
12843 |
+
"logps/chosen": -129.0093536376953,
|
12844 |
+
"logps/rejected": -169.50445556640625,
|
12845 |
+
"loss": 0.3959,
|
12846 |
+
"rewards/accuracies": 0.96875,
|
12847 |
+
"rewards/chosen": -0.2619994878768921,
|
12848 |
+
"rewards/margins": 1.6201242208480835,
|
12849 |
+
"rewards/rejected": -1.8821238279342651,
|
12850 |
+
"step": 1678
|
12851 |
+
},
|
12852 |
+
{
|
12853 |
+
"epoch": 1.9457111834961998,
|
12854 |
+
"grad_norm": 75.8267220593344,
|
12855 |
+
"learning_rate": 3.946921019403859e-10,
|
12856 |
+
"logits/chosen": -1.2351601123809814,
|
12857 |
+
"logits/rejected": -1.3069424629211426,
|
12858 |
+
"logps/chosen": -130.69961547851562,
|
12859 |
+
"logps/rejected": -150.25662231445312,
|
12860 |
+
"loss": 0.4755,
|
12861 |
+
"rewards/accuracies": 0.59375,
|
12862 |
+
"rewards/chosen": -0.33375632762908936,
|
12863 |
+
"rewards/margins": 0.7270826697349548,
|
12864 |
+
"rewards/rejected": -1.0608389377593994,
|
12865 |
+
"step": 1680
|
12866 |
+
},
|
12867 |
+
{
|
12868 |
+
"epoch": 1.9480275063336951,
|
12869 |
+
"grad_norm": 57.21603903329357,
|
12870 |
+
"learning_rate": 3.61137383333554e-10,
|
12871 |
+
"logits/chosen": -1.1741724014282227,
|
12872 |
+
"logits/rejected": -1.158752679824829,
|
12873 |
+
"logps/chosen": -158.3549346923828,
|
12874 |
+
"logps/rejected": -179.17391967773438,
|
12875 |
+
"loss": 0.393,
|
12876 |
+
"rewards/accuracies": 0.75,
|
12877 |
+
"rewards/chosen": -0.5816279649734497,
|
12878 |
+
"rewards/margins": 1.2226191759109497,
|
12879 |
+
"rewards/rejected": -1.8042471408843994,
|
12880 |
+
"step": 1682
|
12881 |
+
},
|
12882 |
+
{
|
12883 |
+
"epoch": 1.9503438291711908,
|
12884 |
+
"grad_norm": 56.808299639154505,
|
12885 |
+
"learning_rate": 3.2907047267736186e-10,
|
12886 |
+
"logits/chosen": -1.240709900856018,
|
12887 |
+
"logits/rejected": -1.223825454711914,
|
12888 |
+
"logps/chosen": -163.18728637695312,
|
12889 |
+
"logps/rejected": -198.8317413330078,
|
12890 |
+
"loss": 0.3877,
|
12891 |
+
"rewards/accuracies": 0.78125,
|
12892 |
+
"rewards/chosen": -0.28957706689834595,
|
12893 |
+
"rewards/margins": 1.6465396881103516,
|
12894 |
+
"rewards/rejected": -1.9361168146133423,
|
12895 |
+
"step": 1684
|
12896 |
+
},
|
12897 |
+
{
|
12898 |
+
"epoch": 1.9526601520086864,
|
12899 |
+
"grad_norm": 52.80591879977487,
|
12900 |
+
"learning_rate": 2.9849184879506827e-10,
|
12901 |
+
"logits/chosen": -1.1730728149414062,
|
12902 |
+
"logits/rejected": -1.1454265117645264,
|
12903 |
+
"logps/chosen": -136.7293701171875,
|
12904 |
+
"logps/rejected": -151.21914672851562,
|
12905 |
+
"loss": 0.3944,
|
12906 |
+
"rewards/accuracies": 0.78125,
|
12907 |
+
"rewards/chosen": -0.25426185131073,
|
12908 |
+
"rewards/margins": 1.232313871383667,
|
12909 |
+
"rewards/rejected": -1.4865756034851074,
|
12910 |
+
"step": 1686
|
12911 |
+
},
|
12912 |
+
{
|
12913 |
+
"epoch": 1.9549764748461818,
|
12914 |
+
"grad_norm": 75.29247959139558,
|
12915 |
+
"learning_rate": 2.6940196828681983e-10,
|
12916 |
+
"logits/chosen": -1.093564510345459,
|
12917 |
+
"logits/rejected": -1.0963504314422607,
|
12918 |
+
"logps/chosen": -189.71131896972656,
|
12919 |
+
"logps/rejected": -234.08973693847656,
|
12920 |
+
"loss": 0.4931,
|
12921 |
+
"rewards/accuracies": 0.78125,
|
12922 |
+
"rewards/chosen": -1.0700315237045288,
|
12923 |
+
"rewards/margins": 1.948075532913208,
|
12924 |
+
"rewards/rejected": -3.0181069374084473,
|
12925 |
+
"step": 1688
|
12926 |
+
},
|
12927 |
+
{
|
12928 |
+
"epoch": 1.9572927976836771,
|
12929 |
+
"grad_norm": 70.12384040466765,
|
12930 |
+
"learning_rate": 2.418012655228452e-10,
|
12931 |
+
"logits/chosen": -1.2431126832962036,
|
12932 |
+
"logits/rejected": -1.2657066583633423,
|
12933 |
+
"logps/chosen": -99.85934448242188,
|
12934 |
+
"logps/rejected": -121.7491226196289,
|
12935 |
+
"loss": 0.426,
|
12936 |
+
"rewards/accuracies": 0.75,
|
12937 |
+
"rewards/chosen": -0.3242354393005371,
|
12938 |
+
"rewards/margins": 0.8404097557067871,
|
12939 |
+
"rewards/rejected": -1.1646450757980347,
|
12940 |
+
"step": 1690
|
12941 |
+
},
|
12942 |
+
{
|
12943 |
+
"epoch": 1.9596091205211725,
|
12944 |
+
"grad_norm": 50.23873116829206,
|
12945 |
+
"learning_rate": 2.1569015263697143e-10,
|
12946 |
+
"logits/chosen": -1.2286624908447266,
|
12947 |
+
"logits/rejected": -1.2570605278015137,
|
12948 |
+
"logps/chosen": -145.4255828857422,
|
12949 |
+
"logps/rejected": -194.1035919189453,
|
12950 |
+
"loss": 0.4198,
|
12951 |
+
"rewards/accuracies": 0.90625,
|
12952 |
+
"rewards/chosen": -0.5209024548530579,
|
12953 |
+
"rewards/margins": 1.7652302980422974,
|
12954 |
+
"rewards/rejected": -2.286133050918579,
|
12955 |
+
"step": 1692
|
12956 |
+
},
|
12957 |
+
{
|
12958 |
+
"epoch": 1.9619254433586681,
|
12959 |
+
"grad_norm": 54.25222586547325,
|
12960 |
+
"learning_rate": 1.9106901952045119e-10,
|
12961 |
+
"logits/chosen": -1.2050321102142334,
|
12962 |
+
"logits/rejected": -1.2619915008544922,
|
12963 |
+
"logps/chosen": -179.3286590576172,
|
12964 |
+
"logps/rejected": -227.4953155517578,
|
12965 |
+
"loss": 0.4315,
|
12966 |
+
"rewards/accuracies": 0.9375,
|
12967 |
+
"rewards/chosen": -0.5577185750007629,
|
12968 |
+
"rewards/margins": 1.80524742603302,
|
12969 |
+
"rewards/rejected": -2.3629660606384277,
|
12970 |
+
"step": 1694
|
12971 |
+
},
|
12972 |
+
{
|
12973 |
+
"epoch": 1.9642417661961638,
|
12974 |
+
"grad_norm": 61.997046360221496,
|
12975 |
+
"learning_rate": 1.6793823381614501e-10,
|
12976 |
+
"logits/chosen": -1.294581651687622,
|
12977 |
+
"logits/rejected": -1.247463583946228,
|
12978 |
+
"logps/chosen": -144.93246459960938,
|
12979 |
+
"logps/rejected": -168.00628662109375,
|
12980 |
+
"loss": 0.444,
|
12981 |
+
"rewards/accuracies": 0.9375,
|
12982 |
+
"rewards/chosen": -0.0931825190782547,
|
12983 |
+
"rewards/margins": 1.4015557765960693,
|
12984 |
+
"rewards/rejected": -1.4947383403778076,
|
12985 |
+
"step": 1696
|
12986 |
+
},
|
12987 |
+
{
|
12988 |
+
"epoch": 1.9665580890336591,
|
12989 |
+
"grad_norm": 82.5016461876686,
|
12990 |
+
"learning_rate": 1.4629814091307036e-10,
|
12991 |
+
"logits/chosen": -1.2317255735397339,
|
12992 |
+
"logits/rejected": -1.2456412315368652,
|
12993 |
+
"logps/chosen": -156.1422576904297,
|
12994 |
+
"logps/rejected": -151.87107849121094,
|
12995 |
+
"loss": 0.417,
|
12996 |
+
"rewards/accuracies": 0.75,
|
12997 |
+
"rewards/chosen": -0.4730032682418823,
|
12998 |
+
"rewards/margins": 1.0971145629882812,
|
12999 |
+
"rewards/rejected": -1.570117712020874,
|
13000 |
+
"step": 1698
|
13001 |
+
},
|
13002 |
+
{
|
13003 |
+
"epoch": 1.9688744118711545,
|
13004 |
+
"grad_norm": 58.642096283997354,
|
13005 |
+
"learning_rate": 1.261490639411833e-10,
|
13006 |
+
"logits/chosen": -1.1768873929977417,
|
13007 |
+
"logits/rejected": -1.2999684810638428,
|
13008 |
+
"logps/chosen": -109.31826782226562,
|
13009 |
+
"logps/rejected": -134.15371704101562,
|
13010 |
+
"loss": 0.3986,
|
13011 |
+
"rewards/accuracies": 0.78125,
|
13012 |
+
"rewards/chosen": -0.2187974750995636,
|
13013 |
+
"rewards/margins": 1.001755714416504,
|
13014 |
+
"rewards/rejected": -1.2205531597137451,
|
13015 |
+
"step": 1700
|
13016 |
+
},
|
13017 |
+
{
|
13018 |
+
"epoch": 1.9688744118711545,
|
13019 |
+
"eval_logits/chosen": -1.215119481086731,
|
13020 |
+
"eval_logits/rejected": -1.2099292278289795,
|
13021 |
+
"eval_logps/chosen": -144.09429931640625,
|
13022 |
+
"eval_logps/rejected": -149.15176391601562,
|
13023 |
+
"eval_loss": 0.5949785113334656,
|
13024 |
+
"eval_rewards/accuracies": 0.7599999904632568,
|
13025 |
+
"eval_rewards/chosen": -0.8463126420974731,
|
13026 |
+
"eval_rewards/margins": 0.6735073924064636,
|
13027 |
+
"eval_rewards/rejected": -1.5198200941085815,
|
13028 |
+
"eval_runtime": 22.9886,
|
13029 |
+
"eval_samples_per_second": 4.35,
|
13030 |
+
"eval_steps_per_second": 1.087,
|
13031 |
+
"step": 1700
|
13032 |
+
},
|
13033 |
+
{
|
13034 |
+
"epoch": 1.97119073470865,
|
13035 |
+
"grad_norm": 55.677396344114726,
|
13036 |
+
"learning_rate": 1.0749130376659366e-10,
|
13037 |
+
"logits/chosen": -1.2230623960494995,
|
13038 |
+
"logits/rejected": -1.163780689239502,
|
13039 |
+
"logps/chosen": -164.1004638671875,
|
13040 |
+
"logps/rejected": -179.64088439941406,
|
13041 |
+
"loss": 0.3997,
|
13042 |
+
"rewards/accuracies": 0.84375,
|
13043 |
+
"rewards/chosen": -0.1451684981584549,
|
13044 |
+
"rewards/margins": 1.5081610679626465,
|
13045 |
+
"rewards/rejected": -1.653329610824585,
|
13046 |
+
"step": 1702
|
13047 |
+
},
|
13048 |
+
{
|
13049 |
+
"epoch": 1.9735070575461455,
|
13050 |
+
"grad_norm": 71.96257699524784,
|
13051 |
+
"learning_rate": 9.032513898705741e-11,
|
13052 |
+
"logits/chosen": -1.2779675722122192,
|
13053 |
+
"logits/rejected": -1.2883471250534058,
|
13054 |
+
"logps/chosen": -133.36380004882812,
|
13055 |
+
"logps/rejected": -153.81741333007812,
|
13056 |
+
"loss": 0.467,
|
13057 |
+
"rewards/accuracies": 0.8125,
|
13058 |
+
"rewards/chosen": -0.13987727463245392,
|
13059 |
+
"rewards/margins": 1.16806161403656,
|
13060 |
+
"rewards/rejected": -1.307938814163208,
|
13061 |
+
"step": 1704
|
13062 |
+
},
|
13063 |
+
{
|
13064 |
+
"epoch": 1.975823380383641,
|
13065 |
+
"grad_norm": 49.123608283002156,
|
13066 |
+
"learning_rate": 7.465082592782445e-11,
|
13067 |
+
"logits/chosen": -1.207802414894104,
|
13068 |
+
"logits/rejected": -1.1620241403579712,
|
13069 |
+
"logps/chosen": -164.59759521484375,
|
13070 |
+
"logps/rejected": -208.24905395507812,
|
13071 |
+
"loss": 0.3896,
|
13072 |
+
"rewards/accuracies": 0.90625,
|
13073 |
+
"rewards/chosen": -0.5080645084381104,
|
13074 |
+
"rewards/margins": 2.8074049949645996,
|
13075 |
+
"rewards/rejected": -3.315469980239868,
|
13076 |
+
"step": 1706
|
13077 |
+
},
|
13078 |
+
{
|
13079 |
+
"epoch": 1.9781397032211365,
|
13080 |
+
"grad_norm": 51.95036099086183,
|
13081 |
+
"learning_rate": 6.04685986378195e-11,
|
13082 |
+
"logits/chosen": -1.25301992893219,
|
13083 |
+
"logits/rejected": -1.2303074598312378,
|
13084 |
+
"logps/chosen": -160.34934997558594,
|
13085 |
+
"logps/rejected": -162.81515502929688,
|
13086 |
+
"loss": 0.3678,
|
13087 |
+
"rewards/accuracies": 0.875,
|
13088 |
+
"rewards/chosen": -0.21462872624397278,
|
13089 |
+
"rewards/margins": 1.2838810682296753,
|
13090 |
+
"rewards/rejected": -1.4985097646713257,
|
13091 |
+
"step": 1708
|
13092 |
+
},
|
13093 |
+
{
|
13094 |
+
"epoch": 1.980456026058632,
|
13095 |
+
"grad_norm": 55.67392221664361,
|
13096 |
+
"learning_rate": 4.777866888611148e-11,
|
13097 |
+
"logits/chosen": -1.1367592811584473,
|
13098 |
+
"logits/rejected": -1.2029287815093994,
|
13099 |
+
"logps/chosen": -170.26568603515625,
|
13100 |
+
"logps/rejected": -203.4815216064453,
|
13101 |
+
"loss": 0.3303,
|
13102 |
+
"rewards/accuracies": 0.90625,
|
13103 |
+
"rewards/chosen": -0.21711598336696625,
|
13104 |
+
"rewards/margins": 1.6773165464401245,
|
13105 |
+
"rewards/rejected": -1.8944324254989624,
|
13106 |
+
"step": 1710
|
13107 |
+
},
|
13108 |
+
{
|
13109 |
+
"epoch": 1.9827723488961273,
|
13110 |
+
"grad_norm": 77.61747497941103,
|
13111 |
+
"learning_rate": 3.658122615880499e-11,
|
13112 |
+
"logits/chosen": -1.198671817779541,
|
13113 |
+
"logits/rejected": -1.1844216585159302,
|
13114 |
+
"logps/chosen": -188.25486755371094,
|
13115 |
+
"logps/rejected": -188.4522247314453,
|
13116 |
+
"loss": 0.4122,
|
13117 |
+
"rewards/accuracies": 0.75,
|
13118 |
+
"rewards/chosen": -0.35006386041641235,
|
13119 |
+
"rewards/margins": 1.136628270149231,
|
13120 |
+
"rewards/rejected": -1.4866920709609985,
|
13121 |
+
"step": 1712
|
13122 |
+
},
|
13123 |
+
{
|
13124 |
+
"epoch": 1.985088671733623,
|
13125 |
+
"grad_norm": 72.68985609286901,
|
13126 |
+
"learning_rate": 2.687643765615366e-11,
|
13127 |
+
"logits/chosen": -1.2630504369735718,
|
13128 |
+
"logits/rejected": -1.1126054525375366,
|
13129 |
+
"logps/chosen": -170.22010803222656,
|
13130 |
+
"logps/rejected": -162.9827880859375,
|
13131 |
+
"loss": 0.4154,
|
13132 |
+
"rewards/accuracies": 0.8125,
|
13133 |
+
"rewards/chosen": -0.1824585199356079,
|
13134 |
+
"rewards/margins": 1.2966349124908447,
|
13135 |
+
"rewards/rejected": -1.479093313217163,
|
13136 |
+
"step": 1714
|
13137 |
+
},
|
13138 |
+
{
|
13139 |
+
"epoch": 1.9874049945711183,
|
13140 |
+
"grad_norm": 57.324360587243646,
|
13141 |
+
"learning_rate": 1.8664448290106606e-11,
|
13142 |
+
"logits/chosen": -1.1106977462768555,
|
13143 |
+
"logits/rejected": -1.1625827550888062,
|
13144 |
+
"logps/chosen": -127.45535278320312,
|
13145 |
+
"logps/rejected": -168.37313842773438,
|
13146 |
+
"loss": 0.4271,
|
13147 |
+
"rewards/accuracies": 0.90625,
|
13148 |
+
"rewards/chosen": -0.41478192806243896,
|
13149 |
+
"rewards/margins": 1.4449265003204346,
|
13150 |
+
"rewards/rejected": -1.8597084283828735,
|
13151 |
+
"step": 1716
|
13152 |
+
},
|
13153 |
+
{
|
13154 |
+
"epoch": 1.989721317408614,
|
13155 |
+
"grad_norm": 64.91297510204477,
|
13156 |
+
"learning_rate": 1.1945380682132355e-11,
|
13157 |
+
"logits/chosen": -1.3344897031784058,
|
13158 |
+
"logits/rejected": -1.3637080192565918,
|
13159 |
+
"logps/chosen": -155.83399963378906,
|
13160 |
+
"logps/rejected": -174.8062744140625,
|
13161 |
+
"loss": 0.4455,
|
13162 |
+
"rewards/accuracies": 0.8125,
|
13163 |
+
"rewards/chosen": -0.2664361596107483,
|
13164 |
+
"rewards/margins": 1.3320696353912354,
|
13165 |
+
"rewards/rejected": -1.5985058546066284,
|
13166 |
+
"step": 1718
|
13167 |
+
},
|
13168 |
+
{
|
13169 |
+
"epoch": 1.9920376402461093,
|
13170 |
+
"grad_norm": 55.133928870906985,
|
13171 |
+
"learning_rate": 6.719335161364803e-12,
|
13172 |
+
"logits/chosen": -1.210727334022522,
|
13173 |
+
"logits/rejected": -1.2122191190719604,
|
13174 |
+
"logps/chosen": -138.89447021484375,
|
13175 |
+
"logps/rejected": -185.5248565673828,
|
13176 |
+
"loss": 0.407,
|
13177 |
+
"rewards/accuracies": 0.84375,
|
13178 |
+
"rewards/chosen": -0.43058472871780396,
|
13179 |
+
"rewards/margins": 2.1915080547332764,
|
13180 |
+
"rewards/rejected": -2.6220927238464355,
|
13181 |
+
"step": 1720
|
13182 |
+
},
|
13183 |
+
{
|
13184 |
+
"epoch": 1.9943539630836047,
|
13185 |
+
"grad_norm": 59.1945232431432,
|
13186 |
+
"learning_rate": 2.9863897631488e-12,
|
13187 |
+
"logits/chosen": -1.1495387554168701,
|
13188 |
+
"logits/rejected": -1.2423110008239746,
|
13189 |
+
"logps/chosen": -168.22439575195312,
|
13190 |
+
"logps/rejected": -205.48471069335938,
|
13191 |
+
"loss": 0.3821,
|
13192 |
+
"rewards/accuracies": 0.875,
|
13193 |
+
"rewards/chosen": -0.808469295501709,
|
13194 |
+
"rewards/margins": 1.811312198638916,
|
13195 |
+
"rewards/rejected": -2.619781494140625,
|
13196 |
+
"step": 1722
|
13197 |
+
},
|
13198 |
+
{
|
13199 |
+
"epoch": 1.9966702859211003,
|
13200 |
+
"grad_norm": 50.35911703570131,
|
13201 |
+
"learning_rate": 7.466002278522232e-13,
|
13202 |
+
"logits/chosen": -1.1888153553009033,
|
13203 |
+
"logits/rejected": -1.1903085708618164,
|
13204 |
+
"logps/chosen": -183.95315551757812,
|
13205 |
+
"logps/rejected": -213.3731689453125,
|
13206 |
+
"loss": 0.3585,
|
13207 |
+
"rewards/accuracies": 0.90625,
|
13208 |
+
"rewards/chosen": -0.34876811504364014,
|
13209 |
+
"rewards/margins": 1.8841259479522705,
|
13210 |
+
"rewards/rejected": -2.232893943786621,
|
13211 |
+
"step": 1724
|
13212 |
+
},
|
13213 |
+
{
|
13214 |
+
"epoch": 1.9989866087585957,
|
13215 |
+
"grad_norm": 63.15894144617586,
|
13216 |
+
"learning_rate": 0.0,
|
13217 |
+
"logits/chosen": -1.270525574684143,
|
13218 |
+
"logits/rejected": -1.1932576894760132,
|
13219 |
+
"logps/chosen": -133.14906311035156,
|
13220 |
+
"logps/rejected": -160.24928283691406,
|
13221 |
+
"loss": 0.4361,
|
13222 |
+
"rewards/accuracies": 0.875,
|
13223 |
+
"rewards/chosen": -0.38358262181282043,
|
13224 |
+
"rewards/margins": 1.584211826324463,
|
13225 |
+
"rewards/rejected": -1.967794418334961,
|
13226 |
+
"step": 1726
|
13227 |
}
|
13228 |
],
|
13229 |
"logging_steps": 2,
|
|
|
13238 |
"should_evaluate": false,
|
13239 |
"should_log": false,
|
13240 |
"should_save": true,
|
13241 |
+
"should_training_stop": true
|
13242 |
},
|
13243 |
"attributes": {}
|
13244 |
}
|