Training in progress, step 600, checkpoint
Browse files- last-checkpoint/global_step600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step600/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step600/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step600/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step600/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00004.safetensors +1 -1
- last-checkpoint/model-00002-of-00004.safetensors +1 -1
- last-checkpoint/model-00003-of-00004.safetensors +1 -1
- last-checkpoint/model-00004-of-00004.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +2300 -2
last-checkpoint/global_step600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc8db81f9ddaa29cae59ac5881d092fda01db995e0f4da172a5db08a35bf3d56
|
3 |
+
size 24090788996
|
last-checkpoint/global_step600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bfb7cf1637269ec43452f1226ff60ceca7e40a620762f2be21ea5790b95de817
|
3 |
+
size 24090788996
|
last-checkpoint/global_step600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19679cf0f319b67f5f4781a251d3d3b189ba741e42dd93fa0b69838c528b5804
|
3 |
+
size 24090788996
|
last-checkpoint/global_step600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b57822d3cbbb7035813723d94a43cdbe6ff635b9d800a7a1445860e28eaff6d
|
3 |
+
size 24090788996
|
last-checkpoint/global_step600/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92080d92f7039ba9a28ee0207869fee3670869dc8f88ff1c586400224153afc1
|
3 |
+
size 150693
|
last-checkpoint/global_step600/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2adaa5dad1fb21d06dd1448a46414244004297e4538dc79e31541160fb413a29
|
3 |
+
size 150693
|
last-checkpoint/global_step600/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e100a2fb0ad2e0b16513edb6bb5e410375b4900628822bec6b2e89cd0a4b6eab
|
3 |
+
size 150693
|
last-checkpoint/global_step600/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f100eb6452bbb9cf518d9995cea742dd60e64f07f51aceb70c76caef63912b63
|
3 |
+
size 150693
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step600
|
last-checkpoint/model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4976698672
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48948a0d9104661249099a4014490ab23bd0ad9a31f6471dee9f9f094a3d63bf
|
3 |
size 4976698672
|
last-checkpoint/model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999802720
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b24fa74f6060bb886c4ad908a9e1cbc5b1d32700a032f508c04604951a8181f
|
3 |
size 4999802720
|
last-checkpoint/model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4915916176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f1fb4fda3abf0c7433b1013f78feb462c78a1d36b3a3336e8e2ed0f8c341706
|
3 |
size 4915916176
|
last-checkpoint/model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1168138808
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a42d0ba56615ac5b6ca669d046a3ac05a8adeee3b01ebef26cde18b002e96c8
|
3 |
size 1168138808
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef002048764051a71fb00f8f978e9ec32b780dc850bdb059af362cc56494234b
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37194a6d48612e1a46a2d5d317ead97c70d9fc4569b0118fcd5f84c3dc9daa5a
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17c179483659a784aa1ace2427daff48c556a6bcc3c330e6f3274e4dc95e4b49
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b56857c9b117629f35af2c3d64f522d33a9d8aa94faa81ec6956380a895118c4
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d8f9b18fc83c21830420fd2e6d55afd183068e9c7f7ec7447233ce473235b6a
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2305,6 +2305,2304 @@
|
|
2305 |
"eval_samples_per_second": 4.06,
|
2306 |
"eval_steps_per_second": 1.015,
|
2307 |
"step": 300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2308 |
}
|
2309 |
],
|
2310 |
"logging_steps": 2,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.6948968512486428,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 600,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2305 |
"eval_samples_per_second": 4.06,
|
2306 |
"eval_steps_per_second": 1.015,
|
2307 |
"step": 300
|
2308 |
+
},
|
2309 |
+
{
|
2310 |
+
"epoch": 0.3497647484618169,
|
2311 |
+
"grad_norm": 79.77982199610355,
|
2312 |
+
"learning_rate": 1.9248010649538775e-07,
|
2313 |
+
"logits/chosen": -1.3689723014831543,
|
2314 |
+
"logits/rejected": -1.4326424598693848,
|
2315 |
+
"logps/chosen": -186.77511596679688,
|
2316 |
+
"logps/rejected": -194.57289123535156,
|
2317 |
+
"loss": 0.677,
|
2318 |
+
"rewards/accuracies": 0.625,
|
2319 |
+
"rewards/chosen": 0.07687507569789886,
|
2320 |
+
"rewards/margins": 0.245405375957489,
|
2321 |
+
"rewards/rejected": -0.16853031516075134,
|
2322 |
+
"step": 302
|
2323 |
+
},
|
2324 |
+
{
|
2325 |
+
"epoch": 0.3520810712993123,
|
2326 |
+
"grad_norm": 72.83330769963189,
|
2327 |
+
"learning_rate": 1.923324026065944e-07,
|
2328 |
+
"logits/chosen": -1.2983791828155518,
|
2329 |
+
"logits/rejected": -1.301888108253479,
|
2330 |
+
"logps/chosen": -96.0470199584961,
|
2331 |
+
"logps/rejected": -113.31134796142578,
|
2332 |
+
"loss": 0.6028,
|
2333 |
+
"rewards/accuracies": 0.625,
|
2334 |
+
"rewards/chosen": 0.10890144109725952,
|
2335 |
+
"rewards/margins": 0.2682499885559082,
|
2336 |
+
"rewards/rejected": -0.1593485325574875,
|
2337 |
+
"step": 304
|
2338 |
+
},
|
2339 |
+
{
|
2340 |
+
"epoch": 0.3543973941368078,
|
2341 |
+
"grad_norm": 100.14984360245958,
|
2342 |
+
"learning_rate": 1.9218332000994458e-07,
|
2343 |
+
"logits/chosen": -1.4329365491867065,
|
2344 |
+
"logits/rejected": -1.4993162155151367,
|
2345 |
+
"logps/chosen": -186.0762176513672,
|
2346 |
+
"logps/rejected": -214.90533447265625,
|
2347 |
+
"loss": 0.6058,
|
2348 |
+
"rewards/accuracies": 0.65625,
|
2349 |
+
"rewards/chosen": 0.03735332563519478,
|
2350 |
+
"rewards/margins": 0.3314560055732727,
|
2351 |
+
"rewards/rejected": -0.2941026985645294,
|
2352 |
+
"step": 306
|
2353 |
+
},
|
2354 |
+
{
|
2355 |
+
"epoch": 0.3567137169743033,
|
2356 |
+
"grad_norm": 72.19620109844895,
|
2357 |
+
"learning_rate": 1.9203286093154026e-07,
|
2358 |
+
"logits/chosen": -1.2941675186157227,
|
2359 |
+
"logits/rejected": -1.259239673614502,
|
2360 |
+
"logps/chosen": -109.15111541748047,
|
2361 |
+
"logps/rejected": -109.48808288574219,
|
2362 |
+
"loss": 0.6171,
|
2363 |
+
"rewards/accuracies": 0.59375,
|
2364 |
+
"rewards/chosen": 0.24576213955879211,
|
2365 |
+
"rewards/margins": 0.18151941895484924,
|
2366 |
+
"rewards/rejected": 0.06424272805452347,
|
2367 |
+
"step": 308
|
2368 |
+
},
|
2369 |
+
{
|
2370 |
+
"epoch": 0.35903003981179876,
|
2371 |
+
"grad_norm": 106.11628012062671,
|
2372 |
+
"learning_rate": 1.9188102761803715e-07,
|
2373 |
+
"logits/chosen": -1.4155701398849487,
|
2374 |
+
"logits/rejected": -1.469191312789917,
|
2375 |
+
"logps/chosen": -192.42648315429688,
|
2376 |
+
"logps/rejected": -190.7394561767578,
|
2377 |
+
"loss": 0.6785,
|
2378 |
+
"rewards/accuracies": 0.65625,
|
2379 |
+
"rewards/chosen": -0.15398849546909332,
|
2380 |
+
"rewards/margins": 0.36678701639175415,
|
2381 |
+
"rewards/rejected": -0.5207754373550415,
|
2382 |
+
"step": 310
|
2383 |
+
},
|
2384 |
+
{
|
2385 |
+
"epoch": 0.36134636264929426,
|
2386 |
+
"grad_norm": 70.19369769339828,
|
2387 |
+
"learning_rate": 1.9172782233661094e-07,
|
2388 |
+
"logits/chosen": -1.254553198814392,
|
2389 |
+
"logits/rejected": -1.1699531078338623,
|
2390 |
+
"logps/chosen": -127.07028198242188,
|
2391 |
+
"logps/rejected": -145.60787963867188,
|
2392 |
+
"loss": 0.6158,
|
2393 |
+
"rewards/accuracies": 0.6875,
|
2394 |
+
"rewards/chosen": 0.17219696938991547,
|
2395 |
+
"rewards/margins": 0.5162093639373779,
|
2396 |
+
"rewards/rejected": -0.34401237964630127,
|
2397 |
+
"step": 312
|
2398 |
+
},
|
2399 |
+
{
|
2400 |
+
"epoch": 0.3636626854867897,
|
2401 |
+
"grad_norm": 90.77874249334138,
|
2402 |
+
"learning_rate": 1.915732473749236e-07,
|
2403 |
+
"logits/chosen": -1.2084178924560547,
|
2404 |
+
"logits/rejected": -1.1874415874481201,
|
2405 |
+
"logps/chosen": -166.15135192871094,
|
2406 |
+
"logps/rejected": -176.3106231689453,
|
2407 |
+
"loss": 0.629,
|
2408 |
+
"rewards/accuracies": 0.59375,
|
2409 |
+
"rewards/chosen": 0.1258632242679596,
|
2410 |
+
"rewards/margins": 0.4471869468688965,
|
2411 |
+
"rewards/rejected": -0.3213237524032593,
|
2412 |
+
"step": 314
|
2413 |
+
},
|
2414 |
+
{
|
2415 |
+
"epoch": 0.3659790083242852,
|
2416 |
+
"grad_norm": 78.58822831789924,
|
2417 |
+
"learning_rate": 1.914173050410892e-07,
|
2418 |
+
"logits/chosen": -1.3010833263397217,
|
2419 |
+
"logits/rejected": -1.3981972932815552,
|
2420 |
+
"logps/chosen": -94.9105453491211,
|
2421 |
+
"logps/rejected": -108.66416931152344,
|
2422 |
+
"loss": 0.6266,
|
2423 |
+
"rewards/accuracies": 0.65625,
|
2424 |
+
"rewards/chosen": 0.15446007251739502,
|
2425 |
+
"rewards/margins": 0.30908384919166565,
|
2426 |
+
"rewards/rejected": -0.15462377667427063,
|
2427 |
+
"step": 316
|
2428 |
+
},
|
2429 |
+
{
|
2430 |
+
"epoch": 0.36829533116178065,
|
2431 |
+
"grad_norm": 80.5668477187345,
|
2432 |
+
"learning_rate": 1.9125999766363932e-07,
|
2433 |
+
"logits/chosen": -1.4468637704849243,
|
2434 |
+
"logits/rejected": -1.4837853908538818,
|
2435 |
+
"logps/chosen": -121.05176544189453,
|
2436 |
+
"logps/rejected": -132.3884735107422,
|
2437 |
+
"loss": 0.6187,
|
2438 |
+
"rewards/accuracies": 0.5625,
|
2439 |
+
"rewards/chosen": 0.1501280963420868,
|
2440 |
+
"rewards/margins": 0.14075569808483124,
|
2441 |
+
"rewards/rejected": 0.009372413158416748,
|
2442 |
+
"step": 318
|
2443 |
+
},
|
2444 |
+
{
|
2445 |
+
"epoch": 0.37061165399927615,
|
2446 |
+
"grad_norm": 90.09297915942425,
|
2447 |
+
"learning_rate": 1.9110132759148843e-07,
|
2448 |
+
"logits/chosen": -1.239458680152893,
|
2449 |
+
"logits/rejected": -1.2513267993927002,
|
2450 |
+
"logps/chosen": -119.19309997558594,
|
2451 |
+
"logps/rejected": -135.55023193359375,
|
2452 |
+
"loss": 0.6107,
|
2453 |
+
"rewards/accuracies": 0.71875,
|
2454 |
+
"rewards/chosen": -0.07714903354644775,
|
2455 |
+
"rewards/margins": 0.40861696004867554,
|
2456 |
+
"rewards/rejected": -0.4857659935951233,
|
2457 |
+
"step": 320
|
2458 |
+
},
|
2459 |
+
{
|
2460 |
+
"epoch": 0.37292797683677165,
|
2461 |
+
"grad_norm": 130.3488780136265,
|
2462 |
+
"learning_rate": 1.9094129719389885e-07,
|
2463 |
+
"logits/chosen": -1.3481711149215698,
|
2464 |
+
"logits/rejected": -1.328981637954712,
|
2465 |
+
"logps/chosen": -192.10084533691406,
|
2466 |
+
"logps/rejected": -214.688720703125,
|
2467 |
+
"loss": 0.635,
|
2468 |
+
"rewards/accuracies": 0.625,
|
2469 |
+
"rewards/chosen": -0.4045405685901642,
|
2470 |
+
"rewards/margins": 0.6289528012275696,
|
2471 |
+
"rewards/rejected": -1.0334933996200562,
|
2472 |
+
"step": 322
|
2473 |
+
},
|
2474 |
+
{
|
2475 |
+
"epoch": 0.3752442996742671,
|
2476 |
+
"grad_norm": 86.21776365076336,
|
2477 |
+
"learning_rate": 1.907799088604451e-07,
|
2478 |
+
"logits/chosen": -1.1944794654846191,
|
2479 |
+
"logits/rejected": -1.154435157775879,
|
2480 |
+
"logps/chosen": -86.31254577636719,
|
2481 |
+
"logps/rejected": -97.8081283569336,
|
2482 |
+
"loss": 0.6424,
|
2483 |
+
"rewards/accuracies": 0.6875,
|
2484 |
+
"rewards/chosen": 0.0629437267780304,
|
2485 |
+
"rewards/margins": 0.25958341360092163,
|
2486 |
+
"rewards/rejected": -0.19663970172405243,
|
2487 |
+
"step": 324
|
2488 |
+
},
|
2489 |
+
{
|
2490 |
+
"epoch": 0.3775606225117626,
|
2491 |
+
"grad_norm": 109.3152948358386,
|
2492 |
+
"learning_rate": 1.9061716500097862e-07,
|
2493 |
+
"logits/chosen": -1.3203986883163452,
|
2494 |
+
"logits/rejected": -1.3523664474487305,
|
2495 |
+
"logps/chosen": -152.81573486328125,
|
2496 |
+
"logps/rejected": -161.0247039794922,
|
2497 |
+
"loss": 0.6101,
|
2498 |
+
"rewards/accuracies": 0.53125,
|
2499 |
+
"rewards/chosen": -0.4936632812023163,
|
2500 |
+
"rewards/margins": 0.04809580743312836,
|
2501 |
+
"rewards/rejected": -0.5417591333389282,
|
2502 |
+
"step": 326
|
2503 |
+
},
|
2504 |
+
{
|
2505 |
+
"epoch": 0.37987694534925803,
|
2506 |
+
"grad_norm": 91.94400981611243,
|
2507 |
+
"learning_rate": 1.904530680455914e-07,
|
2508 |
+
"logits/chosen": -1.3758294582366943,
|
2509 |
+
"logits/rejected": -1.4080578088760376,
|
2510 |
+
"logps/chosen": -146.73672485351562,
|
2511 |
+
"logps/rejected": -145.2505645751953,
|
2512 |
+
"loss": 0.6278,
|
2513 |
+
"rewards/accuracies": 0.65625,
|
2514 |
+
"rewards/chosen": 0.012471210211515427,
|
2515 |
+
"rewards/margins": 0.4439522325992584,
|
2516 |
+
"rewards/rejected": -0.4314810335636139,
|
2517 |
+
"step": 328
|
2518 |
+
},
|
2519 |
+
{
|
2520 |
+
"epoch": 0.38219326818675353,
|
2521 |
+
"grad_norm": 105.04213501880093,
|
2522 |
+
"learning_rate": 1.9028762044457992e-07,
|
2523 |
+
"logits/chosen": -1.2461824417114258,
|
2524 |
+
"logits/rejected": -1.288218379020691,
|
2525 |
+
"logps/chosen": -126.72929382324219,
|
2526 |
+
"logps/rejected": -151.31341552734375,
|
2527 |
+
"loss": 0.6118,
|
2528 |
+
"rewards/accuracies": 0.6875,
|
2529 |
+
"rewards/chosen": 0.042198315262794495,
|
2530 |
+
"rewards/margins": 0.5073456764221191,
|
2531 |
+
"rewards/rejected": -0.46514737606048584,
|
2532 |
+
"step": 330
|
2533 |
+
},
|
2534 |
+
{
|
2535 |
+
"epoch": 0.38450959102424903,
|
2536 |
+
"grad_norm": 131.99962498687907,
|
2537 |
+
"learning_rate": 1.901208246684085e-07,
|
2538 |
+
"logits/chosen": -1.345144271850586,
|
2539 |
+
"logits/rejected": -1.3419792652130127,
|
2540 |
+
"logps/chosen": -138.4906768798828,
|
2541 |
+
"logps/rejected": -144.3926239013672,
|
2542 |
+
"loss": 0.6483,
|
2543 |
+
"rewards/accuracies": 0.625,
|
2544 |
+
"rewards/chosen": -0.3029904365539551,
|
2545 |
+
"rewards/margins": 0.3120897114276886,
|
2546 |
+
"rewards/rejected": -0.6150801181793213,
|
2547 |
+
"step": 332
|
2548 |
+
},
|
2549 |
+
{
|
2550 |
+
"epoch": 0.3868259138617445,
|
2551 |
+
"grad_norm": 192.23629969436513,
|
2552 |
+
"learning_rate": 1.8995268320767252e-07,
|
2553 |
+
"logits/chosen": -1.3834903240203857,
|
2554 |
+
"logits/rejected": -1.4057523012161255,
|
2555 |
+
"logps/chosen": -138.5772705078125,
|
2556 |
+
"logps/rejected": -148.1931915283203,
|
2557 |
+
"loss": 0.7789,
|
2558 |
+
"rewards/accuracies": 0.65625,
|
2559 |
+
"rewards/chosen": 0.0129515016451478,
|
2560 |
+
"rewards/margins": 0.49864012002944946,
|
2561 |
+
"rewards/rejected": -0.4856886565685272,
|
2562 |
+
"step": 334
|
2563 |
+
},
|
2564 |
+
{
|
2565 |
+
"epoch": 0.38914223669924,
|
2566 |
+
"grad_norm": 82.74631507246218,
|
2567 |
+
"learning_rate": 1.897831985730609e-07,
|
2568 |
+
"logits/chosen": -1.2497293949127197,
|
2569 |
+
"logits/rejected": -1.2685260772705078,
|
2570 |
+
"logps/chosen": -135.58956909179688,
|
2571 |
+
"logps/rejected": -166.16636657714844,
|
2572 |
+
"loss": 0.6435,
|
2573 |
+
"rewards/accuracies": 0.8125,
|
2574 |
+
"rewards/chosen": -0.062045883387327194,
|
2575 |
+
"rewards/margins": 0.9767952561378479,
|
2576 |
+
"rewards/rejected": -1.0388411283493042,
|
2577 |
+
"step": 336
|
2578 |
+
},
|
2579 |
+
{
|
2580 |
+
"epoch": 0.3914585595367354,
|
2581 |
+
"grad_norm": 97.75310342784691,
|
2582 |
+
"learning_rate": 1.896123732953191e-07,
|
2583 |
+
"logits/chosen": -1.2475745677947998,
|
2584 |
+
"logits/rejected": -1.2074342966079712,
|
2585 |
+
"logps/chosen": -108.48465728759766,
|
2586 |
+
"logps/rejected": -131.79908752441406,
|
2587 |
+
"loss": 0.6321,
|
2588 |
+
"rewards/accuracies": 0.71875,
|
2589 |
+
"rewards/chosen": -0.32877668738365173,
|
2590 |
+
"rewards/margins": 0.5046026110649109,
|
2591 |
+
"rewards/rejected": -0.8333792686462402,
|
2592 |
+
"step": 338
|
2593 |
+
},
|
2594 |
+
{
|
2595 |
+
"epoch": 0.3937748823742309,
|
2596 |
+
"grad_norm": 104.56753710703906,
|
2597 |
+
"learning_rate": 1.8944020992521088e-07,
|
2598 |
+
"logits/chosen": -1.331594467163086,
|
2599 |
+
"logits/rejected": -1.4218388795852661,
|
2600 |
+
"logps/chosen": -122.07364654541016,
|
2601 |
+
"logps/rejected": -144.00531005859375,
|
2602 |
+
"loss": 0.6138,
|
2603 |
+
"rewards/accuracies": 0.78125,
|
2604 |
+
"rewards/chosen": 0.15422941744327545,
|
2605 |
+
"rewards/margins": 0.4605112373828888,
|
2606 |
+
"rewards/rejected": -0.30628180503845215,
|
2607 |
+
"step": 340
|
2608 |
+
},
|
2609 |
+
{
|
2610 |
+
"epoch": 0.39609120521172636,
|
2611 |
+
"grad_norm": 104.94507394937493,
|
2612 |
+
"learning_rate": 1.8926671103348047e-07,
|
2613 |
+
"logits/chosen": -1.3103477954864502,
|
2614 |
+
"logits/rejected": -1.3303866386413574,
|
2615 |
+
"logps/chosen": -118.01762390136719,
|
2616 |
+
"logps/rejected": -128.77285766601562,
|
2617 |
+
"loss": 0.698,
|
2618 |
+
"rewards/accuracies": 0.53125,
|
2619 |
+
"rewards/chosen": -0.1733967363834381,
|
2620 |
+
"rewards/margins": 0.22825026512145996,
|
2621 |
+
"rewards/rejected": -0.40164700150489807,
|
2622 |
+
"step": 342
|
2623 |
+
},
|
2624 |
+
{
|
2625 |
+
"epoch": 0.39840752804922186,
|
2626 |
+
"grad_norm": 87.41594646239237,
|
2627 |
+
"learning_rate": 1.8909187921081416e-07,
|
2628 |
+
"logits/chosen": -1.2882866859436035,
|
2629 |
+
"logits/rejected": -1.266202449798584,
|
2630 |
+
"logps/chosen": -144.56747436523438,
|
2631 |
+
"logps/rejected": -142.6608123779297,
|
2632 |
+
"loss": 0.6561,
|
2633 |
+
"rewards/accuracies": 0.53125,
|
2634 |
+
"rewards/chosen": -0.08110320568084717,
|
2635 |
+
"rewards/margins": 0.10048308968544006,
|
2636 |
+
"rewards/rejected": -0.18158632516860962,
|
2637 |
+
"step": 344
|
2638 |
+
},
|
2639 |
+
{
|
2640 |
+
"epoch": 0.40072385088671736,
|
2641 |
+
"grad_norm": 166.0088927921291,
|
2642 |
+
"learning_rate": 1.8891571706780144e-07,
|
2643 |
+
"logits/chosen": -1.3238105773925781,
|
2644 |
+
"logits/rejected": -1.3814265727996826,
|
2645 |
+
"logps/chosen": -135.59217834472656,
|
2646 |
+
"logps/rejected": -158.6577911376953,
|
2647 |
+
"loss": 0.6647,
|
2648 |
+
"rewards/accuracies": 0.75,
|
2649 |
+
"rewards/chosen": -0.2648026645183563,
|
2650 |
+
"rewards/margins": 0.6691212058067322,
|
2651 |
+
"rewards/rejected": -0.9339239001274109,
|
2652 |
+
"step": 346
|
2653 |
+
},
|
2654 |
+
{
|
2655 |
+
"epoch": 0.4030401737242128,
|
2656 |
+
"grad_norm": 92.22069522105578,
|
2657 |
+
"learning_rate": 1.8873822723489633e-07,
|
2658 |
+
"logits/chosen": -1.3072634935379028,
|
2659 |
+
"logits/rejected": -1.3363394737243652,
|
2660 |
+
"logps/chosen": -179.68614196777344,
|
2661 |
+
"logps/rejected": -213.12120056152344,
|
2662 |
+
"loss": 0.6272,
|
2663 |
+
"rewards/accuracies": 0.75,
|
2664 |
+
"rewards/chosen": -0.018616102635860443,
|
2665 |
+
"rewards/margins": 0.4234482944011688,
|
2666 |
+
"rewards/rejected": -0.44206440448760986,
|
2667 |
+
"step": 348
|
2668 |
+
},
|
2669 |
+
{
|
2670 |
+
"epoch": 0.4053564965617083,
|
2671 |
+
"grad_norm": 70.97764990334171,
|
2672 |
+
"learning_rate": 1.8855941236237774e-07,
|
2673 |
+
"logits/chosen": -1.2639405727386475,
|
2674 |
+
"logits/rejected": -1.2773693799972534,
|
2675 |
+
"logps/chosen": -133.8863067626953,
|
2676 |
+
"logps/rejected": -170.3965606689453,
|
2677 |
+
"loss": 0.5784,
|
2678 |
+
"rewards/accuracies": 0.65625,
|
2679 |
+
"rewards/chosen": 0.36297571659088135,
|
2680 |
+
"rewards/margins": 0.6825499534606934,
|
2681 |
+
"rewards/rejected": -0.3195742070674896,
|
2682 |
+
"step": 350
|
2683 |
+
},
|
2684 |
+
{
|
2685 |
+
"epoch": 0.40767281939920375,
|
2686 |
+
"grad_norm": 87.3271520781356,
|
2687 |
+
"learning_rate": 1.883792751203102e-07,
|
2688 |
+
"logits/chosen": -1.2711012363433838,
|
2689 |
+
"logits/rejected": -1.2672007083892822,
|
2690 |
+
"logps/chosen": -169.25314331054688,
|
2691 |
+
"logps/rejected": -167.83010864257812,
|
2692 |
+
"loss": 0.608,
|
2693 |
+
"rewards/accuracies": 0.65625,
|
2694 |
+
"rewards/chosen": -0.06947077065706253,
|
2695 |
+
"rewards/margins": 0.39652663469314575,
|
2696 |
+
"rewards/rejected": -0.4659973978996277,
|
2697 |
+
"step": 352
|
2698 |
+
},
|
2699 |
+
{
|
2700 |
+
"epoch": 0.40998914223669924,
|
2701 |
+
"grad_norm": 82.42288813891042,
|
2702 |
+
"learning_rate": 1.8819781819850382e-07,
|
2703 |
+
"logits/chosen": -1.2538509368896484,
|
2704 |
+
"logits/rejected": -1.2403154373168945,
|
2705 |
+
"logps/chosen": -112.01508331298828,
|
2706 |
+
"logps/rejected": -122.62294006347656,
|
2707 |
+
"loss": 0.6,
|
2708 |
+
"rewards/accuracies": 0.78125,
|
2709 |
+
"rewards/chosen": 0.310922235250473,
|
2710 |
+
"rewards/margins": 0.5129318237304688,
|
2711 |
+
"rewards/rejected": -0.20200954377651215,
|
2712 |
+
"step": 354
|
2713 |
+
},
|
2714 |
+
{
|
2715 |
+
"epoch": 0.41230546507419474,
|
2716 |
+
"grad_norm": 92.66996742577295,
|
2717 |
+
"learning_rate": 1.880150443064742e-07,
|
2718 |
+
"logits/chosen": -1.1228657960891724,
|
2719 |
+
"logits/rejected": -1.1974968910217285,
|
2720 |
+
"logps/chosen": -129.4398193359375,
|
2721 |
+
"logps/rejected": -178.6856689453125,
|
2722 |
+
"loss": 0.6907,
|
2723 |
+
"rewards/accuracies": 0.71875,
|
2724 |
+
"rewards/chosen": 0.1207706406712532,
|
2725 |
+
"rewards/margins": 0.8560737371444702,
|
2726 |
+
"rewards/rejected": -0.7353031039237976,
|
2727 |
+
"step": 356
|
2728 |
+
},
|
2729 |
+
{
|
2730 |
+
"epoch": 0.4146217879116902,
|
2731 |
+
"grad_norm": 85.77942086621498,
|
2732 |
+
"learning_rate": 1.8783095617340192e-07,
|
2733 |
+
"logits/chosen": -1.3269970417022705,
|
2734 |
+
"logits/rejected": -1.3102359771728516,
|
2735 |
+
"logps/chosen": -138.91845703125,
|
2736 |
+
"logps/rejected": -150.00466918945312,
|
2737 |
+
"loss": 0.6704,
|
2738 |
+
"rewards/accuracies": 0.53125,
|
2739 |
+
"rewards/chosen": -0.25449270009994507,
|
2740 |
+
"rewards/margins": 0.06581351906061172,
|
2741 |
+
"rewards/rejected": -0.32030627131462097,
|
2742 |
+
"step": 358
|
2743 |
+
},
|
2744 |
+
{
|
2745 |
+
"epoch": 0.4169381107491857,
|
2746 |
+
"grad_norm": 66.74779859823646,
|
2747 |
+
"learning_rate": 1.876455565480918e-07,
|
2748 |
+
"logits/chosen": -1.395142912864685,
|
2749 |
+
"logits/rejected": -1.4558305740356445,
|
2750 |
+
"logps/chosen": -138.25567626953125,
|
2751 |
+
"logps/rejected": -142.72232055664062,
|
2752 |
+
"loss": 0.608,
|
2753 |
+
"rewards/accuracies": 0.65625,
|
2754 |
+
"rewards/chosen": 0.22209802269935608,
|
2755 |
+
"rewards/margins": 0.33867061138153076,
|
2756 |
+
"rewards/rejected": -0.11657258868217468,
|
2757 |
+
"step": 360
|
2758 |
+
},
|
2759 |
+
{
|
2760 |
+
"epoch": 0.41925443358668113,
|
2761 |
+
"grad_norm": 69.35743210486372,
|
2762 |
+
"learning_rate": 1.8745884819893192e-07,
|
2763 |
+
"logits/chosen": -1.3764009475708008,
|
2764 |
+
"logits/rejected": -1.4009249210357666,
|
2765 |
+
"logps/chosen": -125.95867919921875,
|
2766 |
+
"logps/rejected": -147.38038635253906,
|
2767 |
+
"loss": 0.5892,
|
2768 |
+
"rewards/accuracies": 0.46875,
|
2769 |
+
"rewards/chosen": 0.04307159036397934,
|
2770 |
+
"rewards/margins": 0.25595974922180176,
|
2771 |
+
"rewards/rejected": -0.21288815140724182,
|
2772 |
+
"step": 362
|
2773 |
+
},
|
2774 |
+
{
|
2775 |
+
"epoch": 0.42157075642417663,
|
2776 |
+
"grad_norm": 91.06098837601228,
|
2777 |
+
"learning_rate": 1.8727083391385219e-07,
|
2778 |
+
"logits/chosen": -1.3126693964004517,
|
2779 |
+
"logits/rejected": -1.359320044517517,
|
2780 |
+
"logps/chosen": -122.15340423583984,
|
2781 |
+
"logps/rejected": -152.7900390625,
|
2782 |
+
"loss": 0.6084,
|
2783 |
+
"rewards/accuracies": 0.65625,
|
2784 |
+
"rewards/chosen": 0.16955101490020752,
|
2785 |
+
"rewards/margins": 0.33028605580329895,
|
2786 |
+
"rewards/rejected": -0.16073507070541382,
|
2787 |
+
"step": 364
|
2788 |
+
},
|
2789 |
+
{
|
2790 |
+
"epoch": 0.4238870792616721,
|
2791 |
+
"grad_norm": 60.84745087172502,
|
2792 |
+
"learning_rate": 1.8708151650028278e-07,
|
2793 |
+
"logits/chosen": -1.3809125423431396,
|
2794 |
+
"logits/rejected": -1.403237223625183,
|
2795 |
+
"logps/chosen": -109.20733642578125,
|
2796 |
+
"logps/rejected": -135.84494018554688,
|
2797 |
+
"loss": 0.6428,
|
2798 |
+
"rewards/accuracies": 0.65625,
|
2799 |
+
"rewards/chosen": 0.13176926970481873,
|
2800 |
+
"rewards/margins": 0.44196146726608276,
|
2801 |
+
"rewards/rejected": -0.31019219756126404,
|
2802 |
+
"step": 366
|
2803 |
+
},
|
2804 |
+
{
|
2805 |
+
"epoch": 0.42620340209916757,
|
2806 |
+
"grad_norm": 83.0289812455712,
|
2807 |
+
"learning_rate": 1.8689089878511214e-07,
|
2808 |
+
"logits/chosen": -1.2712593078613281,
|
2809 |
+
"logits/rejected": -1.3146370649337769,
|
2810 |
+
"logps/chosen": -104.22183990478516,
|
2811 |
+
"logps/rejected": -117.42278289794922,
|
2812 |
+
"loss": 0.6601,
|
2813 |
+
"rewards/accuracies": 0.59375,
|
2814 |
+
"rewards/chosen": -0.04589027911424637,
|
2815 |
+
"rewards/margins": 0.24963931739330292,
|
2816 |
+
"rewards/rejected": -0.2955296039581299,
|
2817 |
+
"step": 368
|
2818 |
+
},
|
2819 |
+
{
|
2820 |
+
"epoch": 0.42851972493666307,
|
2821 |
+
"grad_norm": 80.18268668813586,
|
2822 |
+
"learning_rate": 1.866989836146449e-07,
|
2823 |
+
"logits/chosen": -1.367477536201477,
|
2824 |
+
"logits/rejected": -1.4047478437423706,
|
2825 |
+
"logps/chosen": -156.0530242919922,
|
2826 |
+
"logps/rejected": -166.14857482910156,
|
2827 |
+
"loss": 0.6033,
|
2828 |
+
"rewards/accuracies": 0.65625,
|
2829 |
+
"rewards/chosen": 0.03725311905145645,
|
2830 |
+
"rewards/margins": 0.22227245569229126,
|
2831 |
+
"rewards/rejected": -0.18501931428909302,
|
2832 |
+
"step": 370
|
2833 |
+
},
|
2834 |
+
{
|
2835 |
+
"epoch": 0.4308360477741585,
|
2836 |
+
"grad_norm": 85.02940823274966,
|
2837 |
+
"learning_rate": 1.8650577385455924e-07,
|
2838 |
+
"logits/chosen": -1.3402721881866455,
|
2839 |
+
"logits/rejected": -1.3483717441558838,
|
2840 |
+
"logps/chosen": -129.09817504882812,
|
2841 |
+
"logps/rejected": -133.04421997070312,
|
2842 |
+
"loss": 0.578,
|
2843 |
+
"rewards/accuracies": 0.71875,
|
2844 |
+
"rewards/chosen": 0.19856195151805878,
|
2845 |
+
"rewards/margins": 0.32389020919799805,
|
2846 |
+
"rewards/rejected": -0.12532827258110046,
|
2847 |
+
"step": 372
|
2848 |
+
},
|
2849 |
+
{
|
2850 |
+
"epoch": 0.433152370611654,
|
2851 |
+
"grad_norm": 84.18493319136046,
|
2852 |
+
"learning_rate": 1.8631127238986416e-07,
|
2853 |
+
"logits/chosen": -1.3070781230926514,
|
2854 |
+
"logits/rejected": -1.3111450672149658,
|
2855 |
+
"logps/chosen": -100.65834045410156,
|
2856 |
+
"logps/rejected": -119.19929504394531,
|
2857 |
+
"loss": 0.57,
|
2858 |
+
"rewards/accuracies": 0.59375,
|
2859 |
+
"rewards/chosen": 0.15119151771068573,
|
2860 |
+
"rewards/margins": 0.44069719314575195,
|
2861 |
+
"rewards/rejected": -0.28950563073158264,
|
2862 |
+
"step": 374
|
2863 |
+
},
|
2864 |
+
{
|
2865 |
+
"epoch": 0.43546869344914946,
|
2866 |
+
"grad_norm": 78.13851817895889,
|
2867 |
+
"learning_rate": 1.8611548212485647e-07,
|
2868 |
+
"logits/chosen": -1.3796460628509521,
|
2869 |
+
"logits/rejected": -1.4454896450042725,
|
2870 |
+
"logps/chosen": -137.24407958984375,
|
2871 |
+
"logps/rejected": -168.12208557128906,
|
2872 |
+
"loss": 0.6024,
|
2873 |
+
"rewards/accuracies": 0.6875,
|
2874 |
+
"rewards/chosen": 0.2298029363155365,
|
2875 |
+
"rewards/margins": 0.5796483159065247,
|
2876 |
+
"rewards/rejected": -0.34984540939331055,
|
2877 |
+
"step": 376
|
2878 |
+
},
|
2879 |
+
{
|
2880 |
+
"epoch": 0.43778501628664496,
|
2881 |
+
"grad_norm": 87.51427473434556,
|
2882 |
+
"learning_rate": 1.8591840598307724e-07,
|
2883 |
+
"logits/chosen": -1.3684715032577515,
|
2884 |
+
"logits/rejected": -1.41554856300354,
|
2885 |
+
"logps/chosen": -156.48861694335938,
|
2886 |
+
"logps/rejected": -166.43325805664062,
|
2887 |
+
"loss": 0.6099,
|
2888 |
+
"rewards/accuracies": 0.8125,
|
2889 |
+
"rewards/chosen": 0.05118772014975548,
|
2890 |
+
"rewards/margins": 0.6047709584236145,
|
2891 |
+
"rewards/rejected": -0.5535832047462463,
|
2892 |
+
"step": 378
|
2893 |
+
},
|
2894 |
+
{
|
2895 |
+
"epoch": 0.4401013391241404,
|
2896 |
+
"grad_norm": 97.94077875373094,
|
2897 |
+
"learning_rate": 1.8572004690726835e-07,
|
2898 |
+
"logits/chosen": -1.4304860830307007,
|
2899 |
+
"logits/rejected": -1.3829154968261719,
|
2900 |
+
"logps/chosen": -137.7032928466797,
|
2901 |
+
"logps/rejected": -159.42665100097656,
|
2902 |
+
"loss": 0.6256,
|
2903 |
+
"rewards/accuracies": 0.75,
|
2904 |
+
"rewards/chosen": 0.05912143737077713,
|
2905 |
+
"rewards/margins": 0.9181233644485474,
|
2906 |
+
"rewards/rejected": -0.8590019941329956,
|
2907 |
+
"step": 380
|
2908 |
+
},
|
2909 |
+
{
|
2910 |
+
"epoch": 0.4424176619616359,
|
2911 |
+
"grad_norm": 117.47560157505089,
|
2912 |
+
"learning_rate": 1.8552040785932843e-07,
|
2913 |
+
"logits/chosen": -1.2082271575927734,
|
2914 |
+
"logits/rejected": -1.32054603099823,
|
2915 |
+
"logps/chosen": -129.3510284423828,
|
2916 |
+
"logps/rejected": -139.3075714111328,
|
2917 |
+
"loss": 0.6776,
|
2918 |
+
"rewards/accuracies": 0.625,
|
2919 |
+
"rewards/chosen": 0.08690177649259567,
|
2920 |
+
"rewards/margins": 0.17026250064373016,
|
2921 |
+
"rewards/rejected": -0.08336074650287628,
|
2922 |
+
"step": 382
|
2923 |
+
},
|
2924 |
+
{
|
2925 |
+
"epoch": 0.4447339847991314,
|
2926 |
+
"grad_norm": 83.36856549076099,
|
2927 |
+
"learning_rate": 1.8531949182026864e-07,
|
2928 |
+
"logits/chosen": -1.213942289352417,
|
2929 |
+
"logits/rejected": -1.2501431703567505,
|
2930 |
+
"logps/chosen": -87.4649658203125,
|
2931 |
+
"logps/rejected": -105.31576538085938,
|
2932 |
+
"loss": 0.6473,
|
2933 |
+
"rewards/accuracies": 0.6875,
|
2934 |
+
"rewards/chosen": -0.07069863379001617,
|
2935 |
+
"rewards/margins": 0.28086185455322266,
|
2936 |
+
"rewards/rejected": -0.3515605032444,
|
2937 |
+
"step": 384
|
2938 |
+
},
|
2939 |
+
{
|
2940 |
+
"epoch": 0.44705030763662684,
|
2941 |
+
"grad_norm": 93.35125079656054,
|
2942 |
+
"learning_rate": 1.851173017901682e-07,
|
2943 |
+
"logits/chosen": -1.2774831056594849,
|
2944 |
+
"logits/rejected": -1.3458952903747559,
|
2945 |
+
"logps/chosen": -134.04624938964844,
|
2946 |
+
"logps/rejected": -148.02565002441406,
|
2947 |
+
"loss": 0.6516,
|
2948 |
+
"rewards/accuracies": 0.5,
|
2949 |
+
"rewards/chosen": -0.0875653326511383,
|
2950 |
+
"rewards/margins": 0.029335327446460724,
|
2951 |
+
"rewards/rejected": -0.11690068244934082,
|
2952 |
+
"step": 386
|
2953 |
+
},
|
2954 |
+
{
|
2955 |
+
"epoch": 0.44936663047412234,
|
2956 |
+
"grad_norm": 94.38591902404973,
|
2957 |
+
"learning_rate": 1.8491384078812957e-07,
|
2958 |
+
"logits/chosen": -1.3489183187484741,
|
2959 |
+
"logits/rejected": -1.3692617416381836,
|
2960 |
+
"logps/chosen": -158.86729431152344,
|
2961 |
+
"logps/rejected": -175.22946166992188,
|
2962 |
+
"loss": 0.6085,
|
2963 |
+
"rewards/accuracies": 0.75,
|
2964 |
+
"rewards/chosen": 0.01937798410654068,
|
2965 |
+
"rewards/margins": 0.4567859470844269,
|
2966 |
+
"rewards/rejected": -0.4374079406261444,
|
2967 |
+
"step": 388
|
2968 |
+
},
|
2969 |
+
{
|
2970 |
+
"epoch": 0.4516829533116178,
|
2971 |
+
"grad_norm": 82.79409553577226,
|
2972 |
+
"learning_rate": 1.847091118522333e-07,
|
2973 |
+
"logits/chosen": -1.2354220151901245,
|
2974 |
+
"logits/rejected": -1.1955327987670898,
|
2975 |
+
"logps/chosen": -100.98146057128906,
|
2976 |
+
"logps/rejected": -106.97394561767578,
|
2977 |
+
"loss": 0.6118,
|
2978 |
+
"rewards/accuracies": 0.75,
|
2979 |
+
"rewards/chosen": 0.024054907262325287,
|
2980 |
+
"rewards/margins": 0.4244306981563568,
|
2981 |
+
"rewards/rejected": -0.40037575364112854,
|
2982 |
+
"step": 390
|
2983 |
+
},
|
2984 |
+
{
|
2985 |
+
"epoch": 0.4539992761491133,
|
2986 |
+
"grad_norm": 68.23646218496863,
|
2987 |
+
"learning_rate": 1.8450311803949288e-07,
|
2988 |
+
"logits/chosen": -1.4198896884918213,
|
2989 |
+
"logits/rejected": -1.339991807937622,
|
2990 |
+
"logps/chosen": -96.33162689208984,
|
2991 |
+
"logps/rejected": -106.24251556396484,
|
2992 |
+
"loss": 0.626,
|
2993 |
+
"rewards/accuracies": 0.71875,
|
2994 |
+
"rewards/chosen": 0.14840683341026306,
|
2995 |
+
"rewards/margins": 0.3869977295398712,
|
2996 |
+
"rewards/rejected": -0.23859092593193054,
|
2997 |
+
"step": 392
|
2998 |
+
},
|
2999 |
+
{
|
3000 |
+
"epoch": 0.4563155989866088,
|
3001 |
+
"grad_norm": 90.98509885957323,
|
3002 |
+
"learning_rate": 1.842958624258088e-07,
|
3003 |
+
"logits/chosen": -1.4057539701461792,
|
3004 |
+
"logits/rejected": -1.4758132696151733,
|
3005 |
+
"logps/chosen": -122.16340637207031,
|
3006 |
+
"logps/rejected": -123.98712158203125,
|
3007 |
+
"loss": 0.6429,
|
3008 |
+
"rewards/accuracies": 0.5,
|
3009 |
+
"rewards/chosen": 0.053804248571395874,
|
3010 |
+
"rewards/margins": 0.13191546499729156,
|
3011 |
+
"rewards/rejected": -0.0781112089753151,
|
3012 |
+
"step": 394
|
3013 |
+
},
|
3014 |
+
{
|
3015 |
+
"epoch": 0.4586319218241042,
|
3016 |
+
"grad_norm": 90.51866810043896,
|
3017 |
+
"learning_rate": 1.8408734810592286e-07,
|
3018 |
+
"logits/chosen": -1.3948010206222534,
|
3019 |
+
"logits/rejected": -1.4117646217346191,
|
3020 |
+
"logps/chosen": -170.54193115234375,
|
3021 |
+
"logps/rejected": -179.2427978515625,
|
3022 |
+
"loss": 0.5834,
|
3023 |
+
"rewards/accuracies": 0.71875,
|
3024 |
+
"rewards/chosen": 0.059171393513679504,
|
3025 |
+
"rewards/margins": 0.42534855008125305,
|
3026 |
+
"rewards/rejected": -0.36617720127105713,
|
3027 |
+
"step": 396
|
3028 |
+
},
|
3029 |
+
{
|
3030 |
+
"epoch": 0.4609482446615997,
|
3031 |
+
"grad_norm": 93.16409936228983,
|
3032 |
+
"learning_rate": 1.838775781933718e-07,
|
3033 |
+
"logits/chosen": -1.2591919898986816,
|
3034 |
+
"logits/rejected": -1.278662085533142,
|
3035 |
+
"logps/chosen": -133.6868133544922,
|
3036 |
+
"logps/rejected": -160.47731018066406,
|
3037 |
+
"loss": 0.6789,
|
3038 |
+
"rewards/accuracies": 0.5625,
|
3039 |
+
"rewards/chosen": -0.12275616079568863,
|
3040 |
+
"rewards/margins": 0.09390115737915039,
|
3041 |
+
"rewards/rejected": -0.21665732562541962,
|
3042 |
+
"step": 398
|
3043 |
+
},
|
3044 |
+
{
|
3045 |
+
"epoch": 0.46326456749909517,
|
3046 |
+
"grad_norm": 85.0019450300031,
|
3047 |
+
"learning_rate": 1.8366655582044093e-07,
|
3048 |
+
"logits/chosen": -1.295358419418335,
|
3049 |
+
"logits/rejected": -1.3356658220291138,
|
3050 |
+
"logps/chosen": -82.7631607055664,
|
3051 |
+
"logps/rejected": -102.0246810913086,
|
3052 |
+
"loss": 0.6151,
|
3053 |
+
"rewards/accuracies": 0.65625,
|
3054 |
+
"rewards/chosen": -0.2104763686656952,
|
3055 |
+
"rewards/margins": 0.3289705812931061,
|
3056 |
+
"rewards/rejected": -0.5394470691680908,
|
3057 |
+
"step": 400
|
3058 |
+
},
|
3059 |
+
{
|
3060 |
+
"epoch": 0.46326456749909517,
|
3061 |
+
"eval_logits/chosen": -1.3069441318511963,
|
3062 |
+
"eval_logits/rejected": -1.3023654222488403,
|
3063 |
+
"eval_logps/chosen": -140.2086944580078,
|
3064 |
+
"eval_logps/rejected": -139.57632446289062,
|
3065 |
+
"eval_loss": 0.6798678040504456,
|
3066 |
+
"eval_rewards/accuracies": 0.6000000238418579,
|
3067 |
+
"eval_rewards/chosen": -0.4577521085739136,
|
3068 |
+
"eval_rewards/margins": 0.10452325642108917,
|
3069 |
+
"eval_rewards/rejected": -0.5622754096984863,
|
3070 |
+
"eval_runtime": 26.7292,
|
3071 |
+
"eval_samples_per_second": 3.741,
|
3072 |
+
"eval_steps_per_second": 0.935,
|
3073 |
+
"step": 400
|
3074 |
+
},
|
3075 |
+
{
|
3076 |
+
"epoch": 0.46558089033659067,
|
3077 |
+
"grad_norm": 107.62190686868198,
|
3078 |
+
"learning_rate": 1.834542841381173e-07,
|
3079 |
+
"logits/chosen": -1.4000458717346191,
|
3080 |
+
"logits/rejected": -1.4169011116027832,
|
3081 |
+
"logps/chosen": -187.33409118652344,
|
3082 |
+
"logps/rejected": -207.20140075683594,
|
3083 |
+
"loss": 0.5555,
|
3084 |
+
"rewards/accuracies": 0.53125,
|
3085 |
+
"rewards/chosen": -0.11752481758594513,
|
3086 |
+
"rewards/margins": 0.4881589412689209,
|
3087 |
+
"rewards/rejected": -0.6056837439537048,
|
3088 |
+
"step": 402
|
3089 |
+
},
|
3090 |
+
{
|
3091 |
+
"epoch": 0.4678972131740861,
|
3092 |
+
"grad_norm": 81.84841168291128,
|
3093 |
+
"learning_rate": 1.8324076631604262e-07,
|
3094 |
+
"logits/chosen": -1.2451642751693726,
|
3095 |
+
"logits/rejected": -1.2964147329330444,
|
3096 |
+
"logps/chosen": -136.41270446777344,
|
3097 |
+
"logps/rejected": -156.53018188476562,
|
3098 |
+
"loss": 0.6282,
|
3099 |
+
"rewards/accuracies": 0.625,
|
3100 |
+
"rewards/chosen": -0.1907982975244522,
|
3101 |
+
"rewards/margins": 0.6749911308288574,
|
3102 |
+
"rewards/rejected": -0.8657894134521484,
|
3103 |
+
"step": 404
|
3104 |
+
},
|
3105 |
+
{
|
3106 |
+
"epoch": 0.4702135360115816,
|
3107 |
+
"grad_norm": 70.51232634632699,
|
3108 |
+
"learning_rate": 1.8302600554246598e-07,
|
3109 |
+
"logits/chosen": -1.2217371463775635,
|
3110 |
+
"logits/rejected": -1.2302532196044922,
|
3111 |
+
"logps/chosen": -109.1505355834961,
|
3112 |
+
"logps/rejected": -124.399169921875,
|
3113 |
+
"loss": 0.5908,
|
3114 |
+
"rewards/accuracies": 0.71875,
|
3115 |
+
"rewards/chosen": -0.011890493333339691,
|
3116 |
+
"rewards/margins": 0.41306906938552856,
|
3117 |
+
"rewards/rejected": -0.42495957016944885,
|
3118 |
+
"step": 406
|
3119 |
+
},
|
3120 |
+
{
|
3121 |
+
"epoch": 0.4725298588490771,
|
3122 |
+
"grad_norm": 157.73563743497198,
|
3123 |
+
"learning_rate": 1.8281000502419624e-07,
|
3124 |
+
"logits/chosen": -1.316713809967041,
|
3125 |
+
"logits/rejected": -1.3389533758163452,
|
3126 |
+
"logps/chosen": -123.63529968261719,
|
3127 |
+
"logps/rejected": -128.41409301757812,
|
3128 |
+
"loss": 0.6549,
|
3129 |
+
"rewards/accuracies": 0.59375,
|
3130 |
+
"rewards/chosen": -0.4301506578922272,
|
3131 |
+
"rewards/margins": 0.16745811700820923,
|
3132 |
+
"rewards/rejected": -0.597608745098114,
|
3133 |
+
"step": 408
|
3134 |
+
},
|
3135 |
+
{
|
3136 |
+
"epoch": 0.47484618168657255,
|
3137 |
+
"grad_norm": 107.85099770446011,
|
3138 |
+
"learning_rate": 1.8259276798655412e-07,
|
3139 |
+
"logits/chosen": -1.3569673299789429,
|
3140 |
+
"logits/rejected": -1.3319692611694336,
|
3141 |
+
"logps/chosen": -149.51708984375,
|
3142 |
+
"logps/rejected": -185.8908233642578,
|
3143 |
+
"loss": 0.6863,
|
3144 |
+
"rewards/accuracies": 0.59375,
|
3145 |
+
"rewards/chosen": -0.11509159207344055,
|
3146 |
+
"rewards/margins": 0.3641398549079895,
|
3147 |
+
"rewards/rejected": -0.47923144698143005,
|
3148 |
+
"step": 410
|
3149 |
+
},
|
3150 |
+
{
|
3151 |
+
"epoch": 0.47716250452406805,
|
3152 |
+
"grad_norm": 109.31239844961944,
|
3153 |
+
"learning_rate": 1.8237429767332405e-07,
|
3154 |
+
"logits/chosen": -1.3673866987228394,
|
3155 |
+
"logits/rejected": -1.4460492134094238,
|
3156 |
+
"logps/chosen": -144.90838623046875,
|
3157 |
+
"logps/rejected": -157.9684295654297,
|
3158 |
+
"loss": 0.6105,
|
3159 |
+
"rewards/accuracies": 0.9375,
|
3160 |
+
"rewards/chosen": -0.08836071193218231,
|
3161 |
+
"rewards/margins": 0.6367740631103516,
|
3162 |
+
"rewards/rejected": -0.7251348495483398,
|
3163 |
+
"step": 412
|
3164 |
+
},
|
3165 |
+
{
|
3166 |
+
"epoch": 0.4794788273615635,
|
3167 |
+
"grad_norm": 79.38161196529609,
|
3168 |
+
"learning_rate": 1.8215459734670573e-07,
|
3169 |
+
"logits/chosen": -1.341538667678833,
|
3170 |
+
"logits/rejected": -1.371129035949707,
|
3171 |
+
"logps/chosen": -135.0418243408203,
|
3172 |
+
"logps/rejected": -181.38201904296875,
|
3173 |
+
"loss": 0.6121,
|
3174 |
+
"rewards/accuracies": 0.8125,
|
3175 |
+
"rewards/chosen": 0.04946846514940262,
|
3176 |
+
"rewards/margins": 0.8221450448036194,
|
3177 |
+
"rewards/rejected": -0.7726765871047974,
|
3178 |
+
"step": 414
|
3179 |
+
},
|
3180 |
+
{
|
3181 |
+
"epoch": 0.481795150199059,
|
3182 |
+
"grad_norm": 98.8037188643182,
|
3183 |
+
"learning_rate": 1.8193367028726547e-07,
|
3184 |
+
"logits/chosen": -1.1779212951660156,
|
3185 |
+
"logits/rejected": -1.2224653959274292,
|
3186 |
+
"logps/chosen": -91.48204040527344,
|
3187 |
+
"logps/rejected": -109.18719482421875,
|
3188 |
+
"loss": 0.6932,
|
3189 |
+
"rewards/accuracies": 0.625,
|
3190 |
+
"rewards/chosen": 0.05973606929183006,
|
3191 |
+
"rewards/margins": 0.11246003955602646,
|
3192 |
+
"rewards/rejected": -0.0527239665389061,
|
3193 |
+
"step": 416
|
3194 |
+
},
|
3195 |
+
{
|
3196 |
+
"epoch": 0.4841114730365545,
|
3197 |
+
"grad_norm": 75.72657378652657,
|
3198 |
+
"learning_rate": 1.8171151979388712e-07,
|
3199 |
+
"logits/chosen": -1.2831331491470337,
|
3200 |
+
"logits/rejected": -1.3463534116744995,
|
3201 |
+
"logps/chosen": -155.19076538085938,
|
3202 |
+
"logps/rejected": -191.88758850097656,
|
3203 |
+
"loss": 0.612,
|
3204 |
+
"rewards/accuracies": 0.625,
|
3205 |
+
"rewards/chosen": -0.32773423194885254,
|
3206 |
+
"rewards/margins": 0.40678921341896057,
|
3207 |
+
"rewards/rejected": -0.7345234751701355,
|
3208 |
+
"step": 418
|
3209 |
+
},
|
3210 |
+
{
|
3211 |
+
"epoch": 0.48642779587404994,
|
3212 |
+
"grad_norm": 88.72825200499656,
|
3213 |
+
"learning_rate": 1.8148814918372285e-07,
|
3214 |
+
"logits/chosen": -1.2322022914886475,
|
3215 |
+
"logits/rejected": -1.2740528583526611,
|
3216 |
+
"logps/chosen": -125.58689880371094,
|
3217 |
+
"logps/rejected": -145.04537963867188,
|
3218 |
+
"loss": 0.6336,
|
3219 |
+
"rewards/accuracies": 0.5625,
|
3220 |
+
"rewards/chosen": -0.02973347157239914,
|
3221 |
+
"rewards/margins": 0.28559258580207825,
|
3222 |
+
"rewards/rejected": -0.3153260350227356,
|
3223 |
+
"step": 420
|
3224 |
+
},
|
3225 |
+
{
|
3226 |
+
"epoch": 0.48874411871154544,
|
3227 |
+
"grad_norm": 84.14984078776182,
|
3228 |
+
"learning_rate": 1.8126356179214365e-07,
|
3229 |
+
"logits/chosen": -1.3616023063659668,
|
3230 |
+
"logits/rejected": -1.3728755712509155,
|
3231 |
+
"logps/chosen": -113.55232238769531,
|
3232 |
+
"logps/rejected": -120.91179656982422,
|
3233 |
+
"loss": 0.6093,
|
3234 |
+
"rewards/accuracies": 0.5625,
|
3235 |
+
"rewards/chosen": -0.14902538061141968,
|
3236 |
+
"rewards/margins": 0.14539653062820435,
|
3237 |
+
"rewards/rejected": -0.294421911239624,
|
3238 |
+
"step": 422
|
3239 |
+
},
|
3240 |
+
{
|
3241 |
+
"epoch": 0.4910604415490409,
|
3242 |
+
"grad_norm": 102.58844455062285,
|
3243 |
+
"learning_rate": 1.8103776097268942e-07,
|
3244 |
+
"logits/chosen": -1.3973523378372192,
|
3245 |
+
"logits/rejected": -1.4224525690078735,
|
3246 |
+
"logps/chosen": -146.35865783691406,
|
3247 |
+
"logps/rejected": -155.32872009277344,
|
3248 |
+
"loss": 0.5969,
|
3249 |
+
"rewards/accuracies": 0.6875,
|
3250 |
+
"rewards/chosen": -0.18615968525409698,
|
3251 |
+
"rewards/margins": 0.1702008694410324,
|
3252 |
+
"rewards/rejected": -0.3563604950904846,
|
3253 |
+
"step": 424
|
3254 |
+
},
|
3255 |
+
{
|
3256 |
+
"epoch": 0.4933767643865364,
|
3257 |
+
"grad_norm": 97.6281549014596,
|
3258 |
+
"learning_rate": 1.8081075009701908e-07,
|
3259 |
+
"logits/chosen": -1.3393031358718872,
|
3260 |
+
"logits/rejected": -1.3568938970565796,
|
3261 |
+
"logps/chosen": -156.75132751464844,
|
3262 |
+
"logps/rejected": -183.3557891845703,
|
3263 |
+
"loss": 0.5524,
|
3264 |
+
"rewards/accuracies": 0.6875,
|
3265 |
+
"rewards/chosen": 0.006147988140583038,
|
3266 |
+
"rewards/margins": 0.6164807677268982,
|
3267 |
+
"rewards/rejected": -0.6103328466415405,
|
3268 |
+
"step": 426
|
3269 |
+
},
|
3270 |
+
{
|
3271 |
+
"epoch": 0.4956930872240318,
|
3272 |
+
"grad_norm": 79.28532180545582,
|
3273 |
+
"learning_rate": 1.8058253255486004e-07,
|
3274 |
+
"logits/chosen": -1.479441523551941,
|
3275 |
+
"logits/rejected": -1.455161213874817,
|
3276 |
+
"logps/chosen": -149.18377685546875,
|
3277 |
+
"logps/rejected": -175.40121459960938,
|
3278 |
+
"loss": 0.6324,
|
3279 |
+
"rewards/accuracies": 0.5,
|
3280 |
+
"rewards/chosen": -0.16930466890335083,
|
3281 |
+
"rewards/margins": 0.3928312659263611,
|
3282 |
+
"rewards/rejected": -0.5621359348297119,
|
3283 |
+
"step": 428
|
3284 |
+
},
|
3285 |
+
{
|
3286 |
+
"epoch": 0.4980094100615273,
|
3287 |
+
"grad_norm": 75.50103825872334,
|
3288 |
+
"learning_rate": 1.8035311175395766e-07,
|
3289 |
+
"logits/chosen": -1.279894232749939,
|
3290 |
+
"logits/rejected": -1.366225004196167,
|
3291 |
+
"logps/chosen": -149.7015838623047,
|
3292 |
+
"logps/rejected": -169.37600708007812,
|
3293 |
+
"loss": 0.634,
|
3294 |
+
"rewards/accuracies": 0.625,
|
3295 |
+
"rewards/chosen": 0.10414651781320572,
|
3296 |
+
"rewards/margins": 0.31277552247047424,
|
3297 |
+
"rewards/rejected": -0.20862898230552673,
|
3298 |
+
"step": 430
|
3299 |
+
},
|
3300 |
+
{
|
3301 |
+
"epoch": 0.5003257328990228,
|
3302 |
+
"grad_norm": 110.71149959510932,
|
3303 |
+
"learning_rate": 1.8012249112002445e-07,
|
3304 |
+
"logits/chosen": -1.3446143865585327,
|
3305 |
+
"logits/rejected": -1.346205234527588,
|
3306 |
+
"logps/chosen": -135.6072998046875,
|
3307 |
+
"logps/rejected": -148.6031951904297,
|
3308 |
+
"loss": 0.6534,
|
3309 |
+
"rewards/accuracies": 0.53125,
|
3310 |
+
"rewards/chosen": -0.04478984698653221,
|
3311 |
+
"rewards/margins": 0.20214848220348358,
|
3312 |
+
"rewards/rejected": -0.2469383329153061,
|
3313 |
+
"step": 432
|
3314 |
+
},
|
3315 |
+
{
|
3316 |
+
"epoch": 0.5026420557365183,
|
3317 |
+
"grad_norm": 99.01624284418935,
|
3318 |
+
"learning_rate": 1.7989067409668867e-07,
|
3319 |
+
"logits/chosen": -1.3353965282440186,
|
3320 |
+
"logits/rejected": -1.3816275596618652,
|
3321 |
+
"logps/chosen": -83.31758117675781,
|
3322 |
+
"logps/rejected": -101.72441101074219,
|
3323 |
+
"loss": 0.65,
|
3324 |
+
"rewards/accuracies": 0.75,
|
3325 |
+
"rewards/chosen": 0.16134825348854065,
|
3326 |
+
"rewards/margins": 0.24150311946868896,
|
3327 |
+
"rewards/rejected": -0.0801548883318901,
|
3328 |
+
"step": 434
|
3329 |
+
},
|
3330 |
+
{
|
3331 |
+
"epoch": 0.5049583785740137,
|
3332 |
+
"grad_norm": 85.01833595262721,
|
3333 |
+
"learning_rate": 1.7965766414544326e-07,
|
3334 |
+
"logits/chosen": -1.3208928108215332,
|
3335 |
+
"logits/rejected": -1.4323692321777344,
|
3336 |
+
"logps/chosen": -170.11387634277344,
|
3337 |
+
"logps/rejected": -190.21917724609375,
|
3338 |
+
"loss": 0.5937,
|
3339 |
+
"rewards/accuracies": 0.6875,
|
3340 |
+
"rewards/chosen": -0.06436862796545029,
|
3341 |
+
"rewards/margins": 0.4921523928642273,
|
3342 |
+
"rewards/rejected": -0.5565209984779358,
|
3343 |
+
"step": 436
|
3344 |
+
},
|
3345 |
+
{
|
3346 |
+
"epoch": 0.5072747014115092,
|
3347 |
+
"grad_norm": 78.22902080084621,
|
3348 |
+
"learning_rate": 1.794234647455938e-07,
|
3349 |
+
"logits/chosen": -1.5033388137817383,
|
3350 |
+
"logits/rejected": -1.4195587635040283,
|
3351 |
+
"logps/chosen": -167.2239227294922,
|
3352 |
+
"logps/rejected": -187.388427734375,
|
3353 |
+
"loss": 0.5993,
|
3354 |
+
"rewards/accuracies": 0.71875,
|
3355 |
+
"rewards/chosen": -0.00830845721065998,
|
3356 |
+
"rewards/margins": 0.36755993962287903,
|
3357 |
+
"rewards/rejected": -0.37586843967437744,
|
3358 |
+
"step": 438
|
3359 |
+
},
|
3360 |
+
{
|
3361 |
+
"epoch": 0.5095910242490047,
|
3362 |
+
"grad_norm": 93.03449604866357,
|
3363 |
+
"learning_rate": 1.7918807939420688e-07,
|
3364 |
+
"logits/chosen": -1.2785309553146362,
|
3365 |
+
"logits/rejected": -1.3855379819869995,
|
3366 |
+
"logps/chosen": -106.93773651123047,
|
3367 |
+
"logps/rejected": -136.5991668701172,
|
3368 |
+
"loss": 0.5876,
|
3369 |
+
"rewards/accuracies": 0.6875,
|
3370 |
+
"rewards/chosen": 0.07631123065948486,
|
3371 |
+
"rewards/margins": 0.45985180139541626,
|
3372 |
+
"rewards/rejected": -0.3835405707359314,
|
3373 |
+
"step": 440
|
3374 |
+
},
|
3375 |
+
{
|
3376 |
+
"epoch": 0.5119073470865002,
|
3377 |
+
"grad_norm": 98.9210182597883,
|
3378 |
+
"learning_rate": 1.7895151160605755e-07,
|
3379 |
+
"logits/chosen": -1.4166314601898193,
|
3380 |
+
"logits/rejected": -1.3835158348083496,
|
3381 |
+
"logps/chosen": -187.5051727294922,
|
3382 |
+
"logps/rejected": -196.1830596923828,
|
3383 |
+
"loss": 0.5841,
|
3384 |
+
"rewards/accuracies": 0.59375,
|
3385 |
+
"rewards/chosen": -0.13197794556617737,
|
3386 |
+
"rewards/margins": 0.36578553915023804,
|
3387 |
+
"rewards/rejected": -0.49776342511177063,
|
3388 |
+
"step": 442
|
3389 |
+
},
|
3390 |
+
{
|
3391 |
+
"epoch": 0.5142236699239957,
|
3392 |
+
"grad_norm": 83.90736102267026,
|
3393 |
+
"learning_rate": 1.7871376491357716e-07,
|
3394 |
+
"logits/chosen": -1.3803664445877075,
|
3395 |
+
"logits/rejected": -1.3876008987426758,
|
3396 |
+
"logps/chosen": -147.97230529785156,
|
3397 |
+
"logps/rejected": -158.1250762939453,
|
3398 |
+
"loss": 0.6266,
|
3399 |
+
"rewards/accuracies": 0.65625,
|
3400 |
+
"rewards/chosen": 0.05537159740924835,
|
3401 |
+
"rewards/margins": 0.3145188093185425,
|
3402 |
+
"rewards/rejected": -0.2591472268104553,
|
3403 |
+
"step": 444
|
3404 |
+
},
|
3405 |
+
{
|
3406 |
+
"epoch": 0.5165399927614911,
|
3407 |
+
"grad_norm": 83.18792426148275,
|
3408 |
+
"learning_rate": 1.7847484286680036e-07,
|
3409 |
+
"logits/chosen": -1.2037944793701172,
|
3410 |
+
"logits/rejected": -1.3015272617340088,
|
3411 |
+
"logps/chosen": -116.46647644042969,
|
3412 |
+
"logps/rejected": -133.59059143066406,
|
3413 |
+
"loss": 0.6422,
|
3414 |
+
"rewards/accuracies": 0.71875,
|
3415 |
+
"rewards/chosen": -0.2899719476699829,
|
3416 |
+
"rewards/margins": 0.47784021496772766,
|
3417 |
+
"rewards/rejected": -0.767812192440033,
|
3418 |
+
"step": 446
|
3419 |
+
},
|
3420 |
+
{
|
3421 |
+
"epoch": 0.5188563155989866,
|
3422 |
+
"grad_norm": 86.96480601224319,
|
3423 |
+
"learning_rate": 1.782347490333123e-07,
|
3424 |
+
"logits/chosen": -1.3997318744659424,
|
3425 |
+
"logits/rejected": -1.3888890743255615,
|
3426 |
+
"logps/chosen": -168.47235107421875,
|
3427 |
+
"logps/rejected": -173.81881713867188,
|
3428 |
+
"loss": 0.5883,
|
3429 |
+
"rewards/accuracies": 0.65625,
|
3430 |
+
"rewards/chosen": 0.2631508708000183,
|
3431 |
+
"rewards/margins": 0.40129777789115906,
|
3432 |
+
"rewards/rejected": -0.13814686238765717,
|
3433 |
+
"step": 448
|
3434 |
+
},
|
3435 |
+
{
|
3436 |
+
"epoch": 0.5211726384364821,
|
3437 |
+
"grad_norm": 68.31450376756777,
|
3438 |
+
"learning_rate": 1.7799348699819518e-07,
|
3439 |
+
"logits/chosen": -1.3524158000946045,
|
3440 |
+
"logits/rejected": -1.3299603462219238,
|
3441 |
+
"logps/chosen": -121.39910888671875,
|
3442 |
+
"logps/rejected": -131.10423278808594,
|
3443 |
+
"loss": 0.5874,
|
3444 |
+
"rewards/accuracies": 0.75,
|
3445 |
+
"rewards/chosen": 0.28284794092178345,
|
3446 |
+
"rewards/margins": 0.3859240412712097,
|
3447 |
+
"rewards/rejected": -0.10307610780000687,
|
3448 |
+
"step": 450
|
3449 |
+
},
|
3450 |
+
{
|
3451 |
+
"epoch": 0.5234889612739776,
|
3452 |
+
"grad_norm": 111.76852046416136,
|
3453 |
+
"learning_rate": 1.7775106036397474e-07,
|
3454 |
+
"logits/chosen": -1.2830047607421875,
|
3455 |
+
"logits/rejected": -1.3414244651794434,
|
3456 |
+
"logps/chosen": -130.662353515625,
|
3457 |
+
"logps/rejected": -179.69061279296875,
|
3458 |
+
"loss": 0.5992,
|
3459 |
+
"rewards/accuracies": 0.75,
|
3460 |
+
"rewards/chosen": 0.22120808064937592,
|
3461 |
+
"rewards/margins": 1.2555629014968872,
|
3462 |
+
"rewards/rejected": -1.0343549251556396,
|
3463 |
+
"step": 452
|
3464 |
+
},
|
3465 |
+
{
|
3466 |
+
"epoch": 0.525805284111473,
|
3467 |
+
"grad_norm": 67.76003255019495,
|
3468 |
+
"learning_rate": 1.775074727505667e-07,
|
3469 |
+
"logits/chosen": -1.1533057689666748,
|
3470 |
+
"logits/rejected": -1.294029951095581,
|
3471 |
+
"logps/chosen": -149.02322387695312,
|
3472 |
+
"logps/rejected": -172.8708953857422,
|
3473 |
+
"loss": 0.5608,
|
3474 |
+
"rewards/accuracies": 0.5,
|
3475 |
+
"rewards/chosen": -0.04835113137960434,
|
3476 |
+
"rewards/margins": 0.28162479400634766,
|
3477 |
+
"rewards/rejected": -0.3299759328365326,
|
3478 |
+
"step": 454
|
3479 |
+
},
|
3480 |
+
{
|
3481 |
+
"epoch": 0.5281216069489685,
|
3482 |
+
"grad_norm": 85.70884472679678,
|
3483 |
+
"learning_rate": 1.7726272779522228e-07,
|
3484 |
+
"logits/chosen": -1.2949302196502686,
|
3485 |
+
"logits/rejected": -1.387807846069336,
|
3486 |
+
"logps/chosen": -159.39170837402344,
|
3487 |
+
"logps/rejected": -189.28244018554688,
|
3488 |
+
"loss": 0.6753,
|
3489 |
+
"rewards/accuracies": 0.71875,
|
3490 |
+
"rewards/chosen": 0.12795251607894897,
|
3491 |
+
"rewards/margins": 0.3282526135444641,
|
3492 |
+
"rewards/rejected": -0.20030008256435394,
|
3493 |
+
"step": 456
|
3494 |
+
},
|
3495 |
+
{
|
3496 |
+
"epoch": 0.530437929786464,
|
3497 |
+
"grad_norm": 69.96049512457706,
|
3498 |
+
"learning_rate": 1.7701682915247437e-07,
|
3499 |
+
"logits/chosen": -1.1357134580612183,
|
3500 |
+
"logits/rejected": -1.2111129760742188,
|
3501 |
+
"logps/chosen": -168.00326538085938,
|
3502 |
+
"logps/rejected": -185.22506713867188,
|
3503 |
+
"loss": 0.6102,
|
3504 |
+
"rewards/accuracies": 0.71875,
|
3505 |
+
"rewards/chosen": -0.46182161569595337,
|
3506 |
+
"rewards/margins": 0.4477265477180481,
|
3507 |
+
"rewards/rejected": -0.9095481634140015,
|
3508 |
+
"step": 458
|
3509 |
+
},
|
3510 |
+
{
|
3511 |
+
"epoch": 0.5327542526239595,
|
3512 |
+
"grad_norm": 101.3186304412605,
|
3513 |
+
"learning_rate": 1.7676978049408259e-07,
|
3514 |
+
"logits/chosen": -1.3433293104171753,
|
3515 |
+
"logits/rejected": -1.3274402618408203,
|
3516 |
+
"logps/chosen": -129.25802612304688,
|
3517 |
+
"logps/rejected": -149.58999633789062,
|
3518 |
+
"loss": 0.6877,
|
3519 |
+
"rewards/accuracies": 0.65625,
|
3520 |
+
"rewards/chosen": -0.09710556268692017,
|
3521 |
+
"rewards/margins": 0.5576457977294922,
|
3522 |
+
"rewards/rejected": -0.6547513604164124,
|
3523 |
+
"step": 460
|
3524 |
+
},
|
3525 |
+
{
|
3526 |
+
"epoch": 0.535070575461455,
|
3527 |
+
"grad_norm": 101.53493027467981,
|
3528 |
+
"learning_rate": 1.7652158550897863e-07,
|
3529 |
+
"logits/chosen": -1.2119991779327393,
|
3530 |
+
"logits/rejected": -1.254407525062561,
|
3531 |
+
"logps/chosen": -124.32587432861328,
|
3532 |
+
"logps/rejected": -141.7906036376953,
|
3533 |
+
"loss": 0.6527,
|
3534 |
+
"rewards/accuracies": 0.625,
|
3535 |
+
"rewards/chosen": -0.07976742088794708,
|
3536 |
+
"rewards/margins": 0.11116158217191696,
|
3537 |
+
"rewards/rejected": -0.19092898070812225,
|
3538 |
+
"step": 462
|
3539 |
+
},
|
3540 |
+
{
|
3541 |
+
"epoch": 0.5373868982989504,
|
3542 |
+
"grad_norm": 72.67582060276438,
|
3543 |
+
"learning_rate": 1.7627224790321116e-07,
|
3544 |
+
"logits/chosen": -1.3650070428848267,
|
3545 |
+
"logits/rejected": -1.3934192657470703,
|
3546 |
+
"logps/chosen": -111.0053939819336,
|
3547 |
+
"logps/rejected": -128.06703186035156,
|
3548 |
+
"loss": 0.6384,
|
3549 |
+
"rewards/accuracies": 0.625,
|
3550 |
+
"rewards/chosen": 0.09877490997314453,
|
3551 |
+
"rewards/margins": 0.22652901709079742,
|
3552 |
+
"rewards/rejected": -0.1277541220188141,
|
3553 |
+
"step": 464
|
3554 |
+
},
|
3555 |
+
{
|
3556 |
+
"epoch": 0.5397032211364459,
|
3557 |
+
"grad_norm": 104.80291110492522,
|
3558 |
+
"learning_rate": 1.7602177139989042e-07,
|
3559 |
+
"logits/chosen": -1.2948188781738281,
|
3560 |
+
"logits/rejected": -1.3249576091766357,
|
3561 |
+
"logps/chosen": -113.75486755371094,
|
3562 |
+
"logps/rejected": -135.57427978515625,
|
3563 |
+
"loss": 0.6462,
|
3564 |
+
"rewards/accuracies": 0.59375,
|
3565 |
+
"rewards/chosen": 0.04134136065840721,
|
3566 |
+
"rewards/margins": 0.30063849687576294,
|
3567 |
+
"rewards/rejected": -0.25929710268974304,
|
3568 |
+
"step": 466
|
3569 |
+
},
|
3570 |
+
{
|
3571 |
+
"epoch": 0.5420195439739414,
|
3572 |
+
"grad_norm": 78.82420405990091,
|
3573 |
+
"learning_rate": 1.7577015973913274e-07,
|
3574 |
+
"logits/chosen": -1.2992827892303467,
|
3575 |
+
"logits/rejected": -1.3570318222045898,
|
3576 |
+
"logps/chosen": -131.05203247070312,
|
3577 |
+
"logps/rejected": -151.40420532226562,
|
3578 |
+
"loss": 0.6198,
|
3579 |
+
"rewards/accuracies": 0.71875,
|
3580 |
+
"rewards/chosen": 0.12259967625141144,
|
3581 |
+
"rewards/margins": 0.3709834814071655,
|
3582 |
+
"rewards/rejected": -0.2483838051557541,
|
3583 |
+
"step": 468
|
3584 |
+
},
|
3585 |
+
{
|
3586 |
+
"epoch": 0.5443358668114369,
|
3587 |
+
"grad_norm": 72.33642230267687,
|
3588 |
+
"learning_rate": 1.755174166780045e-07,
|
3589 |
+
"logits/chosen": -1.1955764293670654,
|
3590 |
+
"logits/rejected": -1.304951786994934,
|
3591 |
+
"logps/chosen": -132.34945678710938,
|
3592 |
+
"logps/rejected": -160.3063201904297,
|
3593 |
+
"loss": 0.5581,
|
3594 |
+
"rewards/accuracies": 0.78125,
|
3595 |
+
"rewards/chosen": 0.14929035305976868,
|
3596 |
+
"rewards/margins": 0.9690365791320801,
|
3597 |
+
"rewards/rejected": -0.8197463154792786,
|
3598 |
+
"step": 470
|
3599 |
+
},
|
3600 |
+
{
|
3601 |
+
"epoch": 0.5466521896489324,
|
3602 |
+
"grad_norm": 95.99345130843376,
|
3603 |
+
"learning_rate": 1.7526354599046632e-07,
|
3604 |
+
"logits/chosen": -1.3738641738891602,
|
3605 |
+
"logits/rejected": -1.4558396339416504,
|
3606 |
+
"logps/chosen": -124.96098327636719,
|
3607 |
+
"logps/rejected": -148.17123413085938,
|
3608 |
+
"loss": 0.6421,
|
3609 |
+
"rewards/accuracies": 0.6875,
|
3610 |
+
"rewards/chosen": 0.09812385588884354,
|
3611 |
+
"rewards/margins": 0.24952289462089539,
|
3612 |
+
"rewards/rejected": -0.15139903128147125,
|
3613 |
+
"step": 472
|
3614 |
+
},
|
3615 |
+
{
|
3616 |
+
"epoch": 0.5489685124864278,
|
3617 |
+
"grad_norm": 80.16504208727451,
|
3618 |
+
"learning_rate": 1.7500855146731648e-07,
|
3619 |
+
"logits/chosen": -1.2267169952392578,
|
3620 |
+
"logits/rejected": -1.2515380382537842,
|
3621 |
+
"logps/chosen": -148.4540557861328,
|
3622 |
+
"logps/rejected": -180.935791015625,
|
3623 |
+
"loss": 0.6187,
|
3624 |
+
"rewards/accuracies": 0.59375,
|
3625 |
+
"rewards/chosen": -0.06260286271572113,
|
3626 |
+
"rewards/margins": 1.7078866958618164,
|
3627 |
+
"rewards/rejected": -1.7704894542694092,
|
3628 |
+
"step": 474
|
3629 |
+
},
|
3630 |
+
{
|
3631 |
+
"epoch": 0.5512848353239233,
|
3632 |
+
"grad_norm": 113.11770155446688,
|
3633 |
+
"learning_rate": 1.747524369161343e-07,
|
3634 |
+
"logits/chosen": -1.3779189586639404,
|
3635 |
+
"logits/rejected": -1.3472117185592651,
|
3636 |
+
"logps/chosen": -137.42312622070312,
|
3637 |
+
"logps/rejected": -141.66329956054688,
|
3638 |
+
"loss": 0.6569,
|
3639 |
+
"rewards/accuracies": 0.59375,
|
3640 |
+
"rewards/chosen": -0.24386143684387207,
|
3641 |
+
"rewards/margins": 0.2617953419685364,
|
3642 |
+
"rewards/rejected": -0.5056568384170532,
|
3643 |
+
"step": 476
|
3644 |
+
},
|
3645 |
+
{
|
3646 |
+
"epoch": 0.5536011581614187,
|
3647 |
+
"grad_norm": 119.83970905986772,
|
3648 |
+
"learning_rate": 1.744952061612234e-07,
|
3649 |
+
"logits/chosen": -1.4478602409362793,
|
3650 |
+
"logits/rejected": -1.470253348350525,
|
3651 |
+
"logps/chosen": -162.07476806640625,
|
3652 |
+
"logps/rejected": -187.415283203125,
|
3653 |
+
"loss": 0.6087,
|
3654 |
+
"rewards/accuracies": 0.6875,
|
3655 |
+
"rewards/chosen": 0.03821418434381485,
|
3656 |
+
"rewards/margins": 0.7775447368621826,
|
3657 |
+
"rewards/rejected": -0.739330530166626,
|
3658 |
+
"step": 478
|
3659 |
+
},
|
3660 |
+
{
|
3661 |
+
"epoch": 0.5559174809989142,
|
3662 |
+
"grad_norm": 82.18148965783794,
|
3663 |
+
"learning_rate": 1.7423686304355468e-07,
|
3664 |
+
"logits/chosen": -1.4132378101348877,
|
3665 |
+
"logits/rejected": -1.4143118858337402,
|
3666 |
+
"logps/chosen": -135.87957763671875,
|
3667 |
+
"logps/rejected": -154.1642608642578,
|
3668 |
+
"loss": 0.604,
|
3669 |
+
"rewards/accuracies": 0.65625,
|
3670 |
+
"rewards/chosen": -0.36500078439712524,
|
3671 |
+
"rewards/margins": 0.42764222621917725,
|
3672 |
+
"rewards/rejected": -0.7926430106163025,
|
3673 |
+
"step": 480
|
3674 |
+
},
|
3675 |
+
{
|
3676 |
+
"epoch": 0.5582338038364097,
|
3677 |
+
"grad_norm": 80.80323897214724,
|
3678 |
+
"learning_rate": 1.7397741142070867e-07,
|
3679 |
+
"logits/chosen": -1.3779712915420532,
|
3680 |
+
"logits/rejected": -1.3945672512054443,
|
3681 |
+
"logps/chosen": -172.9818115234375,
|
3682 |
+
"logps/rejected": -181.16062927246094,
|
3683 |
+
"loss": 0.5964,
|
3684 |
+
"rewards/accuracies": 0.6875,
|
3685 |
+
"rewards/chosen": 0.04211435094475746,
|
3686 |
+
"rewards/margins": 0.4079417586326599,
|
3687 |
+
"rewards/rejected": -0.36582741141319275,
|
3688 |
+
"step": 482
|
3689 |
+
},
|
3690 |
+
{
|
3691 |
+
"epoch": 0.5605501266739051,
|
3692 |
+
"grad_norm": 76.61028661180849,
|
3693 |
+
"learning_rate": 1.737168551668182e-07,
|
3694 |
+
"logits/chosen": -1.190808653831482,
|
3695 |
+
"logits/rejected": -1.271024465560913,
|
3696 |
+
"logps/chosen": -131.51797485351562,
|
3697 |
+
"logps/rejected": -167.06590270996094,
|
3698 |
+
"loss": 0.5975,
|
3699 |
+
"rewards/accuracies": 0.75,
|
3700 |
+
"rewards/chosen": -0.07135076820850372,
|
3701 |
+
"rewards/margins": 0.8735796213150024,
|
3702 |
+
"rewards/rejected": -0.9449302554130554,
|
3703 |
+
"step": 484
|
3704 |
+
},
|
3705 |
+
{
|
3706 |
+
"epoch": 0.5628664495114006,
|
3707 |
+
"grad_norm": 80.91548302041826,
|
3708 |
+
"learning_rate": 1.7345519817251053e-07,
|
3709 |
+
"logits/chosen": -1.3176366090774536,
|
3710 |
+
"logits/rejected": -1.331200122833252,
|
3711 |
+
"logps/chosen": -145.1810760498047,
|
3712 |
+
"logps/rejected": -171.1893768310547,
|
3713 |
+
"loss": 0.6177,
|
3714 |
+
"rewards/accuracies": 0.625,
|
3715 |
+
"rewards/chosen": -0.01799055188894272,
|
3716 |
+
"rewards/margins": 0.4896019399166107,
|
3717 |
+
"rewards/rejected": -0.507592499256134,
|
3718 |
+
"step": 486
|
3719 |
+
},
|
3720 |
+
{
|
3721 |
+
"epoch": 0.5651827723488961,
|
3722 |
+
"grad_norm": 89.40658710689003,
|
3723 |
+
"learning_rate": 1.7319244434484895e-07,
|
3724 |
+
"logits/chosen": -1.2093366384506226,
|
3725 |
+
"logits/rejected": -1.1616159677505493,
|
3726 |
+
"logps/chosen": -140.53761291503906,
|
3727 |
+
"logps/rejected": -141.9064483642578,
|
3728 |
+
"loss": 0.6064,
|
3729 |
+
"rewards/accuracies": 0.6875,
|
3730 |
+
"rewards/chosen": -0.2011549472808838,
|
3731 |
+
"rewards/margins": 0.3880937099456787,
|
3732 |
+
"rewards/rejected": -0.5892486572265625,
|
3733 |
+
"step": 488
|
3734 |
+
},
|
3735 |
+
{
|
3736 |
+
"epoch": 0.5674990951863916,
|
3737 |
+
"grad_norm": 76.26303147749239,
|
3738 |
+
"learning_rate": 1.7292859760727492e-07,
|
3739 |
+
"logits/chosen": -1.2799924612045288,
|
3740 |
+
"logits/rejected": -1.296557903289795,
|
3741 |
+
"logps/chosen": -117.47547912597656,
|
3742 |
+
"logps/rejected": -129.87294006347656,
|
3743 |
+
"loss": 0.6132,
|
3744 |
+
"rewards/accuracies": 0.71875,
|
3745 |
+
"rewards/chosen": -0.10736295580863953,
|
3746 |
+
"rewards/margins": 0.3569309115409851,
|
3747 |
+
"rewards/rejected": -0.464293897151947,
|
3748 |
+
"step": 490
|
3749 |
+
},
|
3750 |
+
{
|
3751 |
+
"epoch": 0.5698154180238871,
|
3752 |
+
"grad_norm": 95.13972864679343,
|
3753 |
+
"learning_rate": 1.7266366189954905e-07,
|
3754 |
+
"logits/chosen": -1.348731517791748,
|
3755 |
+
"logits/rejected": -1.3340685367584229,
|
3756 |
+
"logps/chosen": -150.54696655273438,
|
3757 |
+
"logps/rejected": -185.81204223632812,
|
3758 |
+
"loss": 0.6421,
|
3759 |
+
"rewards/accuracies": 0.59375,
|
3760 |
+
"rewards/chosen": -0.1124522015452385,
|
3761 |
+
"rewards/margins": 0.7442688941955566,
|
3762 |
+
"rewards/rejected": -0.856721043586731,
|
3763 |
+
"step": 492
|
3764 |
+
},
|
3765 |
+
{
|
3766 |
+
"epoch": 0.5721317408613825,
|
3767 |
+
"grad_norm": 106.06131234014966,
|
3768 |
+
"learning_rate": 1.7239764117769258e-07,
|
3769 |
+
"logits/chosen": -1.3093186616897583,
|
3770 |
+
"logits/rejected": -1.3834538459777832,
|
3771 |
+
"logps/chosen": -193.04637145996094,
|
3772 |
+
"logps/rejected": -233.44293212890625,
|
3773 |
+
"loss": 0.6109,
|
3774 |
+
"rewards/accuracies": 0.78125,
|
3775 |
+
"rewards/chosen": -0.16808415949344635,
|
3776 |
+
"rewards/margins": 1.2206228971481323,
|
3777 |
+
"rewards/rejected": -1.388707160949707,
|
3778 |
+
"step": 494
|
3779 |
+
},
|
3780 |
+
{
|
3781 |
+
"epoch": 0.574448063698878,
|
3782 |
+
"grad_norm": 96.07655487217647,
|
3783 |
+
"learning_rate": 1.7213053941392816e-07,
|
3784 |
+
"logits/chosen": -1.330100417137146,
|
3785 |
+
"logits/rejected": -1.354781150817871,
|
3786 |
+
"logps/chosen": -157.2327880859375,
|
3787 |
+
"logps/rejected": -173.35081481933594,
|
3788 |
+
"loss": 0.572,
|
3789 |
+
"rewards/accuracies": 0.625,
|
3790 |
+
"rewards/chosen": 0.055093757808208466,
|
3791 |
+
"rewards/margins": 0.6396900415420532,
|
3792 |
+
"rewards/rejected": -0.5845962166786194,
|
3793 |
+
"step": 496
|
3794 |
+
},
|
3795 |
+
{
|
3796 |
+
"epoch": 0.5767643865363735,
|
3797 |
+
"grad_norm": 90.87905253835972,
|
3798 |
+
"learning_rate": 1.7186236059662046e-07,
|
3799 |
+
"logits/chosen": -1.4015512466430664,
|
3800 |
+
"logits/rejected": -1.4518334865570068,
|
3801 |
+
"logps/chosen": -132.65196228027344,
|
3802 |
+
"logps/rejected": -143.88650512695312,
|
3803 |
+
"loss": 0.6587,
|
3804 |
+
"rewards/accuracies": 0.59375,
|
3805 |
+
"rewards/chosen": -0.07620470970869064,
|
3806 |
+
"rewards/margins": 0.09850985556840897,
|
3807 |
+
"rewards/rejected": -0.1747145652770996,
|
3808 |
+
"step": 498
|
3809 |
+
},
|
3810 |
+
{
|
3811 |
+
"epoch": 0.579080709373869,
|
3812 |
+
"grad_norm": 58.33509354958709,
|
3813 |
+
"learning_rate": 1.7159310873021693e-07,
|
3814 |
+
"logits/chosen": -1.464751124382019,
|
3815 |
+
"logits/rejected": -1.4334102869033813,
|
3816 |
+
"logps/chosen": -111.387939453125,
|
3817 |
+
"logps/rejected": -117.49159240722656,
|
3818 |
+
"loss": 0.5577,
|
3819 |
+
"rewards/accuracies": 0.78125,
|
3820 |
+
"rewards/chosen": 0.29490286111831665,
|
3821 |
+
"rewards/margins": 0.8083434700965881,
|
3822 |
+
"rewards/rejected": -0.5134405493736267,
|
3823 |
+
"step": 500
|
3824 |
+
},
|
3825 |
+
{
|
3826 |
+
"epoch": 0.579080709373869,
|
3827 |
+
"eval_logits/chosen": -1.3100072145462036,
|
3828 |
+
"eval_logits/rejected": -1.304487943649292,
|
3829 |
+
"eval_logps/chosen": -139.44586181640625,
|
3830 |
+
"eval_logps/rejected": -139.48992919921875,
|
3831 |
+
"eval_loss": 0.654407262802124,
|
3832 |
+
"eval_rewards/accuracies": 0.6000000238418579,
|
3833 |
+
"eval_rewards/chosen": -0.38146913051605225,
|
3834 |
+
"eval_rewards/margins": 0.17216716706752777,
|
3835 |
+
"eval_rewards/rejected": -0.5536363124847412,
|
3836 |
+
"eval_runtime": 24.0833,
|
3837 |
+
"eval_samples_per_second": 4.152,
|
3838 |
+
"eval_steps_per_second": 1.038,
|
3839 |
+
"step": 500
|
3840 |
+
},
|
3841 |
+
{
|
3842 |
+
"epoch": 0.5813970322113644,
|
3843 |
+
"grad_norm": 85.71410602529986,
|
3844 |
+
"learning_rate": 1.7132278783518754e-07,
|
3845 |
+
"logits/chosen": -1.2767977714538574,
|
3846 |
+
"logits/rejected": -1.3142091035842896,
|
3847 |
+
"logps/chosen": -132.83477783203125,
|
3848 |
+
"logps/rejected": -152.29600524902344,
|
3849 |
+
"loss": 0.6423,
|
3850 |
+
"rewards/accuracies": 0.4375,
|
3851 |
+
"rewards/chosen": -0.3583824038505554,
|
3852 |
+
"rewards/margins": 0.2208695262670517,
|
3853 |
+
"rewards/rejected": -0.5792520046234131,
|
3854 |
+
"step": 502
|
3855 |
+
},
|
3856 |
+
{
|
3857 |
+
"epoch": 0.5837133550488599,
|
3858 |
+
"grad_norm": 74.14800888172827,
|
3859 |
+
"learning_rate": 1.7105140194796522e-07,
|
3860 |
+
"logits/chosen": -1.3712527751922607,
|
3861 |
+
"logits/rejected": -1.425230860710144,
|
3862 |
+
"logps/chosen": -175.75039672851562,
|
3863 |
+
"logps/rejected": -202.72731018066406,
|
3864 |
+
"loss": 0.5921,
|
3865 |
+
"rewards/accuracies": 0.71875,
|
3866 |
+
"rewards/chosen": -0.03632951155304909,
|
3867 |
+
"rewards/margins": 0.9486851692199707,
|
3868 |
+
"rewards/rejected": -0.9850146174430847,
|
3869 |
+
"step": 504
|
3870 |
+
},
|
3871 |
+
{
|
3872 |
+
"epoch": 0.5860296778863554,
|
3873 |
+
"grad_norm": 71.00059592227518,
|
3874 |
+
"learning_rate": 1.707789551208852e-07,
|
3875 |
+
"logits/chosen": -1.2654979228973389,
|
3876 |
+
"logits/rejected": -1.3367087841033936,
|
3877 |
+
"logps/chosen": -107.92752075195312,
|
3878 |
+
"logps/rejected": -137.77261352539062,
|
3879 |
+
"loss": 0.5964,
|
3880 |
+
"rewards/accuracies": 0.75,
|
3881 |
+
"rewards/chosen": 0.22204995155334473,
|
3882 |
+
"rewards/margins": 0.47908443212509155,
|
3883 |
+
"rewards/rejected": -0.2570344805717468,
|
3884 |
+
"step": 506
|
3885 |
+
},
|
3886 |
+
{
|
3887 |
+
"epoch": 0.5883460007238509,
|
3888 |
+
"grad_norm": 80.94401296109848,
|
3889 |
+
"learning_rate": 1.705054514221248e-07,
|
3890 |
+
"logits/chosen": -1.359083652496338,
|
3891 |
+
"logits/rejected": -1.262428879737854,
|
3892 |
+
"logps/chosen": -128.09751892089844,
|
3893 |
+
"logps/rejected": -112.98042297363281,
|
3894 |
+
"loss": 0.5995,
|
3895 |
+
"rewards/accuracies": 0.71875,
|
3896 |
+
"rewards/chosen": -0.007966872304677963,
|
3897 |
+
"rewards/margins": 0.3093283772468567,
|
3898 |
+
"rewards/rejected": -0.31729522347450256,
|
3899 |
+
"step": 508
|
3900 |
+
},
|
3901 |
+
{
|
3902 |
+
"epoch": 0.5906623235613464,
|
3903 |
+
"grad_norm": 117.75539638738908,
|
3904 |
+
"learning_rate": 1.7023089493564246e-07,
|
3905 |
+
"logits/chosen": -1.3026072978973389,
|
3906 |
+
"logits/rejected": -1.3078409433364868,
|
3907 |
+
"logps/chosen": -157.6989288330078,
|
3908 |
+
"logps/rejected": -171.07347106933594,
|
3909 |
+
"loss": 0.6652,
|
3910 |
+
"rewards/accuracies": 0.65625,
|
3911 |
+
"rewards/chosen": -0.17687593400478363,
|
3912 |
+
"rewards/margins": 0.22165895998477936,
|
3913 |
+
"rewards/rejected": -0.398534893989563,
|
3914 |
+
"step": 510
|
3915 |
+
},
|
3916 |
+
{
|
3917 |
+
"epoch": 0.5929786463988418,
|
3918 |
+
"grad_norm": 86.54731628654646,
|
3919 |
+
"learning_rate": 1.6995528976111692e-07,
|
3920 |
+
"logits/chosen": -1.3644428253173828,
|
3921 |
+
"logits/rejected": -1.359837532043457,
|
3922 |
+
"logps/chosen": -118.70327758789062,
|
3923 |
+
"logps/rejected": -129.3509979248047,
|
3924 |
+
"loss": 0.6307,
|
3925 |
+
"rewards/accuracies": 0.625,
|
3926 |
+
"rewards/chosen": 0.16805267333984375,
|
3927 |
+
"rewards/margins": 0.45584040880203247,
|
3928 |
+
"rewards/rejected": -0.2877877354621887,
|
3929 |
+
"step": 512
|
3930 |
+
},
|
3931 |
+
{
|
3932 |
+
"epoch": 0.5952949692363373,
|
3933 |
+
"grad_norm": 87.28044950941617,
|
3934 |
+
"learning_rate": 1.6967864001388587e-07,
|
3935 |
+
"logits/chosen": -1.383012294769287,
|
3936 |
+
"logits/rejected": -1.372816562652588,
|
3937 |
+
"logps/chosen": -112.56473541259766,
|
3938 |
+
"logps/rejected": -113.43563842773438,
|
3939 |
+
"loss": 0.5892,
|
3940 |
+
"rewards/accuracies": 0.6875,
|
3941 |
+
"rewards/chosen": 0.07795768231153488,
|
3942 |
+
"rewards/margins": 0.3844006359577179,
|
3943 |
+
"rewards/rejected": -0.30644291639328003,
|
3944 |
+
"step": 514
|
3945 |
+
},
|
3946 |
+
{
|
3947 |
+
"epoch": 0.5976112920738328,
|
3948 |
+
"grad_norm": 93.89694914049578,
|
3949 |
+
"learning_rate": 1.6940094982488465e-07,
|
3950 |
+
"logits/chosen": -1.3544152975082397,
|
3951 |
+
"logits/rejected": -1.4398796558380127,
|
3952 |
+
"logps/chosen": -174.69073486328125,
|
3953 |
+
"logps/rejected": -213.37953186035156,
|
3954 |
+
"loss": 0.6402,
|
3955 |
+
"rewards/accuracies": 0.71875,
|
3956 |
+
"rewards/chosen": 0.2562227249145508,
|
3957 |
+
"rewards/margins": 0.7131789922714233,
|
3958 |
+
"rewards/rejected": -0.45695626735687256,
|
3959 |
+
"step": 516
|
3960 |
+
},
|
3961 |
+
{
|
3962 |
+
"epoch": 0.5999276149113283,
|
3963 |
+
"grad_norm": 93.38206179293249,
|
3964 |
+
"learning_rate": 1.6912222334058434e-07,
|
3965 |
+
"logits/chosen": -1.3199559450149536,
|
3966 |
+
"logits/rejected": -1.3303453922271729,
|
3967 |
+
"logps/chosen": -113.59899139404297,
|
3968 |
+
"logps/rejected": -145.6167449951172,
|
3969 |
+
"loss": 0.5803,
|
3970 |
+
"rewards/accuracies": 0.6875,
|
3971 |
+
"rewards/chosen": 0.020612459629774094,
|
3972 |
+
"rewards/margins": 0.46917960047721863,
|
3973 |
+
"rewards/rejected": -0.4485671818256378,
|
3974 |
+
"step": 518
|
3975 |
+
},
|
3976 |
+
{
|
3977 |
+
"epoch": 0.6022439377488238,
|
3978 |
+
"grad_norm": 105.46320125024906,
|
3979 |
+
"learning_rate": 1.6884246472293017e-07,
|
3980 |
+
"logits/chosen": -1.2990922927856445,
|
3981 |
+
"logits/rejected": -1.32880437374115,
|
3982 |
+
"logps/chosen": -156.3465576171875,
|
3983 |
+
"logps/rejected": -181.81884765625,
|
3984 |
+
"loss": 0.5906,
|
3985 |
+
"rewards/accuracies": 0.65625,
|
3986 |
+
"rewards/chosen": 0.04846584051847458,
|
3987 |
+
"rewards/margins": 0.4274147152900696,
|
3988 |
+
"rewards/rejected": -0.3789488971233368,
|
3989 |
+
"step": 520
|
3990 |
+
},
|
3991 |
+
{
|
3992 |
+
"epoch": 0.6045602605863192,
|
3993 |
+
"grad_norm": 75.97476536999818,
|
3994 |
+
"learning_rate": 1.68561678149279e-07,
|
3995 |
+
"logits/chosen": -1.324131727218628,
|
3996 |
+
"logits/rejected": -1.3583768606185913,
|
3997 |
+
"logps/chosen": -158.01376342773438,
|
3998 |
+
"logps/rejected": -170.33180236816406,
|
3999 |
+
"loss": 0.62,
|
4000 |
+
"rewards/accuracies": 0.65625,
|
4001 |
+
"rewards/chosen": -0.15412873029708862,
|
4002 |
+
"rewards/margins": 0.3743273615837097,
|
4003 |
+
"rewards/rejected": -0.5284560322761536,
|
4004 |
+
"step": 522
|
4005 |
+
},
|
4006 |
+
{
|
4007 |
+
"epoch": 0.6068765834238147,
|
4008 |
+
"grad_norm": 104.26484808062503,
|
4009 |
+
"learning_rate": 1.6827986781233728e-07,
|
4010 |
+
"logits/chosen": -1.244482159614563,
|
4011 |
+
"logits/rejected": -1.3129115104675293,
|
4012 |
+
"logps/chosen": -168.09619140625,
|
4013 |
+
"logps/rejected": -183.01235961914062,
|
4014 |
+
"loss": 0.6265,
|
4015 |
+
"rewards/accuracies": 0.6875,
|
4016 |
+
"rewards/chosen": -0.21132177114486694,
|
4017 |
+
"rewards/margins": 0.3104555606842041,
|
4018 |
+
"rewards/rejected": -0.521777331829071,
|
4019 |
+
"step": 524
|
4020 |
+
},
|
4021 |
+
{
|
4022 |
+
"epoch": 0.6091929062613102,
|
4023 |
+
"grad_norm": 114.78689524134293,
|
4024 |
+
"learning_rate": 1.6799703792009824e-07,
|
4025 |
+
"logits/chosen": -1.5139933824539185,
|
4026 |
+
"logits/rejected": -1.4369456768035889,
|
4027 |
+
"logps/chosen": -179.37973022460938,
|
4028 |
+
"logps/rejected": -178.68380737304688,
|
4029 |
+
"loss": 0.6447,
|
4030 |
+
"rewards/accuracies": 0.75,
|
4031 |
+
"rewards/chosen": 0.1471785008907318,
|
4032 |
+
"rewards/margins": 0.5135056376457214,
|
4033 |
+
"rewards/rejected": -0.366327166557312,
|
4034 |
+
"step": 526
|
4035 |
+
},
|
4036 |
+
{
|
4037 |
+
"epoch": 0.6115092290988057,
|
4038 |
+
"grad_norm": 71.37258403318782,
|
4039 |
+
"learning_rate": 1.6771319269577914e-07,
|
4040 |
+
"logits/chosen": -1.246570348739624,
|
4041 |
+
"logits/rejected": -1.2840875387191772,
|
4042 |
+
"logps/chosen": -128.31802368164062,
|
4043 |
+
"logps/rejected": -164.46771240234375,
|
4044 |
+
"loss": 0.5578,
|
4045 |
+
"rewards/accuracies": 0.625,
|
4046 |
+
"rewards/chosen": -0.00241958349943161,
|
4047 |
+
"rewards/margins": 0.5187560319900513,
|
4048 |
+
"rewards/rejected": -0.5211755633354187,
|
4049 |
+
"step": 528
|
4050 |
+
},
|
4051 |
+
{
|
4052 |
+
"epoch": 0.6138255519363012,
|
4053 |
+
"grad_norm": 69.57805371439099,
|
4054 |
+
"learning_rate": 1.6742833637775812e-07,
|
4055 |
+
"logits/chosen": -1.323167085647583,
|
4056 |
+
"logits/rejected": -1.3477709293365479,
|
4057 |
+
"logps/chosen": -146.45350646972656,
|
4058 |
+
"logps/rejected": -181.66311645507812,
|
4059 |
+
"loss": 0.5717,
|
4060 |
+
"rewards/accuracies": 0.6875,
|
4061 |
+
"rewards/chosen": -0.19867736101150513,
|
4062 |
+
"rewards/margins": 0.9410180449485779,
|
4063 |
+
"rewards/rejected": -1.1396952867507935,
|
4064 |
+
"step": 530
|
4065 |
+
},
|
4066 |
+
{
|
4067 |
+
"epoch": 0.6161418747737966,
|
4068 |
+
"grad_norm": 171.030005968529,
|
4069 |
+
"learning_rate": 1.6714247321951105e-07,
|
4070 |
+
"logits/chosen": -1.380966067314148,
|
4071 |
+
"logits/rejected": -1.4481279850006104,
|
4072 |
+
"logps/chosen": -164.24951171875,
|
4073 |
+
"logps/rejected": -182.36082458496094,
|
4074 |
+
"loss": 0.6188,
|
4075 |
+
"rewards/accuracies": 0.75,
|
4076 |
+
"rewards/chosen": -0.18503104150295258,
|
4077 |
+
"rewards/margins": 0.3646969497203827,
|
4078 |
+
"rewards/rejected": -0.5497279167175293,
|
4079 |
+
"step": 532
|
4080 |
+
},
|
4081 |
+
{
|
4082 |
+
"epoch": 0.6184581976112921,
|
4083 |
+
"grad_norm": 92.73704263508813,
|
4084 |
+
"learning_rate": 1.668556074895479e-07,
|
4085 |
+
"logits/chosen": -1.3130195140838623,
|
4086 |
+
"logits/rejected": -1.3079559803009033,
|
4087 |
+
"logps/chosen": -163.1666717529297,
|
4088 |
+
"logps/rejected": -171.744873046875,
|
4089 |
+
"loss": 0.6301,
|
4090 |
+
"rewards/accuracies": 0.625,
|
4091 |
+
"rewards/chosen": -0.020428307354450226,
|
4092 |
+
"rewards/margins": 0.3073387145996094,
|
4093 |
+
"rewards/rejected": -0.327767014503479,
|
4094 |
+
"step": 534
|
4095 |
+
},
|
4096 |
+
{
|
4097 |
+
"epoch": 0.6207745204487876,
|
4098 |
+
"grad_norm": 80.77491644213549,
|
4099 |
+
"learning_rate": 1.6656774347134907e-07,
|
4100 |
+
"logits/chosen": -1.3122167587280273,
|
4101 |
+
"logits/rejected": -1.3465042114257812,
|
4102 |
+
"logps/chosen": -122.46326446533203,
|
4103 |
+
"logps/rejected": -140.5079803466797,
|
4104 |
+
"loss": 0.651,
|
4105 |
+
"rewards/accuracies": 0.59375,
|
4106 |
+
"rewards/chosen": -0.09150812029838562,
|
4107 |
+
"rewards/margins": 0.42649781703948975,
|
4108 |
+
"rewards/rejected": -0.518005907535553,
|
4109 |
+
"step": 536
|
4110 |
+
},
|
4111 |
+
{
|
4112 |
+
"epoch": 0.6230908432862831,
|
4113 |
+
"grad_norm": 84.46218412236821,
|
4114 |
+
"learning_rate": 1.6627888546330136e-07,
|
4115 |
+
"logits/chosen": -1.4094092845916748,
|
4116 |
+
"logits/rejected": -1.4629356861114502,
|
4117 |
+
"logps/chosen": -185.64651489257812,
|
4118 |
+
"logps/rejected": -204.06578063964844,
|
4119 |
+
"loss": 0.5885,
|
4120 |
+
"rewards/accuracies": 0.59375,
|
4121 |
+
"rewards/chosen": -0.06263245642185211,
|
4122 |
+
"rewards/margins": 0.5876613855361938,
|
4123 |
+
"rewards/rejected": -0.6502938270568848,
|
4124 |
+
"step": 538
|
4125 |
+
},
|
4126 |
+
{
|
4127 |
+
"epoch": 0.6254071661237784,
|
4128 |
+
"grad_norm": 81.10930034348203,
|
4129 |
+
"learning_rate": 1.659890377786339e-07,
|
4130 |
+
"logits/chosen": -1.3104676008224487,
|
4131 |
+
"logits/rejected": -1.2645026445388794,
|
4132 |
+
"logps/chosen": -160.73683166503906,
|
4133 |
+
"logps/rejected": -208.7481689453125,
|
4134 |
+
"loss": 0.5968,
|
4135 |
+
"rewards/accuracies": 0.71875,
|
4136 |
+
"rewards/chosen": -0.17388193309307098,
|
4137 |
+
"rewards/margins": 1.4726815223693848,
|
4138 |
+
"rewards/rejected": -1.6465635299682617,
|
4139 |
+
"step": 540
|
4140 |
+
},
|
4141 |
+
{
|
4142 |
+
"epoch": 0.627723488961274,
|
4143 |
+
"grad_norm": 70.09647822541486,
|
4144 |
+
"learning_rate": 1.656982047453536e-07,
|
4145 |
+
"logits/chosen": -1.3550140857696533,
|
4146 |
+
"logits/rejected": -1.3001039028167725,
|
4147 |
+
"logps/chosen": -152.96685791015625,
|
4148 |
+
"logps/rejected": -185.17835998535156,
|
4149 |
+
"loss": 0.5726,
|
4150 |
+
"rewards/accuracies": 0.65625,
|
4151 |
+
"rewards/chosen": -0.005441240966320038,
|
4152 |
+
"rewards/margins": 0.716077983379364,
|
4153 |
+
"rewards/rejected": -0.7215193510055542,
|
4154 |
+
"step": 542
|
4155 |
+
},
|
4156 |
+
{
|
4157 |
+
"epoch": 0.6300398117987694,
|
4158 |
+
"grad_norm": 114.90916867192477,
|
4159 |
+
"learning_rate": 1.6540639070618066e-07,
|
4160 |
+
"logits/chosen": -1.3001914024353027,
|
4161 |
+
"logits/rejected": -1.3512235879898071,
|
4162 |
+
"logps/chosen": -162.81076049804688,
|
4163 |
+
"logps/rejected": -186.64080810546875,
|
4164 |
+
"loss": 0.5977,
|
4165 |
+
"rewards/accuracies": 0.78125,
|
4166 |
+
"rewards/chosen": -0.11464500427246094,
|
4167 |
+
"rewards/margins": 0.6204842329025269,
|
4168 |
+
"rewards/rejected": -0.735129177570343,
|
4169 |
+
"step": 544
|
4170 |
+
},
|
4171 |
+
{
|
4172 |
+
"epoch": 0.6323561346362649,
|
4173 |
+
"grad_norm": 114.79194762522887,
|
4174 |
+
"learning_rate": 1.6511360001848367e-07,
|
4175 |
+
"logits/chosen": -1.1840189695358276,
|
4176 |
+
"logits/rejected": -1.2202097177505493,
|
4177 |
+
"logps/chosen": -133.49606323242188,
|
4178 |
+
"logps/rejected": -157.7266387939453,
|
4179 |
+
"loss": 0.6263,
|
4180 |
+
"rewards/accuracies": 0.90625,
|
4181 |
+
"rewards/chosen": 0.1880410611629486,
|
4182 |
+
"rewards/margins": 0.6589545607566833,
|
4183 |
+
"rewards/rejected": -0.47091349959373474,
|
4184 |
+
"step": 546
|
4185 |
+
},
|
4186 |
+
{
|
4187 |
+
"epoch": 0.6346724574737604,
|
4188 |
+
"grad_norm": 93.62652784226147,
|
4189 |
+
"learning_rate": 1.6481983705421448e-07,
|
4190 |
+
"logits/chosen": -1.306709885597229,
|
4191 |
+
"logits/rejected": -1.378722071647644,
|
4192 |
+
"logps/chosen": -105.61914825439453,
|
4193 |
+
"logps/rejected": -134.32098388671875,
|
4194 |
+
"loss": 0.6102,
|
4195 |
+
"rewards/accuracies": 0.71875,
|
4196 |
+
"rewards/chosen": 0.14075130224227905,
|
4197 |
+
"rewards/margins": 0.543403685092926,
|
4198 |
+
"rewards/rejected": -0.4026523530483246,
|
4199 |
+
"step": 548
|
4200 |
+
},
|
4201 |
+
{
|
4202 |
+
"epoch": 0.6369887803112558,
|
4203 |
+
"grad_norm": 73.81068535979944,
|
4204 |
+
"learning_rate": 1.6452510619984298e-07,
|
4205 |
+
"logits/chosen": -1.2993462085723877,
|
4206 |
+
"logits/rejected": -1.3260908126831055,
|
4207 |
+
"logps/chosen": -113.44615936279297,
|
4208 |
+
"logps/rejected": -114.76972961425781,
|
4209 |
+
"loss": 0.6459,
|
4210 |
+
"rewards/accuracies": 0.625,
|
4211 |
+
"rewards/chosen": -0.17589446902275085,
|
4212 |
+
"rewards/margins": 0.14515961706638336,
|
4213 |
+
"rewards/rejected": -0.3210541009902954,
|
4214 |
+
"step": 550
|
4215 |
+
},
|
4216 |
+
{
|
4217 |
+
"epoch": 0.6393051031487513,
|
4218 |
+
"grad_norm": 67.36004183948565,
|
4219 |
+
"learning_rate": 1.642294118562917e-07,
|
4220 |
+
"logits/chosen": -1.365562915802002,
|
4221 |
+
"logits/rejected": -1.3586573600769043,
|
4222 |
+
"logps/chosen": -124.21674346923828,
|
4223 |
+
"logps/rejected": -129.41188049316406,
|
4224 |
+
"loss": 0.6016,
|
4225 |
+
"rewards/accuracies": 0.5625,
|
4226 |
+
"rewards/chosen": -0.026413168758153915,
|
4227 |
+
"rewards/margins": 0.1695682406425476,
|
4228 |
+
"rewards/rejected": -0.19598142802715302,
|
4229 |
+
"step": 552
|
4230 |
+
},
|
4231 |
+
{
|
4232 |
+
"epoch": 0.6416214259862468,
|
4233 |
+
"grad_norm": 109.88457230810822,
|
4234 |
+
"learning_rate": 1.6393275843886988e-07,
|
4235 |
+
"logits/chosen": -1.2051353454589844,
|
4236 |
+
"logits/rejected": -1.1777970790863037,
|
4237 |
+
"logps/chosen": -169.16192626953125,
|
4238 |
+
"logps/rejected": -165.87405395507812,
|
4239 |
+
"loss": 0.7026,
|
4240 |
+
"rewards/accuracies": 0.5625,
|
4241 |
+
"rewards/chosen": -0.2536877989768982,
|
4242 |
+
"rewards/margins": -0.15165254473686218,
|
4243 |
+
"rewards/rejected": -0.10203523933887482,
|
4244 |
+
"step": 554
|
4245 |
+
},
|
4246 |
+
{
|
4247 |
+
"epoch": 0.6439377488237423,
|
4248 |
+
"grad_norm": 75.11287220032575,
|
4249 |
+
"learning_rate": 1.636351503772077e-07,
|
4250 |
+
"logits/chosen": -1.4200119972229004,
|
4251 |
+
"logits/rejected": -1.4631352424621582,
|
4252 |
+
"logps/chosen": -198.4412841796875,
|
4253 |
+
"logps/rejected": -217.67779541015625,
|
4254 |
+
"loss": 0.5673,
|
4255 |
+
"rewards/accuracies": 0.65625,
|
4256 |
+
"rewards/chosen": 0.11143307387828827,
|
4257 |
+
"rewards/margins": 0.5114270448684692,
|
4258 |
+
"rewards/rejected": -0.39999401569366455,
|
4259 |
+
"step": 556
|
4260 |
+
},
|
4261 |
+
{
|
4262 |
+
"epoch": 0.6462540716612378,
|
4263 |
+
"grad_norm": 76.6884503172935,
|
4264 |
+
"learning_rate": 1.6333659211519013e-07,
|
4265 |
+
"logits/chosen": -1.250978708267212,
|
4266 |
+
"logits/rejected": -1.3204269409179688,
|
4267 |
+
"logps/chosen": -122.6414794921875,
|
4268 |
+
"logps/rejected": -148.8828887939453,
|
4269 |
+
"loss": 0.577,
|
4270 |
+
"rewards/accuracies": 0.84375,
|
4271 |
+
"rewards/chosen": 0.0017823921516537666,
|
4272 |
+
"rewards/margins": 0.8125737905502319,
|
4273 |
+
"rewards/rejected": -0.8107913732528687,
|
4274 |
+
"step": 558
|
4275 |
+
},
|
4276 |
+
{
|
4277 |
+
"epoch": 0.6485703944987332,
|
4278 |
+
"grad_norm": 97.61750654608176,
|
4279 |
+
"learning_rate": 1.630370881108905e-07,
|
4280 |
+
"logits/chosen": -1.4659614562988281,
|
4281 |
+
"logits/rejected": -1.410871148109436,
|
4282 |
+
"logps/chosen": -158.7913055419922,
|
4283 |
+
"logps/rejected": -179.6865692138672,
|
4284 |
+
"loss": 0.6643,
|
4285 |
+
"rewards/accuracies": 0.59375,
|
4286 |
+
"rewards/chosen": 0.09925530850887299,
|
4287 |
+
"rewards/margins": 0.4364185631275177,
|
4288 |
+
"rewards/rejected": -0.33716320991516113,
|
4289 |
+
"step": 560
|
4290 |
+
},
|
4291 |
+
{
|
4292 |
+
"epoch": 0.6508867173362287,
|
4293 |
+
"grad_norm": 81.27814399323799,
|
4294 |
+
"learning_rate": 1.627366428365039e-07,
|
4295 |
+
"logits/chosen": -1.3590463399887085,
|
4296 |
+
"logits/rejected": -1.402485966682434,
|
4297 |
+
"logps/chosen": -154.32345581054688,
|
4298 |
+
"logps/rejected": -171.76239013671875,
|
4299 |
+
"loss": 0.6142,
|
4300 |
+
"rewards/accuracies": 0.6875,
|
4301 |
+
"rewards/chosen": -0.35029542446136475,
|
4302 |
+
"rewards/margins": 0.5618267059326172,
|
4303 |
+
"rewards/rejected": -0.9121222496032715,
|
4304 |
+
"step": 562
|
4305 |
+
},
|
4306 |
+
{
|
4307 |
+
"epoch": 0.6532030401737242,
|
4308 |
+
"grad_norm": 94.5819842213993,
|
4309 |
+
"learning_rate": 1.6243526077828058e-07,
|
4310 |
+
"logits/chosen": -1.368080973625183,
|
4311 |
+
"logits/rejected": -1.3609378337860107,
|
4312 |
+
"logps/chosen": -144.53123474121094,
|
4313 |
+
"logps/rejected": -149.87338256835938,
|
4314 |
+
"loss": 0.6336,
|
4315 |
+
"rewards/accuracies": 0.90625,
|
4316 |
+
"rewards/chosen": 0.22654207050800323,
|
4317 |
+
"rewards/margins": 0.999248206615448,
|
4318 |
+
"rewards/rejected": -0.7727060914039612,
|
4319 |
+
"step": 564
|
4320 |
+
},
|
4321 |
+
{
|
4322 |
+
"epoch": 0.6555193630112197,
|
4323 |
+
"grad_norm": 77.01269214216319,
|
4324 |
+
"learning_rate": 1.6213294643645882e-07,
|
4325 |
+
"logits/chosen": -1.2939796447753906,
|
4326 |
+
"logits/rejected": -1.3303455114364624,
|
4327 |
+
"logps/chosen": -136.7617645263672,
|
4328 |
+
"logps/rejected": -174.4918670654297,
|
4329 |
+
"loss": 0.5761,
|
4330 |
+
"rewards/accuracies": 0.59375,
|
4331 |
+
"rewards/chosen": -0.4167702794075012,
|
4332 |
+
"rewards/margins": 0.5882107615470886,
|
4333 |
+
"rewards/rejected": -1.0049810409545898,
|
4334 |
+
"step": 566
|
4335 |
+
},
|
4336 |
+
{
|
4337 |
+
"epoch": 0.6578356858487152,
|
4338 |
+
"grad_norm": 93.03042212849894,
|
4339 |
+
"learning_rate": 1.618297043251977e-07,
|
4340 |
+
"logits/chosen": -1.346666932106018,
|
4341 |
+
"logits/rejected": -1.4271106719970703,
|
4342 |
+
"logps/chosen": -123.94332885742188,
|
4343 |
+
"logps/rejected": -144.5997772216797,
|
4344 |
+
"loss": 0.5749,
|
4345 |
+
"rewards/accuracies": 0.625,
|
4346 |
+
"rewards/chosen": -0.06060848757624626,
|
4347 |
+
"rewards/margins": 0.27999287843704224,
|
4348 |
+
"rewards/rejected": -0.3406013548374176,
|
4349 |
+
"step": 568
|
4350 |
+
},
|
4351 |
+
{
|
4352 |
+
"epoch": 0.6601520086862106,
|
4353 |
+
"grad_norm": 78.42710600355083,
|
4354 |
+
"learning_rate": 1.6152553897250987e-07,
|
4355 |
+
"logits/chosen": -1.1860871315002441,
|
4356 |
+
"logits/rejected": -1.2453413009643555,
|
4357 |
+
"logps/chosen": -119.57302856445312,
|
4358 |
+
"logps/rejected": -140.7362518310547,
|
4359 |
+
"loss": 0.5865,
|
4360 |
+
"rewards/accuracies": 0.71875,
|
4361 |
+
"rewards/chosen": -0.14186443388462067,
|
4362 |
+
"rewards/margins": 0.509893000125885,
|
4363 |
+
"rewards/rejected": -0.6517573595046997,
|
4364 |
+
"step": 570
|
4365 |
+
},
|
4366 |
+
{
|
4367 |
+
"epoch": 0.6624683315237061,
|
4368 |
+
"grad_norm": 79.87983043947283,
|
4369 |
+
"learning_rate": 1.6122045492019374e-07,
|
4370 |
+
"logits/chosen": -1.2810924053192139,
|
4371 |
+
"logits/rejected": -1.3013286590576172,
|
4372 |
+
"logps/chosen": -124.72441101074219,
|
4373 |
+
"logps/rejected": -155.4320831298828,
|
4374 |
+
"loss": 0.6214,
|
4375 |
+
"rewards/accuracies": 0.71875,
|
4376 |
+
"rewards/chosen": -0.17904864251613617,
|
4377 |
+
"rewards/margins": 0.5865851640701294,
|
4378 |
+
"rewards/rejected": -0.7656337022781372,
|
4379 |
+
"step": 572
|
4380 |
+
},
|
4381 |
+
{
|
4382 |
+
"epoch": 0.6647846543612016,
|
4383 |
+
"grad_norm": 83.06538656604619,
|
4384 |
+
"learning_rate": 1.6091445672376577e-07,
|
4385 |
+
"logits/chosen": -1.2900563478469849,
|
4386 |
+
"logits/rejected": -1.3495041131973267,
|
4387 |
+
"logps/chosen": -132.9668426513672,
|
4388 |
+
"logps/rejected": -158.88734436035156,
|
4389 |
+
"loss": 0.72,
|
4390 |
+
"rewards/accuracies": 0.71875,
|
4391 |
+
"rewards/chosen": 0.11348069459199905,
|
4392 |
+
"rewards/margins": 0.5080840587615967,
|
4393 |
+
"rewards/rejected": -0.3946034610271454,
|
4394 |
+
"step": 574
|
4395 |
+
},
|
4396 |
+
{
|
4397 |
+
"epoch": 0.6671009771986971,
|
4398 |
+
"grad_norm": 81.11691381402433,
|
4399 |
+
"learning_rate": 1.6060754895239242e-07,
|
4400 |
+
"logits/chosen": -1.3639813661575317,
|
4401 |
+
"logits/rejected": -1.3099185228347778,
|
4402 |
+
"logps/chosen": -129.63088989257812,
|
4403 |
+
"logps/rejected": -138.73565673828125,
|
4404 |
+
"loss": 0.5229,
|
4405 |
+
"rewards/accuracies": 0.625,
|
4406 |
+
"rewards/chosen": -0.1598489135503769,
|
4407 |
+
"rewards/margins": 0.4970959722995758,
|
4408 |
+
"rewards/rejected": -0.6569448709487915,
|
4409 |
+
"step": 576
|
4410 |
+
},
|
4411 |
+
{
|
4412 |
+
"epoch": 0.6694173000361926,
|
4413 |
+
"grad_norm": 97.23417815050283,
|
4414 |
+
"learning_rate": 1.6029973618882188e-07,
|
4415 |
+
"logits/chosen": -1.4283655881881714,
|
4416 |
+
"logits/rejected": -1.4487836360931396,
|
4417 |
+
"logps/chosen": -133.85809326171875,
|
4418 |
+
"logps/rejected": -156.39669799804688,
|
4419 |
+
"loss": 0.5988,
|
4420 |
+
"rewards/accuracies": 0.59375,
|
4421 |
+
"rewards/chosen": 0.003942415118217468,
|
4422 |
+
"rewards/margins": 0.2732374668121338,
|
4423 |
+
"rewards/rejected": -0.26929500699043274,
|
4424 |
+
"step": 578
|
4425 |
+
},
|
4426 |
+
{
|
4427 |
+
"epoch": 0.671733622873688,
|
4428 |
+
"grad_norm": 66.172594585089,
|
4429 |
+
"learning_rate": 1.599910230293158e-07,
|
4430 |
+
"logits/chosen": -1.2562668323516846,
|
4431 |
+
"logits/rejected": -1.3157635927200317,
|
4432 |
+
"logps/chosen": -166.57229614257812,
|
4433 |
+
"logps/rejected": -168.74867248535156,
|
4434 |
+
"loss": 0.5781,
|
4435 |
+
"rewards/accuracies": 0.78125,
|
4436 |
+
"rewards/chosen": 0.08036249130964279,
|
4437 |
+
"rewards/margins": 0.8519478440284729,
|
4438 |
+
"rewards/rejected": -0.7715852856636047,
|
4439 |
+
"step": 580
|
4440 |
+
},
|
4441 |
+
{
|
4442 |
+
"epoch": 0.6740499457111835,
|
4443 |
+
"grad_norm": 74.76192509954373,
|
4444 |
+
"learning_rate": 1.596814140835805e-07,
|
4445 |
+
"logits/chosen": -1.256306767463684,
|
4446 |
+
"logits/rejected": -1.3627066612243652,
|
4447 |
+
"logps/chosen": -163.5234375,
|
4448 |
+
"logps/rejected": -197.15310668945312,
|
4449 |
+
"loss": 0.5397,
|
4450 |
+
"rewards/accuracies": 0.8125,
|
4451 |
+
"rewards/chosen": 0.26100030541419983,
|
4452 |
+
"rewards/margins": 0.756481409072876,
|
4453 |
+
"rewards/rejected": -0.495481014251709,
|
4454 |
+
"step": 582
|
4455 |
+
},
|
4456 |
+
{
|
4457 |
+
"epoch": 0.676366268548679,
|
4458 |
+
"grad_norm": 70.48378313519777,
|
4459 |
+
"learning_rate": 1.5937091397469813e-07,
|
4460 |
+
"logits/chosen": -1.395608901977539,
|
4461 |
+
"logits/rejected": -1.4762039184570312,
|
4462 |
+
"logps/chosen": -147.21681213378906,
|
4463 |
+
"logps/rejected": -167.70651245117188,
|
4464 |
+
"loss": 0.6367,
|
4465 |
+
"rewards/accuracies": 0.53125,
|
4466 |
+
"rewards/chosen": 0.1700185090303421,
|
4467 |
+
"rewards/margins": 0.24376149475574493,
|
4468 |
+
"rewards/rejected": -0.07374300062656403,
|
4469 |
+
"step": 584
|
4470 |
+
},
|
4471 |
+
{
|
4472 |
+
"epoch": 0.6786825913861745,
|
4473 |
+
"grad_norm": 71.83056408251632,
|
4474 |
+
"learning_rate": 1.5905952733905773e-07,
|
4475 |
+
"logits/chosen": -1.3281779289245605,
|
4476 |
+
"logits/rejected": -1.37840735912323,
|
4477 |
+
"logps/chosen": -150.58189392089844,
|
4478 |
+
"logps/rejected": -174.99461364746094,
|
4479 |
+
"loss": 0.6312,
|
4480 |
+
"rewards/accuracies": 0.65625,
|
4481 |
+
"rewards/chosen": 0.022576339542865753,
|
4482 |
+
"rewards/margins": 0.32831788063049316,
|
4483 |
+
"rewards/rejected": -0.3057415187358856,
|
4484 |
+
"step": 586
|
4485 |
+
},
|
4486 |
+
{
|
4487 |
+
"epoch": 0.6809989142236699,
|
4488 |
+
"grad_norm": 83.87757428866225,
|
4489 |
+
"learning_rate": 1.5874725882628598e-07,
|
4490 |
+
"logits/chosen": -1.2740365266799927,
|
4491 |
+
"logits/rejected": -1.3645150661468506,
|
4492 |
+
"logps/chosen": -119.89724731445312,
|
4493 |
+
"logps/rejected": -143.9542694091797,
|
4494 |
+
"loss": 0.6469,
|
4495 |
+
"rewards/accuracies": 0.625,
|
4496 |
+
"rewards/chosen": -0.08816975355148315,
|
4497 |
+
"rewards/margins": 0.3666497468948364,
|
4498 |
+
"rewards/rejected": -0.4548195004463196,
|
4499 |
+
"step": 588
|
4500 |
+
},
|
4501 |
+
{
|
4502 |
+
"epoch": 0.6833152370611654,
|
4503 |
+
"grad_norm": 95.15763805819658,
|
4504 |
+
"learning_rate": 1.5843411309917773e-07,
|
4505 |
+
"logits/chosen": -1.1707677841186523,
|
4506 |
+
"logits/rejected": -1.2354707717895508,
|
4507 |
+
"logps/chosen": -137.05491638183594,
|
4508 |
+
"logps/rejected": -164.23329162597656,
|
4509 |
+
"loss": 0.6325,
|
4510 |
+
"rewards/accuracies": 0.75,
|
4511 |
+
"rewards/chosen": -0.04570431262254715,
|
4512 |
+
"rewards/margins": 0.837788999080658,
|
4513 |
+
"rewards/rejected": -0.8834933638572693,
|
4514 |
+
"step": 590
|
4515 |
+
},
|
4516 |
+
{
|
4517 |
+
"epoch": 0.6856315598986609,
|
4518 |
+
"grad_norm": 89.31999210372004,
|
4519 |
+
"learning_rate": 1.5812009483362641e-07,
|
4520 |
+
"logits/chosen": -1.3425350189208984,
|
4521 |
+
"logits/rejected": -1.3389382362365723,
|
4522 |
+
"logps/chosen": -114.12051391601562,
|
4523 |
+
"logps/rejected": -130.07469177246094,
|
4524 |
+
"loss": 0.6058,
|
4525 |
+
"rewards/accuracies": 0.71875,
|
4526 |
+
"rewards/chosen": 0.10132614523172379,
|
4527 |
+
"rewards/margins": 0.26681679487228394,
|
4528 |
+
"rewards/rejected": -0.16549064218997955,
|
4529 |
+
"step": 592
|
4530 |
+
},
|
4531 |
+
{
|
4532 |
+
"epoch": 0.6879478827361564,
|
4533 |
+
"grad_norm": 86.55981599933118,
|
4534 |
+
"learning_rate": 1.5780520871855416e-07,
|
4535 |
+
"logits/chosen": -1.4263215065002441,
|
4536 |
+
"logits/rejected": -1.4661970138549805,
|
4537 |
+
"logps/chosen": -146.5399932861328,
|
4538 |
+
"logps/rejected": -162.22738647460938,
|
4539 |
+
"loss": 0.6227,
|
4540 |
+
"rewards/accuracies": 0.625,
|
4541 |
+
"rewards/chosen": 0.1263137012720108,
|
4542 |
+
"rewards/margins": 0.7135946750640869,
|
4543 |
+
"rewards/rejected": -0.5872809886932373,
|
4544 |
+
"step": 594
|
4545 |
+
},
|
4546 |
+
{
|
4547 |
+
"epoch": 0.6902642055736519,
|
4548 |
+
"grad_norm": 77.11466861321054,
|
4549 |
+
"learning_rate": 1.5748945945584194e-07,
|
4550 |
+
"logits/chosen": -1.1571879386901855,
|
4551 |
+
"logits/rejected": -1.2603471279144287,
|
4552 |
+
"logps/chosen": -138.8977813720703,
|
4553 |
+
"logps/rejected": -177.0740509033203,
|
4554 |
+
"loss": 0.6776,
|
4555 |
+
"rewards/accuracies": 0.625,
|
4556 |
+
"rewards/chosen": -0.02539961040019989,
|
4557 |
+
"rewards/margins": 0.39455336332321167,
|
4558 |
+
"rewards/rejected": -0.41995295882225037,
|
4559 |
+
"step": 596
|
4560 |
+
},
|
4561 |
+
{
|
4562 |
+
"epoch": 0.6925805284111473,
|
4563 |
+
"grad_norm": 81.2459965814331,
|
4564 |
+
"learning_rate": 1.5717285176025912e-07,
|
4565 |
+
"logits/chosen": -1.2991225719451904,
|
4566 |
+
"logits/rejected": -1.38021981716156,
|
4567 |
+
"logps/chosen": -151.61753845214844,
|
4568 |
+
"logps/rejected": -176.51548767089844,
|
4569 |
+
"loss": 0.5917,
|
4570 |
+
"rewards/accuracies": 0.78125,
|
4571 |
+
"rewards/chosen": 0.20256918668746948,
|
4572 |
+
"rewards/margins": 0.4604591131210327,
|
4573 |
+
"rewards/rejected": -0.25788992643356323,
|
4574 |
+
"step": 598
|
4575 |
+
},
|
4576 |
+
{
|
4577 |
+
"epoch": 0.6948968512486428,
|
4578 |
+
"grad_norm": 70.92510708156665,
|
4579 |
+
"learning_rate": 1.568553903593933e-07,
|
4580 |
+
"logits/chosen": -1.444725513458252,
|
4581 |
+
"logits/rejected": -1.3993281126022339,
|
4582 |
+
"logps/chosen": -111.9288330078125,
|
4583 |
+
"logps/rejected": -113.10049438476562,
|
4584 |
+
"loss": 0.6366,
|
4585 |
+
"rewards/accuracies": 0.53125,
|
4586 |
+
"rewards/chosen": -0.008920304477214813,
|
4587 |
+
"rewards/margins": 0.010683823376893997,
|
4588 |
+
"rewards/rejected": -0.019604135304689407,
|
4589 |
+
"step": 600
|
4590 |
+
},
|
4591 |
+
{
|
4592 |
+
"epoch": 0.6948968512486428,
|
4593 |
+
"eval_logits/chosen": -1.3430299758911133,
|
4594 |
+
"eval_logits/rejected": -1.3360421657562256,
|
4595 |
+
"eval_logps/chosen": -137.48741149902344,
|
4596 |
+
"eval_logps/rejected": -138.31024169921875,
|
4597 |
+
"eval_loss": 0.6260569095611572,
|
4598 |
+
"eval_rewards/accuracies": 0.6399999856948853,
|
4599 |
+
"eval_rewards/chosen": -0.185623899102211,
|
4600 |
+
"eval_rewards/margins": 0.25004515051841736,
|
4601 |
+
"eval_rewards/rejected": -0.43566906452178955,
|
4602 |
+
"eval_runtime": 24.9391,
|
4603 |
+
"eval_samples_per_second": 4.01,
|
4604 |
+
"eval_steps_per_second": 1.002,
|
4605 |
+
"step": 600
|
4606 |
}
|
4607 |
],
|
4608 |
"logging_steps": 2,
|