RyanYr committed on
Commit
07112cd
1 Parent(s): 12c0121

Training in progress, step 1500, checkpoint

last-checkpoint/global_step1500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6722a97b0b4e8ac164766b954c4f4bd20c3f22259dcc99abdd2a54bed1e54ebc
+ size 24090788996
last-checkpoint/global_step1500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8ddc6b7a9bb4fcdb46cdb1830b37ea67dc4cc7e6c343ef4865cc27ca390beafb
+ size 24090788996
last-checkpoint/global_step1500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:478d375f061004cbf281130b047929a505f779d3bf30ae5917214ec805ad6ac7
+ size 24090788996
last-checkpoint/global_step1500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d9fe32d49d4b0104f2453c1bd44d33e45a7f8786d8351e2cc57ab1ef97aceace
+ size 24090788996
last-checkpoint/global_step1500/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfc8c9c9a7d2fcfa2e8fa40312006739b2c850b9a5ecfacd34f5a60173282093
+ size 150693
last-checkpoint/global_step1500/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:259ffd7944316a19d3cd4a130f207a063579c77fad5d447554aeff12156085d6
+ size 150693
last-checkpoint/global_step1500/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d29c1fe957781c3a62402feef79dfb38cf56b8fddb352092f6ffd32c6211e320
+ size 150693
last-checkpoint/global_step1500/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:921a755d22277c2d84c9363b6dc0c6e459c1e6fbaddb89bc814ee9c5db4f54fb
+ size 150693
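Note: the checkpoint files above are stored as Git LFS pointer files rather than the binary payloads; each pointer records only the spec version, a sha256 object id, and the byte size of the real file. As a minimal sketch (assuming the pointer file and the downloaded object are both available locally; the paths and the helper name are illustrative, not part of this repository), the recorded oid and size can be checked against a download like this:

```python
import hashlib

def verify_lfs_object(pointer_path, object_path, chunk_size=1 << 20):
    """Compare a downloaded file against the oid/size recorded in its Git LFS pointer."""
    fields = {}
    with open(pointer_path, "r", encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value

    expected_hash = fields["oid"].split(":", 1)[1]  # "sha256:<hex>" -> "<hex>"
    expected_size = int(fields["size"])

    # Stream the object so the ~24 GB optimizer shards are not read into memory at once.
    sha = hashlib.sha256()
    actual_size = 0
    with open(object_path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            sha.update(chunk)
            actual_size += len(chunk)

    return sha.hexdigest() == expected_hash and actual_size == expected_size
```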
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step1200
+ global_step1500
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:490b89e78ff904b995825330f5ce8e5fa6c2b37c660e95d7b400021114917123
+ oid sha256:ec16c0b98fcebf6052a9aa927090800759b4fb6700367ad2c29354ecbf45f9f7
  size 4976698672
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fb7ffd46563ff9ab39f6aced4141ad3f324d906dff502b8a1abf7cad146a8847
+ oid sha256:cd85acac0b1cf3d6b603028d0abef6bbae49730ebe45add6807617156b350d1c
  size 4999802720
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f84c6354be1f2b2ad0cf9f00c6b4a71c832f6082e1980570655c9db0034d55c4
+ oid sha256:2df1f85a263fda844c3a5170fff9df97853e8127b0a5eddf0ad7744a2325916c
  size 4915916176
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:455aff8b0320fce7909efe3e2352281f10e0960882e59d328a0e7a06bfe38c09
+ oid sha256:3e480518cebd078a58ebf6e0cea1f57aa4919ad9372aba8cc8a04682ef0e504f
  size 1168138808
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8044e4c53158c210a17648ba8f2dc2d25a25bbfc55f686015542618eb652a33e
+ oid sha256:b4f003069486a57c6ac033f30cf4c4213eb6b7d659bab68a5a50fdb8da7c4118
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4cd85d7fa425e7888c973f1c2985ac15ca21b5e6171fe140a401c2bc75ca46ff
+ oid sha256:a016ef89b4392d083b2c15a7cf06a39bc61a759f648cf6dc03f1c32b89a526aa
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d7915667371a58f1598639e0d1c20a0c59c783c14580cd040a6631eb4ea2311e
+ oid sha256:9b56fe0893036dc052d18d90feba4328b90ea71561942150b07406ac3d7a700e
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:35dd78929ad7f0fbf37fdb1284e8edf0424350f6e6ce1cd5a3ee78979af3d3cb
+ oid sha256:c0c203d12c2c308dab785ed672c9ca27fb6a2f72acd1e1552d1516c7b0006013
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fb6fabf84a4db93ed80ee4a419e3ff880be7088e879fe26d3b47309e87cf9f04
+ oid sha256:3ace6290c890a8d1e173a6da04a3c0a74aa055e1dc2c0b019def7feb7e061c29
  size 1064
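The trainer_state.json diff below appends the log entries between steps 1200 and 1500. As a rough sketch (assuming the standard transformers Trainer layout, i.e. a top-level "log_history" list holding one dict per logging or eval event, which is the shape the entries below suggest; the path and function name are illustrative), the newest metrics can be pulled out of the checkpoint like this:

```python
import json

def latest_metrics(path="last-checkpoint/trainer_state.json"):
    """Report the most recent train and eval entries from a Trainer state file."""
    with open(path, "r", encoding="utf-8") as f:
        state = json.load(f)

    print("global_step:", state["global_step"], "epoch:", state["epoch"])

    # Train entries carry "loss"; eval entries carry "eval_loss" instead.
    train_logs = [e for e in state["log_history"] if "loss" in e]
    eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

    if train_logs:
        last = train_logs[-1]
        print("train step", last["step"], "loss", last["loss"],
              "reward margin", last.get("rewards/margins"))
    if eval_logs:
        last = eval_logs[-1]
        print("eval step", last["step"], "eval_loss", last["eval_loss"],
              "accuracy", last.get("eval_rewards/accuracies"))
```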
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 1.3683985460765449,
+ "epoch": 1.710498182595681,
  "eval_steps": 100,
- "global_step": 1200,
+ "global_step": 1500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -9199,6 +9199,2304 @@
  "eval_samples_per_second": 4.941,
  "eval_steps_per_second": 1.235,
  "step": 1200
  }
  ],
  "logging_steps": 2,
9202
+ },
9203
+ {
9204
+ "epoch": 1.3706792103200058,
9205
+ "grad_norm": 64.03519542961854,
9206
+ "learning_rate": 4.98902460615475e-08,
9207
+ "logits/chosen": -1.3389551639556885,
9208
+ "logits/rejected": -1.3822637796401978,
9209
+ "logps/chosen": -175.14820861816406,
9210
+ "logps/rejected": -222.5068359375,
9211
+ "loss": 0.4712,
9212
+ "rewards/accuracies": 0.75,
9213
+ "rewards/chosen": -0.35669347643852234,
9214
+ "rewards/margins": 1.1921041011810303,
9215
+ "rewards/rejected": -1.548797607421875,
9216
+ "step": 1202
9217
+ },
9218
+ {
9219
+ "epoch": 1.3729598745634666,
9220
+ "grad_norm": 51.71910925101713,
9221
+ "learning_rate": 4.956146819977166e-08,
9222
+ "logits/chosen": -1.253232717514038,
9223
+ "logits/rejected": -1.2863636016845703,
9224
+ "logps/chosen": -176.71702575683594,
9225
+ "logps/rejected": -210.63230895996094,
9226
+ "loss": 0.4178,
9227
+ "rewards/accuracies": 0.84375,
9228
+ "rewards/chosen": -0.32385319471359253,
9229
+ "rewards/margins": 1.246777057647705,
9230
+ "rewards/rejected": -1.570630431175232,
9231
+ "step": 1204
9232
+ },
9233
+ {
9234
+ "epoch": 1.3752405388069275,
9235
+ "grad_norm": 56.79692940311852,
9236
+ "learning_rate": 4.923341996611603e-08,
9237
+ "logits/chosen": -1.1557482481002808,
9238
+ "logits/rejected": -1.180600881576538,
9239
+ "logps/chosen": -150.3299102783203,
9240
+ "logps/rejected": -167.61911010742188,
9241
+ "loss": 0.4255,
9242
+ "rewards/accuracies": 0.6875,
9243
+ "rewards/chosen": -0.3654107451438904,
9244
+ "rewards/margins": 0.7367621064186096,
9245
+ "rewards/rejected": -1.1021727323532104,
9246
+ "step": 1206
9247
+ },
9248
+ {
9249
+ "epoch": 1.3775212030503885,
9250
+ "grad_norm": 57.06474376403373,
9251
+ "learning_rate": 4.890610610602437e-08,
9252
+ "logits/chosen": -1.297890067100525,
9253
+ "logits/rejected": -1.347840666770935,
9254
+ "logps/chosen": -209.23716735839844,
9255
+ "logps/rejected": -259.1905517578125,
9256
+ "loss": 0.3961,
9257
+ "rewards/accuracies": 0.875,
9258
+ "rewards/chosen": -0.45185887813568115,
9259
+ "rewards/margins": 1.451210856437683,
9260
+ "rewards/rejected": -1.9030694961547852,
9261
+ "step": 1208
9262
+ },
9263
+ {
9264
+ "epoch": 1.3798018672938492,
9265
+ "grad_norm": 60.29263036985363,
9266
+ "learning_rate": 4.8579531354317225e-08,
9267
+ "logits/chosen": -1.397212266921997,
9268
+ "logits/rejected": -1.3925597667694092,
9269
+ "logps/chosen": -183.3704833984375,
9270
+ "logps/rejected": -195.10496520996094,
9271
+ "loss": 0.4579,
9272
+ "rewards/accuracies": 0.75,
9273
+ "rewards/chosen": -0.7833220362663269,
9274
+ "rewards/margins": 0.7912861108779907,
9275
+ "rewards/rejected": -1.574608325958252,
9276
+ "step": 1210
9277
+ },
9278
+ {
9279
+ "epoch": 1.3820825315373102,
9280
+ "grad_norm": 52.32294118146283,
9281
+ "learning_rate": 4.825370043512339e-08,
9282
+ "logits/chosen": -1.3067998886108398,
9283
+ "logits/rejected": -1.3849916458129883,
9284
+ "logps/chosen": -162.16232299804688,
9285
+ "logps/rejected": -195.49609375,
9286
+ "loss": 0.4108,
9287
+ "rewards/accuracies": 0.875,
9288
+ "rewards/chosen": -0.33422791957855225,
9289
+ "rewards/margins": 1.0588434934616089,
9290
+ "rewards/rejected": -1.3930714130401611,
9291
+ "step": 1212
9292
+ },
9293
+ {
9294
+ "epoch": 1.3843631957807712,
9295
+ "grad_norm": 59.66742993964118,
9296
+ "learning_rate": 4.792861806181171e-08,
9297
+ "logits/chosen": -1.30001962184906,
9298
+ "logits/rejected": -1.386979341506958,
9299
+ "logps/chosen": -170.89529418945312,
9300
+ "logps/rejected": -197.6066131591797,
9301
+ "loss": 0.4525,
9302
+ "rewards/accuracies": 0.8125,
9303
+ "rewards/chosen": -0.2925351560115814,
9304
+ "rewards/margins": 0.9441651105880737,
9305
+ "rewards/rejected": -1.2367002964019775,
9306
+ "step": 1214
9307
+ },
9308
+ {
9309
+ "epoch": 1.3866438600242321,
9310
+ "grad_norm": 65.32724813467337,
9311
+ "learning_rate": 4.760428893692273e-08,
9312
+ "logits/chosen": -1.2875810861587524,
9313
+ "logits/rejected": -1.3601633310317993,
9314
+ "logps/chosen": -160.88156127929688,
9315
+ "logps/rejected": -190.6903076171875,
9316
+ "loss": 0.4288,
9317
+ "rewards/accuracies": 0.71875,
9318
+ "rewards/chosen": -0.6599161624908447,
9319
+ "rewards/margins": 0.7817404270172119,
9320
+ "rewards/rejected": -1.4416565895080566,
9321
+ "step": 1216
9322
+ },
9323
+ {
9324
+ "epoch": 1.388924524267693,
9325
+ "grad_norm": 52.68799788674585,
9326
+ "learning_rate": 4.728071775210069e-08,
9327
+ "logits/chosen": -1.3100471496582031,
9328
+ "logits/rejected": -1.295668125152588,
9329
+ "logps/chosen": -157.36289978027344,
9330
+ "logps/rejected": -179.96127319335938,
9331
+ "loss": 0.4265,
9332
+ "rewards/accuracies": 0.75,
9333
+ "rewards/chosen": -0.36468327045440674,
9334
+ "rewards/margins": 0.9460710883140564,
9335
+ "rewards/rejected": -1.3107542991638184,
9336
+ "step": 1218
9337
+ },
9338
+ {
9339
+ "epoch": 1.3912051885111538,
9340
+ "grad_norm": 59.812629327754166,
9341
+ "learning_rate": 4.695790918802576e-08,
9342
+ "logits/chosen": -1.4171504974365234,
9343
+ "logits/rejected": -1.3959426879882812,
9344
+ "logps/chosen": -226.54220581054688,
9345
+ "logps/rejected": -242.77142333984375,
9346
+ "loss": 0.4516,
9347
+ "rewards/accuracies": 0.71875,
9348
+ "rewards/chosen": -0.6130508184432983,
9349
+ "rewards/margins": 1.1557624340057373,
9350
+ "rewards/rejected": -1.768813133239746,
9351
+ "step": 1220
9352
+ },
9353
+ {
9354
+ "epoch": 1.3934858527546148,
9355
+ "grad_norm": 66.91529385196273,
9356
+ "learning_rate": 4.663586791434628e-08,
9357
+ "logits/chosen": -1.1362406015396118,
9358
+ "logits/rejected": -1.2372556924819946,
9359
+ "logps/chosen": -182.88442993164062,
9360
+ "logps/rejected": -217.1130828857422,
9361
+ "loss": 0.4164,
9362
+ "rewards/accuracies": 0.78125,
9363
+ "rewards/chosen": -0.5246292352676392,
9364
+ "rewards/margins": 1.0273668766021729,
9365
+ "rewards/rejected": -1.551996111869812,
9366
+ "step": 1222
9367
+ },
9368
+ {
9369
+ "epoch": 1.3957665169980757,
9370
+ "grad_norm": 57.29717940934624,
9371
+ "learning_rate": 4.631459858961122e-08,
9372
+ "logits/chosen": -1.219807744026184,
9373
+ "logits/rejected": -1.2460401058197021,
9374
+ "logps/chosen": -153.93907165527344,
9375
+ "logps/rejected": -200.4837188720703,
9376
+ "loss": 0.4358,
9377
+ "rewards/accuracies": 0.84375,
9378
+ "rewards/chosen": -0.2589726746082306,
9379
+ "rewards/margins": 1.4164376258850098,
9380
+ "rewards/rejected": -1.675410509109497,
9381
+ "step": 1224
9382
+ },
9383
+ {
9384
+ "epoch": 1.3980471812415365,
9385
+ "grad_norm": 61.08923102786796,
9386
+ "learning_rate": 4.5994105861202715e-08,
9387
+ "logits/chosen": -1.3403871059417725,
9388
+ "logits/rejected": -1.3382725715637207,
9389
+ "logps/chosen": -147.61839294433594,
9390
+ "logps/rejected": -150.13453674316406,
9391
+ "loss": 0.4522,
9392
+ "rewards/accuracies": 0.875,
9393
+ "rewards/chosen": -0.403720498085022,
9394
+ "rewards/margins": 0.7644599080085754,
9395
+ "rewards/rejected": -1.1681804656982422,
9396
+ "step": 1226
9397
+ },
9398
+ {
9399
+ "epoch": 1.4003278454849974,
9400
+ "grad_norm": 55.589268418781224,
9401
+ "learning_rate": 4.5674394365268965e-08,
9402
+ "logits/chosen": -1.1604863405227661,
9403
+ "logits/rejected": -1.2054895162582397,
9404
+ "logps/chosen": -169.14215087890625,
9405
+ "logps/rejected": -203.62513732910156,
9406
+ "loss": 0.4333,
9407
+ "rewards/accuracies": 0.78125,
9408
+ "rewards/chosen": -0.32466423511505127,
9409
+ "rewards/margins": 1.318765640258789,
9410
+ "rewards/rejected": -1.6434298753738403,
9411
+ "step": 1228
9412
+ },
9413
+ {
9414
+ "epoch": 1.4026085097284584,
9415
+ "grad_norm": 50.39174380587635,
9416
+ "learning_rate": 4.535546872665707e-08,
9417
+ "logits/chosen": -1.2403908967971802,
9418
+ "logits/rejected": -1.3284348249435425,
9419
+ "logps/chosen": -154.95285034179688,
9420
+ "logps/rejected": -183.48558044433594,
9421
+ "loss": 0.5428,
9422
+ "rewards/accuracies": 0.75,
9423
+ "rewards/chosen": -0.2970479428768158,
9424
+ "rewards/margins": 0.813206672668457,
9425
+ "rewards/rejected": -1.1102546453475952,
9426
+ "step": 1230
9427
+ },
9428
+ {
9429
+ "epoch": 1.4048891739719194,
9430
+ "grad_norm": 64.62636544867223,
9431
+ "learning_rate": 4.5037333558846145e-08,
9432
+ "logits/chosen": -1.1855900287628174,
9433
+ "logits/rejected": -1.1975244283676147,
9434
+ "logps/chosen": -102.4107437133789,
9435
+ "logps/rejected": -124.4788589477539,
9436
+ "loss": 0.4184,
9437
+ "rewards/accuracies": 0.84375,
9438
+ "rewards/chosen": -0.20104435086250305,
9439
+ "rewards/margins": 0.7573148608207703,
9440
+ "rewards/rejected": -0.9583592414855957,
9441
+ "step": 1232
9442
+ },
9443
+ {
9444
+ "epoch": 1.4071698382153803,
9445
+ "grad_norm": 65.48395677041327,
9446
+ "learning_rate": 4.471999346388069e-08,
9447
+ "logits/chosen": -1.2656984329223633,
9448
+ "logits/rejected": -1.2520796060562134,
9449
+ "logps/chosen": -145.40838623046875,
9450
+ "logps/rejected": -172.25161743164062,
9451
+ "loss": 0.4563,
9452
+ "rewards/accuracies": 0.875,
9453
+ "rewards/chosen": -0.48840075731277466,
9454
+ "rewards/margins": 0.857879102230072,
9455
+ "rewards/rejected": -1.3462798595428467,
9456
+ "step": 1234
9457
+ },
9458
+ {
9459
+ "epoch": 1.409450502458841,
9460
+ "grad_norm": 53.15387042970869,
9461
+ "learning_rate": 4.4403453032303765e-08,
9462
+ "logits/chosen": -1.393466591835022,
9463
+ "logits/rejected": -1.4557361602783203,
9464
+ "logps/chosen": -160.5977020263672,
9465
+ "logps/rejected": -189.77520751953125,
9466
+ "loss": 0.4181,
9467
+ "rewards/accuracies": 0.90625,
9468
+ "rewards/chosen": -0.379285603761673,
9469
+ "rewards/margins": 1.075732946395874,
9470
+ "rewards/rejected": -1.4550185203552246,
9471
+ "step": 1236
9472
+ },
9473
+ {
9474
+ "epoch": 1.411731166702302,
9475
+ "grad_norm": 63.15445502776712,
9476
+ "learning_rate": 4.4087716843090895e-08,
9477
+ "logits/chosen": -1.3475301265716553,
9478
+ "logits/rejected": -1.3905658721923828,
9479
+ "logps/chosen": -166.34042358398438,
9480
+ "logps/rejected": -213.61553955078125,
9481
+ "loss": 0.461,
9482
+ "rewards/accuracies": 0.75,
9483
+ "rewards/chosen": -0.3821715712547302,
9484
+ "rewards/margins": 0.9115235805511475,
9485
+ "rewards/rejected": -1.2936952114105225,
9486
+ "step": 1238
9487
+ },
9488
+ {
9489
+ "epoch": 1.414011830945763,
9490
+ "grad_norm": 52.37085158764497,
9491
+ "learning_rate": 4.3772789463583627e-08,
9492
+ "logits/chosen": -1.3224272727966309,
9493
+ "logits/rejected": -1.3503855466842651,
9494
+ "logps/chosen": -172.18899536132812,
9495
+ "logps/rejected": -193.6123504638672,
9496
+ "loss": 0.4386,
9497
+ "rewards/accuracies": 0.78125,
9498
+ "rewards/chosen": -0.44380372762680054,
9499
+ "rewards/margins": 0.857738733291626,
9500
+ "rewards/rejected": -1.3015424013137817,
9501
+ "step": 1240
9502
+ },
9503
+ {
9504
+ "epoch": 1.4162924951892237,
9505
+ "grad_norm": 64.55355325183278,
9506
+ "learning_rate": 4.345867544942353e-08,
9507
+ "logits/chosen": -1.2631657123565674,
9508
+ "logits/rejected": -1.3294970989227295,
9509
+ "logps/chosen": -181.64718627929688,
9510
+ "logps/rejected": -230.25701904296875,
9511
+ "loss": 0.4121,
9512
+ "rewards/accuracies": 0.71875,
9513
+ "rewards/chosen": -0.5138639211654663,
9514
+ "rewards/margins": 0.8278074264526367,
9515
+ "rewards/rejected": -1.341671347618103,
9516
+ "step": 1242
9517
+ },
9518
+ {
9519
+ "epoch": 1.4185731594326847,
9520
+ "grad_norm": 65.60045840124785,
9521
+ "learning_rate": 4.314537934448628e-08,
9522
+ "logits/chosen": -1.2260847091674805,
9523
+ "logits/rejected": -1.2379428148269653,
9524
+ "logps/chosen": -175.8683624267578,
9525
+ "logps/rejected": -202.67051696777344,
9526
+ "loss": 0.4525,
9527
+ "rewards/accuracies": 0.875,
9528
+ "rewards/chosen": -0.4962596893310547,
9529
+ "rewards/margins": 1.1486889123916626,
9530
+ "rewards/rejected": -1.6449487209320068,
9531
+ "step": 1244
9532
+ },
9533
+ {
9534
+ "epoch": 1.4208538236761457,
9535
+ "grad_norm": 77.13313632492196,
9536
+ "learning_rate": 4.283290568081591e-08,
9537
+ "logits/chosen": -1.1557011604309082,
9538
+ "logits/rejected": -1.2198400497436523,
9539
+ "logps/chosen": -171.91201782226562,
9540
+ "logps/rejected": -200.71144104003906,
9541
+ "loss": 0.4431,
9542
+ "rewards/accuracies": 0.84375,
9543
+ "rewards/chosen": -0.4828857481479645,
9544
+ "rewards/margins": 0.8654166460037231,
9545
+ "rewards/rejected": -1.3483023643493652,
9546
+ "step": 1246
9547
+ },
9548
+ {
9549
+ "epoch": 1.4231344879196066,
9550
+ "grad_norm": 55.00661044224625,
9551
+ "learning_rate": 4.2521258978559314e-08,
9552
+ "logits/chosen": -1.258105993270874,
9553
+ "logits/rejected": -1.282645583152771,
9554
+ "logps/chosen": -180.73272705078125,
9555
+ "logps/rejected": -241.80764770507812,
9556
+ "loss": 0.4077,
9557
+ "rewards/accuracies": 0.875,
9558
+ "rewards/chosen": -0.5227749347686768,
9559
+ "rewards/margins": 1.5435858964920044,
9560
+ "rewards/rejected": -2.0663607120513916,
9561
+ "step": 1248
9562
+ },
9563
+ {
9564
+ "epoch": 1.4254151521630676,
9565
+ "grad_norm": 53.935655347865456,
9566
+ "learning_rate": 4.2210443745900804e-08,
9567
+ "logits/chosen": -1.1817071437835693,
9568
+ "logits/rejected": -1.247178077697754,
9569
+ "logps/chosen": -151.1819610595703,
9570
+ "logps/rejected": -170.9989776611328,
9571
+ "loss": 0.4273,
9572
+ "rewards/accuracies": 0.8125,
9573
+ "rewards/chosen": -0.3710220456123352,
9574
+ "rewards/margins": 0.9180817604064941,
9575
+ "rewards/rejected": -1.2891038656234741,
9576
+ "step": 1250
9577
+ },
9578
+ {
9579
+ "epoch": 1.4276958164065285,
9580
+ "grad_norm": 57.828569007499375,
9581
+ "learning_rate": 4.190046447899689e-08,
9582
+ "logits/chosen": -1.2078405618667603,
9583
+ "logits/rejected": -1.2860413789749146,
9584
+ "logps/chosen": -141.80389404296875,
9585
+ "logps/rejected": -168.6719512939453,
9586
+ "loss": 0.4219,
9587
+ "rewards/accuracies": 0.75,
9588
+ "rewards/chosen": -0.4949069023132324,
9589
+ "rewards/margins": 0.8605579137802124,
9590
+ "rewards/rejected": -1.3554648160934448,
9591
+ "step": 1252
9592
+ },
9593
+ {
9594
+ "epoch": 1.4299764806499893,
9595
+ "grad_norm": 49.820693081313166,
9596
+ "learning_rate": 4.159132566191129e-08,
9597
+ "logits/chosen": -1.3393913507461548,
9598
+ "logits/rejected": -1.393333911895752,
9599
+ "logps/chosen": -139.9639434814453,
9600
+ "logps/rejected": -185.2383575439453,
9601
+ "loss": 0.401,
9602
+ "rewards/accuracies": 0.78125,
9603
+ "rewards/chosen": -0.3605578541755676,
9604
+ "rewards/margins": 1.1692156791687012,
9605
+ "rewards/rejected": -1.5297735929489136,
9606
+ "step": 1254
9607
+ },
9608
+ {
9609
+ "epoch": 1.4322571448934502,
9610
+ "grad_norm": 56.66007237622535,
9611
+ "learning_rate": 4.1283031766550014e-08,
9612
+ "logits/chosen": -1.159570336341858,
9613
+ "logits/rejected": -1.2179524898529053,
9614
+ "logps/chosen": -167.4163818359375,
9615
+ "logps/rejected": -245.31614685058594,
9616
+ "loss": 0.4344,
9617
+ "rewards/accuracies": 0.75,
9618
+ "rewards/chosen": -0.5021023750305176,
9619
+ "rewards/margins": 1.504926085472107,
9620
+ "rewards/rejected": -2.007028341293335,
9621
+ "step": 1256
9622
+ },
9623
+ {
9624
+ "epoch": 1.4345378091369112,
9625
+ "grad_norm": 70.62489205877687,
9626
+ "learning_rate": 4.097558725259672e-08,
9627
+ "logits/chosen": -1.33053457736969,
9628
+ "logits/rejected": -1.388944149017334,
9629
+ "logps/chosen": -178.24188232421875,
9630
+ "logps/rejected": -234.7947998046875,
9631
+ "loss": 0.3998,
9632
+ "rewards/accuracies": 0.78125,
9633
+ "rewards/chosen": -0.40949881076812744,
9634
+ "rewards/margins": 1.050101399421692,
9635
+ "rewards/rejected": -1.4596002101898193,
9636
+ "step": 1258
9637
+ },
9638
+ {
9639
+ "epoch": 1.436818473380372,
9640
+ "grad_norm": 66.98990507182315,
9641
+ "learning_rate": 4.0668996567448154e-08,
9642
+ "logits/chosen": -1.414581060409546,
9643
+ "logits/rejected": -1.4014796018600464,
9644
+ "logps/chosen": -169.44393920898438,
9645
+ "logps/rejected": -187.62033081054688,
9646
+ "loss": 0.4344,
9647
+ "rewards/accuracies": 0.65625,
9648
+ "rewards/chosen": -0.3710756003856659,
9649
+ "rewards/margins": 0.5481195449829102,
9650
+ "rewards/rejected": -0.9191950559616089,
9651
+ "step": 1260
9652
+ },
9653
+ {
9654
+ "epoch": 1.439099137623833,
9655
+ "grad_norm": 57.85916556533107,
9656
+ "learning_rate": 4.0363264146149844e-08,
9657
+ "logits/chosen": -1.191425085067749,
9658
+ "logits/rejected": -1.2502682209014893,
9659
+ "logps/chosen": -190.24957275390625,
9660
+ "logps/rejected": -221.98013305664062,
9661
+ "loss": 0.4816,
9662
+ "rewards/accuracies": 0.875,
9663
+ "rewards/chosen": -0.5489203333854675,
9664
+ "rewards/margins": 1.0626386404037476,
9665
+ "rewards/rejected": -1.6115591526031494,
9666
+ "step": 1262
9667
+ },
9668
+ {
9669
+ "epoch": 1.4413798018672939,
9670
+ "grad_norm": 54.90589553967945,
9671
+ "learning_rate": 4.005839441133198e-08,
9672
+ "logits/chosen": -1.243033766746521,
9673
+ "logits/rejected": -1.3693134784698486,
9674
+ "logps/chosen": -153.51661682128906,
9675
+ "logps/rejected": -217.17115783691406,
9676
+ "loss": 0.4357,
9677
+ "rewards/accuracies": 0.75,
9678
+ "rewards/chosen": -0.41171663999557495,
9679
+ "rewards/margins": 1.114880084991455,
9680
+ "rewards/rejected": -1.5265967845916748,
9681
+ "step": 1264
9682
+ },
9683
+ {
9684
+ "epoch": 1.4436604661107548,
9685
+ "grad_norm": 62.949403049310924,
9686
+ "learning_rate": 3.9754391773145326e-08,
9687
+ "logits/chosen": -1.3396437168121338,
9688
+ "logits/rejected": -1.3314851522445679,
9689
+ "logps/chosen": -195.9013671875,
9690
+ "logps/rejected": -226.99224853515625,
9691
+ "loss": 0.4549,
9692
+ "rewards/accuracies": 0.78125,
9693
+ "rewards/chosen": -0.6362702250480652,
9694
+ "rewards/margins": 0.9720792174339294,
9695
+ "rewards/rejected": -1.6083494424819946,
9696
+ "step": 1266
9697
+ },
9698
+ {
9699
+ "epoch": 1.4459411303542158,
9700
+ "grad_norm": 109.10943345175065,
9701
+ "learning_rate": 3.945126062919756e-08,
9702
+ "logits/chosen": -1.4142718315124512,
9703
+ "logits/rejected": -1.3863128423690796,
9704
+ "logps/chosen": -232.01536560058594,
9705
+ "logps/rejected": -258.9195861816406,
9706
+ "loss": 0.4683,
9707
+ "rewards/accuracies": 0.84375,
9708
+ "rewards/chosen": -0.5452659130096436,
9709
+ "rewards/margins": 1.1544592380523682,
9710
+ "rewards/rejected": -1.6997252702713013,
9711
+ "step": 1268
9712
+ },
9713
+ {
9714
+ "epoch": 1.4482217945976765,
9715
+ "grad_norm": 66.47161188134956,
9716
+ "learning_rate": 3.914900536448959e-08,
9717
+ "logits/chosen": -1.277639389038086,
9718
+ "logits/rejected": -1.2443594932556152,
9719
+ "logps/chosen": -167.44473266601562,
9720
+ "logps/rejected": -201.05442810058594,
9721
+ "loss": 0.468,
9722
+ "rewards/accuracies": 0.75,
9723
+ "rewards/chosen": -0.40504151582717896,
9724
+ "rewards/margins": 1.4868779182434082,
9725
+ "rewards/rejected": -1.8919193744659424,
9726
+ "step": 1270
9727
+ },
9728
+ {
9729
+ "epoch": 1.4505024588411375,
9730
+ "grad_norm": 65.21617644771925,
9731
+ "learning_rate": 3.8847630351352045e-08,
9732
+ "logits/chosen": -1.2669049501419067,
9733
+ "logits/rejected": -1.2787154912948608,
9734
+ "logps/chosen": -244.93118286132812,
9735
+ "logps/rejected": -307.34344482421875,
9736
+ "loss": 0.4429,
9737
+ "rewards/accuracies": 0.78125,
9738
+ "rewards/chosen": -0.9488785862922668,
9739
+ "rewards/margins": 1.5935635566711426,
9740
+ "rewards/rejected": -2.5424418449401855,
9741
+ "step": 1272
9742
+ },
9743
+ {
9744
+ "epoch": 1.4527831230845984,
9745
+ "grad_norm": 67.80268214039134,
9746
+ "learning_rate": 3.854713994938221e-08,
9747
+ "logits/chosen": -1.3307723999023438,
9748
+ "logits/rejected": -1.400517463684082,
9749
+ "logps/chosen": -156.8104248046875,
9750
+ "logps/rejected": -182.17807006835938,
9751
+ "loss": 0.4631,
9752
+ "rewards/accuracies": 0.78125,
9753
+ "rewards/chosen": -0.2960550785064697,
9754
+ "rewards/margins": 0.9029641151428223,
9755
+ "rewards/rejected": -1.1990193128585815,
9756
+ "step": 1274
9757
+ },
9758
+ {
9759
+ "epoch": 1.4550637873280592,
9760
+ "grad_norm": 66.08251651441701,
9761
+ "learning_rate": 3.8247538505380816e-08,
9762
+ "logits/chosen": -1.3477903604507446,
9763
+ "logits/rejected": -1.4483450651168823,
9764
+ "logps/chosen": -194.99876403808594,
9765
+ "logps/rejected": -235.32986450195312,
9766
+ "loss": 0.4232,
9767
+ "rewards/accuracies": 0.78125,
9768
+ "rewards/chosen": -0.44531339406967163,
9769
+ "rewards/margins": 0.8939595222473145,
9770
+ "rewards/rejected": -1.3392727375030518,
9771
+ "step": 1276
9772
+ },
9773
+ {
9774
+ "epoch": 1.4573444515715201,
9775
+ "grad_norm": 64.19995573754183,
9776
+ "learning_rate": 3.794883035328921e-08,
9777
+ "logits/chosen": -1.2755396366119385,
9778
+ "logits/rejected": -1.3425655364990234,
9779
+ "logps/chosen": -186.4855499267578,
9780
+ "logps/rejected": -233.87648010253906,
9781
+ "loss": 0.4074,
9782
+ "rewards/accuracies": 0.84375,
9783
+ "rewards/chosen": -0.3420637249946594,
9784
+ "rewards/margins": 1.2022613286972046,
9785
+ "rewards/rejected": -1.5443251132965088,
9786
+ "step": 1278
9787
+ },
9788
+ {
9789
+ "epoch": 1.4596251158149811,
9790
+ "grad_norm": 51.505850800471386,
9791
+ "learning_rate": 3.765101981412665e-08,
9792
+ "logits/chosen": -1.0226508378982544,
9793
+ "logits/rejected": -1.1150188446044922,
9794
+ "logps/chosen": -165.54244995117188,
9795
+ "logps/rejected": -209.08641052246094,
9796
+ "loss": 0.4498,
9797
+ "rewards/accuracies": 0.78125,
9798
+ "rewards/chosen": -0.6313174962997437,
9799
+ "rewards/margins": 0.6761065721511841,
9800
+ "rewards/rejected": -1.3074240684509277,
9801
+ "step": 1280
9802
+ },
9803
+ {
9804
+ "epoch": 1.461905780058442,
9805
+ "grad_norm": 65.45552204323278,
9806
+ "learning_rate": 3.735411119592782e-08,
9807
+ "logits/chosen": -1.1841048002243042,
9808
+ "logits/rejected": -1.1225578784942627,
9809
+ "logps/chosen": -210.95909118652344,
9810
+ "logps/rejected": -228.45896911621094,
9811
+ "loss": 0.4092,
9812
+ "rewards/accuracies": 0.90625,
9813
+ "rewards/chosen": -0.6781859397888184,
9814
+ "rewards/margins": 1.1642651557922363,
9815
+ "rewards/rejected": -1.8424510955810547,
9816
+ "step": 1282
9817
+ },
9818
+ {
9819
+ "epoch": 1.464186444301903,
9820
+ "grad_norm": 54.509138851706474,
9821
+ "learning_rate": 3.705810879368047e-08,
9822
+ "logits/chosen": -1.260365605354309,
9823
+ "logits/rejected": -1.2909530401229858,
9824
+ "logps/chosen": -191.3780517578125,
9825
+ "logps/rejected": -205.57510375976562,
9826
+ "loss": 0.4331,
9827
+ "rewards/accuracies": 0.84375,
9828
+ "rewards/chosen": -0.3257947564125061,
9829
+ "rewards/margins": 1.1021668910980225,
9830
+ "rewards/rejected": -1.4279615879058838,
9831
+ "step": 1284
9832
+ },
9833
+ {
9834
+ "epoch": 1.4664671085453638,
9835
+ "grad_norm": 62.120618048817526,
9836
+ "learning_rate": 3.6763016889263345e-08,
9837
+ "logits/chosen": -1.233807921409607,
9838
+ "logits/rejected": -1.1972962617874146,
9839
+ "logps/chosen": -130.00839233398438,
9840
+ "logps/rejected": -157.5312957763672,
9841
+ "loss": 0.4483,
9842
+ "rewards/accuracies": 0.78125,
9843
+ "rewards/chosen": -0.3486970067024231,
9844
+ "rewards/margins": 0.8312156796455383,
9845
+ "rewards/rejected": -1.1799125671386719,
9846
+ "step": 1286
9847
+ },
9848
+ {
9849
+ "epoch": 1.4687477727888247,
9850
+ "grad_norm": 62.47302706565703,
9851
+ "learning_rate": 3.6468839751384206e-08,
9852
+ "logits/chosen": -1.2912284135818481,
9853
+ "logits/rejected": -1.286245584487915,
9854
+ "logps/chosen": -225.38636779785156,
9855
+ "logps/rejected": -253.08624267578125,
9856
+ "loss": 0.4193,
9857
+ "rewards/accuracies": 0.8125,
9858
+ "rewards/chosen": -0.7340028285980225,
9859
+ "rewards/margins": 1.2588945627212524,
9860
+ "rewards/rejected": -1.9928972721099854,
9861
+ "step": 1288
9862
+ },
9863
+ {
9864
+ "epoch": 1.4710284370322857,
9865
+ "grad_norm": 56.815010583661,
9866
+ "learning_rate": 3.6175581635518015e-08,
9867
+ "logits/chosen": -1.3371250629425049,
9868
+ "logits/rejected": -1.3072669506072998,
9869
+ "logps/chosen": -188.99107360839844,
9870
+ "logps/rejected": -214.9119873046875,
9871
+ "loss": 0.4364,
9872
+ "rewards/accuracies": 0.875,
9873
+ "rewards/chosen": -0.46143385767936707,
9874
+ "rewards/margins": 1.1111385822296143,
9875
+ "rewards/rejected": -1.5725722312927246,
9876
+ "step": 1290
9877
+ },
9878
+ {
9879
+ "epoch": 1.4733091012757464,
9880
+ "grad_norm": 74.02679784754338,
9881
+ "learning_rate": 3.5883246783845543e-08,
9882
+ "logits/chosen": -1.2495771646499634,
9883
+ "logits/rejected": -1.2960941791534424,
9884
+ "logps/chosen": -157.36351013183594,
9885
+ "logps/rejected": -196.3470916748047,
9886
+ "loss": 0.4401,
9887
+ "rewards/accuracies": 0.84375,
9888
+ "rewards/chosen": -0.23140710592269897,
9889
+ "rewards/margins": 1.0461037158966064,
9890
+ "rewards/rejected": -1.2775108814239502,
9891
+ "step": 1292
9892
+ },
9893
+ {
9894
+ "epoch": 1.4755897655192074,
9895
+ "grad_norm": 61.44266871054178,
9896
+ "learning_rate": 3.559183942519188e-08,
9897
+ "logits/chosen": -1.3195384740829468,
9898
+ "logits/rejected": -1.3464099168777466,
9899
+ "logps/chosen": -170.23886108398438,
9900
+ "logps/rejected": -182.47579956054688,
9901
+ "loss": 0.4044,
9902
+ "rewards/accuracies": 0.75,
9903
+ "rewards/chosen": -0.5699801445007324,
9904
+ "rewards/margins": 0.663261890411377,
9905
+ "rewards/rejected": -1.233242154121399,
9906
+ "step": 1294
9907
+ },
9908
+ {
9909
+ "epoch": 1.4778704297626684,
9910
+ "grad_norm": 65.18540943608312,
9911
+ "learning_rate": 3.530136377496525e-08,
9912
+ "logits/chosen": -1.278255581855774,
9913
+ "logits/rejected": -1.2963995933532715,
9914
+ "logps/chosen": -214.0897216796875,
9915
+ "logps/rejected": -238.54718017578125,
9916
+ "loss": 0.4458,
9917
+ "rewards/accuracies": 0.90625,
9918
+ "rewards/chosen": -0.5689273476600647,
9919
+ "rewards/margins": 1.0575151443481445,
9920
+ "rewards/rejected": -1.6264426708221436,
9921
+ "step": 1296
9922
+ },
9923
+ {
9924
+ "epoch": 1.4801510940061293,
9925
+ "grad_norm": 56.51869240720061,
9926
+ "learning_rate": 3.50118240350961e-08,
9927
+ "logits/chosen": -1.2410857677459717,
9928
+ "logits/rejected": -1.3088514804840088,
9929
+ "logps/chosen": -158.88926696777344,
9930
+ "logps/rejected": -195.86138916015625,
9931
+ "loss": 0.449,
9932
+ "rewards/accuracies": 0.625,
9933
+ "rewards/chosen": -0.5141651630401611,
9934
+ "rewards/margins": 0.7266778945922852,
9935
+ "rewards/rejected": -1.2408430576324463,
9936
+ "step": 1298
9937
+ },
9938
+ {
9939
+ "epoch": 1.4824317582495903,
9940
+ "grad_norm": 69.4034723256464,
9941
+ "learning_rate": 3.472322439397635e-08,
9942
+ "logits/chosen": -1.3286279439926147,
9943
+ "logits/rejected": -1.384574294090271,
9944
+ "logps/chosen": -219.34544372558594,
9945
+ "logps/rejected": -237.2283172607422,
9946
+ "loss": 0.4893,
9947
+ "rewards/accuracies": 0.53125,
9948
+ "rewards/chosen": -0.6901402473449707,
9949
+ "rewards/margins": 0.6859029531478882,
9950
+ "rewards/rejected": -1.3760432004928589,
9951
+ "step": 1300
9952
+ },
9953
+ {
9954
+ "epoch": 1.4824317582495903,
9955
+ "eval_logits/chosen": -1.3521403074264526,
9956
+ "eval_logits/rejected": -1.3340317010879517,
9957
+ "eval_logps/chosen": -131.97569274902344,
9958
+ "eval_logps/rejected": -138.84446716308594,
9959
+ "eval_loss": 0.5475608110427856,
9960
+ "eval_rewards/accuracies": 0.7200000286102295,
9961
+ "eval_rewards/chosen": -0.20777291059494019,
9962
+ "eval_rewards/margins": 0.47040116786956787,
9963
+ "eval_rewards/rejected": -0.6781739592552185,
9964
+ "eval_runtime": 21.6329,
9965
+ "eval_samples_per_second": 4.623,
9966
+ "eval_steps_per_second": 1.156,
9967
+ "step": 1300
9968
+ },
9969
+ {
9970
+ "epoch": 1.484712422493051,
9971
+ "grad_norm": 66.5561433557891,
9972
+ "learning_rate": 3.4435569026398645e-08,
9973
+ "logits/chosen": -1.1897640228271484,
9974
+ "logits/rejected": -1.3370938301086426,
9975
+ "logps/chosen": -141.83460998535156,
9976
+ "logps/rejected": -182.74176025390625,
9977
+ "loss": 0.4463,
9978
+ "rewards/accuracies": 0.8125,
9979
+ "rewards/chosen": -0.18475398421287537,
9980
+ "rewards/margins": 0.82102370262146,
9981
+ "rewards/rejected": -1.0057775974273682,
9982
+ "step": 1302
9983
+ },
9984
+ {
9985
+ "epoch": 1.486993086736512,
9986
+ "grad_norm": 48.88961410554484,
9987
+ "learning_rate": 3.4148862093496145e-08,
9988
+ "logits/chosen": -1.2648987770080566,
9989
+ "logits/rejected": -1.274294376373291,
9990
+ "logps/chosen": -161.31605529785156,
9991
+ "logps/rejected": -165.04278564453125,
9992
+ "loss": 0.3889,
9993
+ "rewards/accuracies": 0.75,
9994
+ "rewards/chosen": -0.4667995870113373,
9995
+ "rewards/margins": 0.7909737229347229,
9996
+ "rewards/rejected": -1.2577731609344482,
9997
+ "step": 1304
9998
+ },
9999
+ {
10000
+ "epoch": 1.489273750979973,
10001
+ "grad_norm": 67.73951681369277,
10002
+ "learning_rate": 3.386310774268214e-08,
10003
+ "logits/chosen": -1.2678455114364624,
10004
+ "logits/rejected": -1.320731520652771,
10005
+ "logps/chosen": -204.93576049804688,
10006
+ "logps/rejected": -220.75344848632812,
10007
+ "loss": 0.433,
10008
+ "rewards/accuracies": 0.84375,
10009
+ "rewards/chosen": -0.5178290605545044,
10010
+ "rewards/margins": 0.9241759777069092,
10011
+ "rewards/rejected": -1.4420050382614136,
10012
+ "step": 1306
10013
+ },
10014
+ {
10015
+ "epoch": 1.491554415223434,
10016
+ "grad_norm": 53.60885013196278,
10017
+ "learning_rate": 3.3578310107590255e-08,
10018
+ "logits/chosen": -1.2518330812454224,
10019
+ "logits/rejected": -1.3134666681289673,
10020
+ "logps/chosen": -128.73098754882812,
10021
+ "logps/rejected": -141.5028839111328,
10022
+ "loss": 0.4175,
10023
+ "rewards/accuracies": 0.78125,
10024
+ "rewards/chosen": -0.4798870086669922,
10025
+ "rewards/margins": 0.7654281854629517,
10026
+ "rewards/rejected": -1.2453151941299438,
10027
+ "step": 1308
10028
+ },
10029
+ {
10030
+ "epoch": 1.4938350794668946,
10031
+ "grad_norm": 64.42291386610013,
10032
+ "learning_rate": 3.329447330801455e-08,
10033
+ "logits/chosen": -1.2452740669250488,
10034
+ "logits/rejected": -1.2846417427062988,
10035
+ "logps/chosen": -127.42951202392578,
10036
+ "logps/rejected": -177.83033752441406,
10037
+ "loss": 0.4905,
10038
+ "rewards/accuracies": 0.78125,
10039
+ "rewards/chosen": -0.4574730694293976,
10040
+ "rewards/margins": 1.024438500404358,
10041
+ "rewards/rejected": -1.481911540031433,
10042
+ "step": 1310
10043
+ },
10044
+ {
10045
+ "epoch": 1.4961157437103556,
10046
+ "grad_norm": 60.99913378119905,
10047
+ "learning_rate": 3.3011601449849914e-08,
10048
+ "logits/chosen": -1.2262144088745117,
10049
+ "logits/rejected": -1.200211763381958,
10050
+ "logps/chosen": -175.07473754882812,
10051
+ "logps/rejected": -194.3573455810547,
10052
+ "loss": 0.419,
10053
+ "rewards/accuracies": 0.78125,
10054
+ "rewards/chosen": -0.5272756814956665,
10055
+ "rewards/margins": 1.1627264022827148,
10056
+ "rewards/rejected": -1.6900020837783813,
10057
+ "step": 1312
10058
+ },
10059
+ {
10060
+ "epoch": 1.4983964079538166,
10061
+ "grad_norm": 61.30357952779087,
10062
+ "learning_rate": 3.272969862503271e-08,
10063
+ "logits/chosen": -1.258878231048584,
10064
+ "logits/rejected": -1.3279513120651245,
10065
+ "logps/chosen": -169.57151794433594,
10066
+ "logps/rejected": -205.15086364746094,
10067
+ "loss": 0.4254,
10068
+ "rewards/accuracies": 0.9375,
10069
+ "rewards/chosen": -0.45883575081825256,
10070
+ "rewards/margins": 1.1377463340759277,
10071
+ "rewards/rejected": -1.5965821743011475,
10072
+ "step": 1314
10073
+ },
10074
+ {
10075
+ "epoch": 1.5006770721972775,
10076
+ "grad_norm": 60.7970290747735,
10077
+ "learning_rate": 3.2448768911481574e-08,
10078
+ "logits/chosen": -1.3344089984893799,
10079
+ "logits/rejected": -1.3924615383148193,
10080
+ "logps/chosen": -242.09625244140625,
10081
+ "logps/rejected": -284.8103942871094,
10082
+ "loss": 0.3898,
10083
+ "rewards/accuracies": 0.90625,
10084
+ "rewards/chosen": -0.42801302671432495,
10085
+ "rewards/margins": 1.6990736722946167,
10086
+ "rewards/rejected": -2.127086639404297,
10087
+ "step": 1316
10088
+ },
10089
+ {
10090
+ "epoch": 1.5029577364407385,
10091
+ "grad_norm": 58.23756711117375,
10092
+ "learning_rate": 3.216881637303839e-08,
10093
+ "logits/chosen": -1.2963494062423706,
10094
+ "logits/rejected": -1.2964308261871338,
10095
+ "logps/chosen": -194.5859375,
10096
+ "logps/rejected": -207.51177978515625,
10097
+ "loss": 0.4461,
10098
+ "rewards/accuracies": 0.84375,
10099
+ "rewards/chosen": -0.37401753664016724,
10100
+ "rewards/margins": 0.9556913375854492,
10101
+ "rewards/rejected": -1.3297089338302612,
10102
+ "step": 1318
10103
+ },
10104
+ {
10105
+ "epoch": 1.5052384006841992,
10106
+ "grad_norm": 52.85976726825495,
10107
+ "learning_rate": 3.188984505940955e-08,
10108
+ "logits/chosen": -1.310511589050293,
10109
+ "logits/rejected": -1.3698493242263794,
10110
+ "logps/chosen": -155.9166717529297,
10111
+ "logps/rejected": -185.9949951171875,
10112
+ "loss": 0.4608,
10113
+ "rewards/accuracies": 0.78125,
10114
+ "rewards/chosen": -0.28941965103149414,
10115
+ "rewards/margins": 0.9209386110305786,
10116
+ "rewards/rejected": -1.2103582620620728,
10117
+ "step": 1320
10118
+ },
10119
+ {
10120
+ "epoch": 1.5075190649276602,
10121
+ "grad_norm": 65.51821056925988,
10122
+ "learning_rate": 3.161185900610737e-08,
10123
+ "logits/chosen": -1.2264206409454346,
10124
+ "logits/rejected": -1.2563592195510864,
10125
+ "logps/chosen": -169.9464111328125,
10126
+ "logps/rejected": -215.42799377441406,
10127
+ "loss": 0.4746,
10128
+ "rewards/accuracies": 0.84375,
10129
+ "rewards/chosen": -0.45204079151153564,
10130
+ "rewards/margins": 0.9063868522644043,
10131
+ "rewards/rejected": -1.35842764377594,
10132
+ "step": 1322
10133
+ },
10134
+ {
10135
+ "epoch": 1.509799729171121,
10136
+ "grad_norm": 65.41148602924677,
10137
+ "learning_rate": 3.1334862234391624e-08,
10138
+ "logits/chosen": -1.1124111413955688,
10139
+ "logits/rejected": -1.2182986736297607,
10140
+ "logps/chosen": -168.18359375,
10141
+ "logps/rejected": -212.33782958984375,
10142
+ "loss": 0.3879,
10143
+ "rewards/accuracies": 0.9375,
10144
+ "rewards/chosen": -0.4647515118122101,
10145
+ "rewards/margins": 1.2126293182373047,
10146
+ "rewards/rejected": -1.677380919456482,
10147
+ "step": 1324
10148
+ },
10149
+ {
10150
+ "epoch": 1.512080393414582,
10151
+ "grad_norm": 56.95611387521237,
10152
+ "learning_rate": 3.105885875121151e-08,
10153
+ "logits/chosen": -1.2453256845474243,
10154
+ "logits/rejected": -1.3663585186004639,
10155
+ "logps/chosen": -183.23095703125,
10156
+ "logps/rejected": -234.28765869140625,
10157
+ "loss": 0.4144,
10158
+ "rewards/accuracies": 0.90625,
10159
+ "rewards/chosen": -0.22449856996536255,
10160
+ "rewards/margins": 1.4339529275894165,
10161
+ "rewards/rejected": -1.6584514379501343,
10162
+ "step": 1326
10163
+ },
10164
+ {
10165
+ "epoch": 1.5143610576580429,
10166
+ "grad_norm": 75.28120333419699,
10167
+ "learning_rate": 3.078385254914764e-08,
10168
+ "logits/chosen": -1.246031403541565,
10169
+ "logits/rejected": -1.3311541080474854,
10170
+ "logps/chosen": -181.19082641601562,
10171
+ "logps/rejected": -224.01809692382812,
10172
+ "loss": 0.4438,
10173
+ "rewards/accuracies": 0.71875,
10174
+ "rewards/chosen": -0.5463556051254272,
10175
+ "rewards/margins": 1.012133240699768,
10176
+ "rewards/rejected": -1.5584888458251953,
10177
+ "step": 1328
10178
+ },
10179
+ {
10180
+ "epoch": 1.5166417219015038,
10181
+ "grad_norm": 59.21659370389,
10182
+ "learning_rate": 3.0509847606354214e-08,
10183
+ "logits/chosen": -1.1559014320373535,
10184
+ "logits/rejected": -1.175613522529602,
10185
+ "logps/chosen": -175.91024780273438,
10186
+ "logps/rejected": -201.29910278320312,
10187
+ "loss": 0.4106,
10188
+ "rewards/accuracies": 0.9375,
10189
+ "rewards/chosen": -0.5821335315704346,
10190
+ "rewards/margins": 1.0530939102172852,
10191
+ "rewards/rejected": -1.6352273225784302,
10192
+ "step": 1330
10193
+ },
10194
+ {
10195
+ "epoch": 1.5189223861449648,
10196
+ "grad_norm": 65.02090257303733,
10197
+ "learning_rate": 3.0236847886501535e-08,
10198
+ "logits/chosen": -1.3365192413330078,
10199
+ "logits/rejected": -1.3719249963760376,
10200
+ "logps/chosen": -195.5978546142578,
10201
+ "logps/rejected": -219.9347686767578,
10202
+ "loss": 0.3962,
10203
+ "rewards/accuracies": 0.90625,
10204
+ "rewards/chosen": -0.3096795678138733,
10205
+ "rewards/margins": 1.1052062511444092,
10206
+ "rewards/rejected": -1.4148855209350586,
10207
+ "step": 1332
10208
+ },
10209
+ {
10210
+ "epoch": 1.5212030503884257,
10211
+ "grad_norm": 59.28308867818603,
10212
+ "learning_rate": 2.9964857338718716e-08,
10213
+ "logits/chosen": -1.357577919960022,
10214
+ "logits/rejected": -1.2968313694000244,
10215
+ "logps/chosen": -199.23211669921875,
10216
+ "logps/rejected": -185.07896423339844,
10217
+ "loss": 0.4879,
10218
+ "rewards/accuracies": 0.71875,
10219
+ "rewards/chosen": -0.36101239919662476,
10220
+ "rewards/margins": 0.6830317974090576,
10221
+ "rewards/rejected": -1.0440441370010376,
10222
+ "step": 1334
10223
+ },
10224
+ {
10225
+ "epoch": 1.5234837146318867,
10226
+ "grad_norm": 55.43183849143864,
10227
+ "learning_rate": 2.9693879897536432e-08,
10228
+ "logits/chosen": -1.1980834007263184,
10229
+ "logits/rejected": -1.229064702987671,
10230
+ "logps/chosen": -199.63746643066406,
10231
+ "logps/rejected": -221.99234008789062,
10232
+ "loss": 0.4335,
10233
+ "rewards/accuracies": 0.84375,
10234
+ "rewards/chosen": -0.5939749479293823,
10235
+ "rewards/margins": 1.0861446857452393,
10236
+ "rewards/rejected": -1.6801198720932007,
10237
+ "step": 1336
10238
+ },
10239
+ {
10240
+ "epoch": 1.5257643788753474,
10241
+ "grad_norm": 46.58400293343511,
10242
+ "learning_rate": 2.9423919482830118e-08,
10243
+ "logits/chosen": -1.1741948127746582,
10244
+ "logits/rejected": -1.2695064544677734,
10245
+ "logps/chosen": -147.90426635742188,
10246
+ "logps/rejected": -210.15591430664062,
10247
+ "loss": 0.4373,
10248
+ "rewards/accuracies": 0.90625,
10249
+ "rewards/chosen": -0.16375023126602173,
10250
+ "rewards/margins": 1.4425245523452759,
10251
+ "rewards/rejected": -1.6062746047973633,
10252
+ "step": 1338
10253
+ },
10254
+ {
10255
+ "epoch": 1.5280450431188084,
10256
+ "grad_norm": 60.486372599547316,
10257
+ "learning_rate": 2.9154979999763197e-08,
10258
+ "logits/chosen": -1.2853294610977173,
10259
+ "logits/rejected": -1.3705867528915405,
10260
+ "logps/chosen": -159.66326904296875,
10261
+ "logps/rejected": -226.9650421142578,
10262
+ "loss": 0.4299,
10263
+ "rewards/accuracies": 0.90625,
10264
+ "rewards/chosen": -0.5052796006202698,
10265
+ "rewards/margins": 1.3695988655090332,
10266
+ "rewards/rejected": -1.8748785257339478,
10267
+ "step": 1340
10268
+ },
10269
+ {
10270
+ "epoch": 1.5303257073622691,
10271
+ "grad_norm": 66.91951190930519,
10272
+ "learning_rate": 2.8887065338730633e-08,
10273
+ "logits/chosen": -1.1936756372451782,
10274
+ "logits/rejected": -1.2785755395889282,
10275
+ "logps/chosen": -156.64866638183594,
10276
+ "logps/rejected": -195.68453979492188,
10277
+ "loss": 0.4813,
10278
+ "rewards/accuracies": 0.8125,
10279
+ "rewards/chosen": -0.30220431089401245,
10280
+ "rewards/margins": 0.8970733880996704,
10281
+ "rewards/rejected": -1.1992775201797485,
10282
+ "step": 1342
10283
+ },
10284
+ {
10285
+ "epoch": 1.53260637160573,
10286
+ "grad_norm": 60.327377050918805,
10287
+ "learning_rate": 2.86201793753026e-08,
10288
+ "logits/chosen": -1.2364442348480225,
10289
+ "logits/rejected": -1.2909033298492432,
10290
+ "logps/chosen": -191.64169311523438,
10291
+ "logps/rejected": -228.86416625976562,
10292
+ "loss": 0.4307,
10293
+ "rewards/accuracies": 0.8125,
10294
+ "rewards/chosen": -0.5236424207687378,
10295
+ "rewards/margins": 0.9988542795181274,
10296
+ "rewards/rejected": -1.5224968194961548,
10297
+ "step": 1344
10298
+ },
10299
+ {
10300
+ "epoch": 1.534887035849191,
10301
+ "grad_norm": 71.82683810198446,
10302
+ "learning_rate": 2.835432597016848e-08,
10303
+ "logits/chosen": -1.19918692111969,
10304
+ "logits/rejected": -1.2287514209747314,
10305
+ "logps/chosen": -203.8031768798828,
10306
+ "logps/rejected": -217.00051879882812,
10307
+ "loss": 0.4555,
10308
+ "rewards/accuracies": 0.9375,
10309
+ "rewards/chosen": -0.5840819478034973,
10310
+ "rewards/margins": 1.0689098834991455,
10311
+ "rewards/rejected": -1.6529918909072876,
10312
+ "step": 1346
10313
+ },
10314
+ {
10315
+ "epoch": 1.537167700092652,
10316
+ "grad_norm": 51.67351577452576,
10317
+ "learning_rate": 2.8089508969081e-08,
10318
+ "logits/chosen": -1.1072896718978882,
10319
+ "logits/rejected": -1.1964483261108398,
10320
+ "logps/chosen": -167.41482543945312,
10321
+ "logps/rejected": -227.43304443359375,
10322
+ "loss": 0.3874,
10323
+ "rewards/accuracies": 0.84375,
10324
+ "rewards/chosen": -0.4981079697608948,
10325
+ "rewards/margins": 1.5203238725662231,
10326
+ "rewards/rejected": -2.0184319019317627,
10327
+ "step": 1348
10328
+ },
10329
+ {
10330
+ "epoch": 1.539448364336113,
10331
+ "grad_norm": 54.16559868864558,
10332
+ "learning_rate": 2.7825732202800544e-08,
10333
+ "logits/chosen": -1.1472342014312744,
10334
+ "logits/rejected": -1.2177824974060059,
10335
+ "logps/chosen": -162.28453063964844,
10336
+ "logps/rejected": -183.14820861816406,
10337
+ "loss": 0.4325,
10338
+ "rewards/accuracies": 0.75,
10339
+ "rewards/chosen": -0.6399708986282349,
10340
+ "rewards/margins": 0.8657874464988708,
10341
+ "rewards/rejected": -1.5057581663131714,
10342
+ "step": 1350
10343
+ },
10344
+ {
10345
+ "epoch": 1.541729028579574,
10346
+ "grad_norm": 59.09514477278818,
10347
+ "learning_rate": 2.756299948703982e-08,
10348
+ "logits/chosen": -1.1995211839675903,
10349
+ "logits/rejected": -1.2396866083145142,
10350
+ "logps/chosen": -132.65550231933594,
10351
+ "logps/rejected": -141.451416015625,
10352
+ "loss": 0.4078,
10353
+ "rewards/accuracies": 0.75,
10354
+ "rewards/chosen": -0.38874107599258423,
10355
+ "rewards/margins": 0.7118159532546997,
10356
+ "rewards/rejected": -1.1005568504333496,
10357
+ "step": 1352
10358
+ },
10359
+ {
10360
+ "epoch": 1.5440096928230347,
10361
+ "grad_norm": 61.53811528707539,
10362
+ "learning_rate": 2.7301314622408612e-08,
10363
+ "logits/chosen": -1.2403637170791626,
10364
+ "logits/rejected": -1.3120653629302979,
10365
+ "logps/chosen": -156.661865234375,
10366
+ "logps/rejected": -218.12490844726562,
10367
+ "loss": 0.4467,
10368
+ "rewards/accuracies": 0.8125,
10369
+ "rewards/chosen": -0.468991219997406,
10370
+ "rewards/margins": 1.6298167705535889,
10371
+ "rewards/rejected": -2.0988078117370605,
10372
+ "step": 1354
10373
+ },
10374
+ {
10375
+ "epoch": 1.5462903570664956,
10376
+ "grad_norm": 68.28115300743727,
10377
+ "learning_rate": 2.704068139435881e-08,
10378
+ "logits/chosen": -1.2020457983016968,
10379
+ "logits/rejected": -1.2723631858825684,
10380
+ "logps/chosen": -167.51483154296875,
10381
+ "logps/rejected": -186.7481231689453,
10382
+ "loss": 0.4608,
10383
+ "rewards/accuracies": 0.71875,
10384
+ "rewards/chosen": -0.5097277760505676,
10385
+ "rewards/margins": 0.8848594427108765,
10386
+ "rewards/rejected": -1.3945870399475098,
10387
+ "step": 1356
10388
+ },
10389
+ {
10390
+ "epoch": 1.5485710213099564,
10391
+ "grad_norm": 65.06182702724148,
10392
+ "learning_rate": 2.6781103573129703e-08,
10393
+ "logits/chosen": -1.270340085029602,
10394
+ "logits/rejected": -1.2233667373657227,
10395
+ "logps/chosen": -141.70269775390625,
10396
+ "logps/rejected": -159.1035919189453,
10397
+ "loss": 0.4297,
10398
+ "rewards/accuracies": 0.875,
10399
+ "rewards/chosen": -0.3412969708442688,
10400
+ "rewards/margins": 0.8257958292961121,
10401
+ "rewards/rejected": -1.1670928001403809,
10402
+ "step": 1358
10403
+ },
10404
+ {
10405
+ "epoch": 1.5508516855534173,
10406
+ "grad_norm": 54.785688494455215,
10407
+ "learning_rate": 2.652258491369329e-08,
10408
+ "logits/chosen": -1.2447706460952759,
10409
+ "logits/rejected": -1.2511292695999146,
10410
+ "logps/chosen": -171.25672912597656,
10411
+ "logps/rejected": -208.33084106445312,
10412
+ "loss": 0.4368,
10413
+ "rewards/accuracies": 0.875,
10414
+ "rewards/chosen": -0.3487054705619812,
10415
+ "rewards/margins": 1.1124207973480225,
10416
+ "rewards/rejected": -1.4611263275146484,
10417
+ "step": 1360
10418
+ },
10419
+ {
10420
+ "epoch": 1.5531323497968783,
10421
+ "grad_norm": 61.41628077442576,
10422
+ "learning_rate": 2.626512915570015e-08,
10423
+ "logits/chosen": -1.328946590423584,
10424
+ "logits/rejected": -1.3554866313934326,
10425
+ "logps/chosen": -125.71770477294922,
10426
+ "logps/rejected": -141.460693359375,
10427
+ "loss": 0.4368,
10428
+ "rewards/accuracies": 0.8125,
10429
+ "rewards/chosen": -0.13910508155822754,
10430
+ "rewards/margins": 0.7971990704536438,
10431
+ "rewards/rejected": -0.9363042116165161,
10432
+ "step": 1362
10433
+ },
10434
+ {
10435
+ "epoch": 1.5554130140403393,
10436
+ "grad_norm": 75.66249491591283,
10437
+ "learning_rate": 2.6008740023425247e-08,
10438
+ "logits/chosen": -1.188770055770874,
10439
+ "logits/rejected": -1.2130908966064453,
10440
+ "logps/chosen": -183.416748046875,
10441
+ "logps/rejected": -207.94090270996094,
10442
+ "loss": 0.4306,
10443
+ "rewards/accuracies": 0.71875,
10444
+ "rewards/chosen": -0.651962399482727,
10445
+ "rewards/margins": 1.215145230293274,
10446
+ "rewards/rejected": -1.867107629776001,
10447
+ "step": 1364
10448
+ },
10449
+ {
10450
+ "epoch": 1.5576936782838002,
10451
+ "grad_norm": 55.08493508678333,
10452
+ "learning_rate": 2.5753421225714055e-08,
10453
+ "logits/chosen": -1.2770978212356567,
10454
+ "logits/rejected": -1.3901137113571167,
10455
+ "logps/chosen": -182.26524353027344,
10456
+ "logps/rejected": -213.17454528808594,
10457
+ "loss": 0.4494,
10458
+ "rewards/accuracies": 0.78125,
10459
+ "rewards/chosen": -0.602812647819519,
10460
+ "rewards/margins": 1.0171971321105957,
10461
+ "rewards/rejected": -1.6200097799301147,
10462
+ "step": 1366
10463
+ },
10464
+ {
10465
+ "epoch": 1.5599743425272612,
10466
+ "grad_norm": 61.38622790534087,
10467
+ "learning_rate": 2.549917645592893e-08,
10468
+ "logits/chosen": -1.0256890058517456,
10469
+ "logits/rejected": -1.0421488285064697,
10470
+ "logps/chosen": -125.034423828125,
10471
+ "logps/rejected": -140.7981414794922,
10472
+ "loss": 0.4129,
10473
+ "rewards/accuracies": 0.78125,
10474
+ "rewards/chosen": -0.5211226344108582,
10475
+ "rewards/margins": 0.7981542348861694,
10476
+ "rewards/rejected": -1.3192768096923828,
10477
+ "step": 1368
10478
+ },
10479
+ {
10480
+ "epoch": 1.562255006770722,
10481
+ "grad_norm": 56.63177895037392,
10482
+ "learning_rate": 2.524600939189566e-08,
10483
+ "logits/chosen": -1.155221939086914,
10484
+ "logits/rejected": -1.1944361925125122,
10485
+ "logps/chosen": -161.79409790039062,
10486
+ "logps/rejected": -194.83041381835938,
10487
+ "loss": 0.4127,
10488
+ "rewards/accuracies": 0.84375,
10489
+ "rewards/chosen": -0.42054063081741333,
10490
+ "rewards/margins": 1.119522213935852,
10491
+ "rewards/rejected": -1.5400630235671997,
10492
+ "step": 1370
10493
+ },
10494
+ {
10495
+ "epoch": 1.564535671014183,
10496
+ "grad_norm": 83.86033988088741,
10497
+ "learning_rate": 2.4993923695850305e-08,
10498
+ "logits/chosen": -1.293369174003601,
10499
+ "logits/rejected": -1.3639140129089355,
10500
+ "logps/chosen": -198.75289916992188,
10501
+ "logps/rejected": -229.84117126464844,
10502
+ "loss": 0.4988,
10503
+ "rewards/accuracies": 0.8125,
10504
+ "rewards/chosen": -0.41869914531707764,
10505
+ "rewards/margins": 0.7414001226425171,
10506
+ "rewards/rejected": -1.1600991487503052,
10507
+ "step": 1372
10508
+ },
10509
+ {
10510
+ "epoch": 1.5668163352576436,
10511
+ "grad_norm": 62.24749551918009,
10512
+ "learning_rate": 2.4742923014386154e-08,
10513
+ "logits/chosen": -1.3067548274993896,
10514
+ "logits/rejected": -1.3476440906524658,
10515
+ "logps/chosen": -224.86627197265625,
10516
+ "logps/rejected": -235.1451416015625,
10517
+ "loss": 0.4154,
10518
+ "rewards/accuracies": 0.875,
10519
+ "rewards/chosen": -0.6012443900108337,
10520
+ "rewards/margins": 0.9985144138336182,
10521
+ "rewards/rejected": -1.5997586250305176,
10522
+ "step": 1374
10523
+ },
10524
+ {
10525
+ "epoch": 1.5690969995011046,
10526
+ "grad_norm": 64.51543232805054,
10527
+ "learning_rate": 2.4493010978401063e-08,
10528
+ "logits/chosen": -1.0690737962722778,
10529
+ "logits/rejected": -1.084768295288086,
10530
+ "logps/chosen": -163.8816375732422,
10531
+ "logps/rejected": -171.3943634033203,
10532
+ "loss": 0.4249,
10533
+ "rewards/accuracies": 0.875,
10534
+ "rewards/chosen": -0.611855685710907,
10535
+ "rewards/margins": 0.8946461081504822,
10536
+ "rewards/rejected": -1.5065017938613892,
10537
+ "step": 1376
10538
+ },
10539
+ {
10540
+ "epoch": 1.5713776637445656,
10541
+ "grad_norm": 54.357068938946284,
10542
+ "learning_rate": 2.4244191203044805e-08,
10543
+ "logits/chosen": -1.4049066305160522,
10544
+ "logits/rejected": -1.4406367540359497,
10545
+ "logps/chosen": -193.67234802246094,
10546
+ "logps/rejected": -223.92030334472656,
10547
+ "loss": 0.3912,
10548
+ "rewards/accuracies": 0.8125,
10549
+ "rewards/chosen": -0.3962157368659973,
10550
+ "rewards/margins": 1.082966685295105,
10551
+ "rewards/rejected": -1.479182481765747,
10552
+ "step": 1378
10553
+ },
10554
+ {
10555
+ "epoch": 1.5736583279880265,
10556
+ "grad_norm": 56.319793805013724,
10557
+ "learning_rate": 2.399646728766691e-08,
10558
+ "logits/chosen": -1.3638195991516113,
10559
+ "logits/rejected": -1.3412432670593262,
10560
+ "logps/chosen": -164.48809814453125,
10561
+ "logps/rejected": -196.62823486328125,
10562
+ "loss": 0.4493,
10563
+ "rewards/accuracies": 0.78125,
10564
+ "rewards/chosen": -0.2889966666698456,
10565
+ "rewards/margins": 1.261388897895813,
10566
+ "rewards/rejected": -1.5503859519958496,
10567
+ "step": 1380
10568
+ },
10569
+ {
10570
+ "epoch": 1.5759389922314875,
10571
+ "grad_norm": 60.4303807612328,
10572
+ "learning_rate": 2.3749842815764498e-08,
10573
+ "logits/chosen": -1.1395071744918823,
10574
+ "logits/rejected": -1.1399792432785034,
10575
+ "logps/chosen": -139.91824340820312,
10576
+ "logps/rejected": -184.93896484375,
10577
+ "loss": 0.4426,
10578
+ "rewards/accuracies": 0.75,
10579
+ "rewards/chosen": -0.37107953429222107,
10580
+ "rewards/margins": 0.8352210521697998,
10581
+ "rewards/rejected": -1.2063006162643433,
10582
+ "step": 1382
10583
+ },
10584
+ {
10585
+ "epoch": 1.5782196564749484,
10586
+ "grad_norm": 55.142567366980046,
10587
+ "learning_rate": 2.3504321354930568e-08,
10588
+ "logits/chosen": -1.1650046110153198,
10589
+ "logits/rejected": -1.2775689363479614,
10590
+ "logps/chosen": -144.93995666503906,
10591
+ "logps/rejected": -177.63087463378906,
10592
+ "loss": 0.4272,
10593
+ "rewards/accuracies": 0.78125,
10594
+ "rewards/chosen": -0.5145533084869385,
10595
+ "rewards/margins": 0.9500089287757874,
10596
+ "rewards/rejected": -1.4645624160766602,
10597
+ "step": 1384
10598
+ },
10599
+ {
10600
+ "epoch": 1.5805003207184094,
10601
+ "grad_norm": 63.43007453665504,
10602
+ "learning_rate": 2.3259906456802213e-08,
10603
+ "logits/chosen": -1.216495394706726,
10604
+ "logits/rejected": -1.2768280506134033,
10605
+ "logps/chosen": -169.9758758544922,
10606
+ "logps/rejected": -194.25665283203125,
10607
+ "loss": 0.4427,
10608
+ "rewards/accuracies": 0.78125,
10609
+ "rewards/chosen": -0.43023961782455444,
10610
+ "rewards/margins": 0.8201103210449219,
10611
+ "rewards/rejected": -1.250349998474121,
10612
+ "step": 1386
10613
+ },
10614
+ {
10615
+ "epoch": 1.5827809849618701,
10616
+ "grad_norm": 58.56739556116542,
10617
+ "learning_rate": 2.301660165700936e-08,
10618
+ "logits/chosen": -1.291746735572815,
10619
+ "logits/rejected": -1.3648316860198975,
10620
+ "logps/chosen": -166.92015075683594,
10621
+ "logps/rejected": -194.9810028076172,
10622
+ "loss": 0.4506,
10623
+ "rewards/accuracies": 0.8125,
10624
+ "rewards/chosen": -0.5265369415283203,
10625
+ "rewards/margins": 0.8070346713066101,
10626
+ "rewards/rejected": -1.3335715532302856,
10627
+ "step": 1388
10628
+ },
10629
+ {
10630
+ "epoch": 1.585061649205331,
10631
+ "grad_norm": 54.69042761154998,
10632
+ "learning_rate": 2.2774410475123608e-08,
10633
+ "logits/chosen": -1.3127899169921875,
10634
+ "logits/rejected": -1.2719758749008179,
10635
+ "logps/chosen": -178.777587890625,
10636
+ "logps/rejected": -247.97511291503906,
10637
+ "loss": 0.3964,
10638
+ "rewards/accuracies": 0.875,
10639
+ "rewards/chosen": -0.19407829642295837,
10640
+ "rewards/margins": 1.4336225986480713,
10641
+ "rewards/rejected": -1.6277010440826416,
10642
+ "step": 1390
10643
+ },
10644
+ {
10645
+ "epoch": 1.5873423134487918,
10646
+ "grad_norm": 62.874447838260565,
10647
+ "learning_rate": 2.2533336414607317e-08,
10648
+ "logits/chosen": -1.2624969482421875,
10649
+ "logits/rejected": -1.319579839706421,
10650
+ "logps/chosen": -169.37161254882812,
10651
+ "logps/rejected": -186.8485870361328,
10652
+ "loss": 0.4139,
10653
+ "rewards/accuracies": 0.90625,
10654
+ "rewards/chosen": -0.39721712470054626,
10655
+ "rewards/margins": 0.8743160367012024,
10656
+ "rewards/rejected": -1.2715333700180054,
10657
+ "step": 1392
10658
+ },
10659
+ {
10660
+ "epoch": 1.5896229776922528,
10661
+ "grad_norm": 56.69647177405222,
10662
+ "learning_rate": 2.2293382962762853e-08,
10663
+ "logits/chosen": -1.1312240362167358,
10664
+ "logits/rejected": -1.1108318567276,
10665
+ "logps/chosen": -149.8402557373047,
10666
+ "logps/rejected": -147.10037231445312,
10667
+ "loss": 0.4255,
10668
+ "rewards/accuracies": 0.71875,
10669
+ "rewards/chosen": -0.5710242986679077,
10670
+ "rewards/margins": 0.5461159348487854,
10671
+ "rewards/rejected": -1.117140293121338,
10672
+ "step": 1394
10673
+ },
10674
+ {
10675
+ "epoch": 1.5919036419357138,
10676
+ "grad_norm": 71.94947354276687,
10677
+ "learning_rate": 2.2054553590682268e-08,
10678
+ "logits/chosen": -1.1476179361343384,
10679
+ "logits/rejected": -1.2181971073150635,
10680
+ "logps/chosen": -152.3661346435547,
10681
+ "logps/rejected": -192.77369689941406,
10682
+ "loss": 0.4157,
10683
+ "rewards/accuracies": 0.875,
10684
+ "rewards/chosen": -0.5518044233322144,
10685
+ "rewards/margins": 1.2432973384857178,
10686
+ "rewards/rejected": -1.7951017618179321,
10687
+ "step": 1396
10688
+ },
10689
+ {
10690
+ "epoch": 1.5941843061791747,
10691
+ "grad_norm": 65.43519539314455,
10692
+ "learning_rate": 2.1816851753197018e-08,
10693
+ "logits/chosen": -1.2672624588012695,
10694
+ "logits/rejected": -1.3876826763153076,
10695
+ "logps/chosen": -198.90475463867188,
10696
+ "logps/rejected": -248.56439208984375,
10697
+ "loss": 0.479,
10698
+ "rewards/accuracies": 0.875,
10699
+ "rewards/chosen": -0.7569680213928223,
10700
+ "rewards/margins": 1.3200610876083374,
10701
+ "rewards/rejected": -2.077029228210449,
10702
+ "step": 1398
10703
+ },
10704
+ {
10705
+ "epoch": 1.5964649704226357,
10706
+ "grad_norm": 55.75209984482193,
10707
+ "learning_rate": 2.1580280888827997e-08,
10708
+ "logits/chosen": -1.3570483922958374,
10709
+ "logits/rejected": -1.39362633228302,
10710
+ "logps/chosen": -168.9151153564453,
10711
+ "logps/rejected": -199.656982421875,
10712
+ "loss": 0.4361,
10713
+ "rewards/accuracies": 0.8125,
10714
+ "rewards/chosen": -0.38282498717308044,
10715
+ "rewards/margins": 1.1389210224151611,
10716
+ "rewards/rejected": -1.521746039390564,
10717
+ "step": 1400
10718
+ },
10719
+ {
10720
+ "epoch": 1.5964649704226357,
10721
+ "eval_logits/chosen": -1.349047064781189,
10722
+ "eval_logits/rejected": -1.3316236734390259,
10723
+ "eval_logps/chosen": -131.90457153320312,
10724
+ "eval_logps/rejected": -138.97027587890625,
10725
+ "eval_loss": 0.5412697792053223,
10726
+ "eval_rewards/accuracies": 0.7200000286102295,
10727
+ "eval_rewards/chosen": -0.2006600797176361,
10728
+ "eval_rewards/margins": 0.49009186029434204,
10729
+ "eval_rewards/rejected": -0.6907519698143005,
10730
+ "eval_runtime": 20.6376,
10731
+ "eval_samples_per_second": 4.846,
10732
+ "eval_steps_per_second": 1.211,
10733
+ "step": 1400
10734
+ },
10735
+ {
10736
+ "epoch": 1.5987456346660966,
10737
+ "grad_norm": 58.426952642835886,
10738
+ "learning_rate": 2.1344844419735752e-08,
10739
+ "logits/chosen": -1.3734134435653687,
10740
+ "logits/rejected": -1.4403023719787598,
10741
+ "logps/chosen": -138.4149627685547,
10742
+ "logps/rejected": -162.69033813476562,
10743
+ "loss": 0.4393,
10744
+ "rewards/accuracies": 0.875,
10745
+ "rewards/chosen": -0.2428184598684311,
10746
+ "rewards/margins": 0.8137100338935852,
10747
+ "rewards/rejected": -1.0565285682678223,
10748
+ "step": 1402
10749
+ },
10750
+ {
10751
+ "epoch": 1.6010262989095574,
10752
+ "grad_norm": 57.12690162473469,
10753
+ "learning_rate": 2.1110545751671073e-08,
10754
+ "logits/chosen": -1.2013407945632935,
10755
+ "logits/rejected": -1.3104689121246338,
10756
+ "logps/chosen": -137.76437377929688,
10757
+ "logps/rejected": -174.31373596191406,
10758
+ "loss": 0.4367,
10759
+ "rewards/accuracies": 0.875,
10760
+ "rewards/chosen": -0.14584028720855713,
10761
+ "rewards/margins": 0.999373197555542,
10762
+ "rewards/rejected": -1.1452134847640991,
10763
+ "step": 1404
10764
+ },
10765
+ {
10766
+ "epoch": 1.6033069631530183,
10767
+ "grad_norm": 60.35122974906749,
10768
+ "learning_rate": 2.0877388273925644e-08,
10769
+ "logits/chosen": -1.227086067199707,
10770
+ "logits/rejected": -1.1707607507705688,
10771
+ "logps/chosen": -209.45407104492188,
10772
+ "logps/rejected": -257.14776611328125,
10773
+ "loss": 0.3969,
10774
+ "rewards/accuracies": 0.8125,
10775
+ "rewards/chosen": -0.581037163734436,
10776
+ "rewards/margins": 1.283540964126587,
10777
+ "rewards/rejected": -1.864578127861023,
10778
+ "step": 1406
10779
+ },
10780
+ {
10781
+ "epoch": 1.605587627396479,
10782
+ "grad_norm": 52.46325406296021,
10783
+ "learning_rate": 2.0645375359283045e-08,
10784
+ "logits/chosen": -1.3980076313018799,
10785
+ "logits/rejected": -1.4242044687271118,
10786
+ "logps/chosen": -116.77565002441406,
10787
+ "logps/rejected": -137.52145385742188,
10788
+ "loss": 0.4733,
10789
+ "rewards/accuracies": 0.8125,
10790
+ "rewards/chosen": -0.23290672898292542,
10791
+ "rewards/margins": 0.7732049226760864,
10792
+ "rewards/rejected": -1.0061116218566895,
10793
+ "step": 1408
10794
+ },
10795
+ {
10796
+ "epoch": 1.60786829163994,
10797
+ "grad_norm": 55.90140225780788,
10798
+ "learning_rate": 2.0414510363970018e-08,
10799
+ "logits/chosen": -1.1187829971313477,
10800
+ "logits/rejected": -1.1768170595169067,
10801
+ "logps/chosen": -150.73451232910156,
10802
+ "logps/rejected": -189.43748474121094,
10803
+ "loss": 0.3741,
10804
+ "rewards/accuracies": 0.84375,
10805
+ "rewards/chosen": -0.3249818682670593,
10806
+ "rewards/margins": 0.9408416748046875,
10807
+ "rewards/rejected": -1.2658236026763916,
10808
+ "step": 1410
10809
+ },
10810
+ {
10811
+ "epoch": 1.610148955883401,
10812
+ "grad_norm": 58.50925426124599,
10813
+ "learning_rate": 2.0184796627607725e-08,
10814
+ "logits/chosen": -1.1422569751739502,
10815
+ "logits/rejected": -1.1473525762557983,
10816
+ "logps/chosen": -188.11888122558594,
10817
+ "logps/rejected": -191.87713623046875,
10818
+ "loss": 0.4537,
10819
+ "rewards/accuracies": 0.78125,
10820
+ "rewards/chosen": -0.35315239429473877,
10821
+ "rewards/margins": 0.663144588470459,
10822
+ "rewards/rejected": -1.0162967443466187,
10823
+ "step": 1412
10824
+ },
10825
+ {
10826
+ "epoch": 1.612429620126862,
10827
+ "grad_norm": 64.2495432668556,
10828
+ "learning_rate": 1.9956237473163718e-08,
10829
+ "logits/chosen": -1.2085424661636353,
10830
+ "logits/rejected": -1.306652545928955,
10831
+ "logps/chosen": -97.00778198242188,
10832
+ "logps/rejected": -127.79579162597656,
10833
+ "loss": 0.4977,
10834
+ "rewards/accuracies": 0.75,
10835
+ "rewards/chosen": -0.26015713810920715,
10836
+ "rewards/margins": 0.6060620546340942,
10837
+ "rewards/rejected": -0.866219162940979,
10838
+ "step": 1414
10839
+ },
10840
+ {
10841
+ "epoch": 1.614710284370323,
10842
+ "grad_norm": 50.97231528427863,
10843
+ "learning_rate": 1.9728836206903654e-08,
10844
+ "logits/chosen": -1.4538676738739014,
10845
+ "logits/rejected": -1.437317967414856,
10846
+ "logps/chosen": -138.0614776611328,
10847
+ "logps/rejected": -137.07269287109375,
10848
+ "loss": 0.4251,
10849
+ "rewards/accuracies": 0.8125,
10850
+ "rewards/chosen": -0.09943617880344391,
10851
+ "rewards/margins": 0.7697718143463135,
10852
+ "rewards/rejected": -0.8692080974578857,
10853
+ "step": 1416
10854
+ },
10855
+ {
10856
+ "epoch": 1.616990948613784,
10857
+ "grad_norm": 51.24642546995866,
10858
+ "learning_rate": 1.9502596118343552e-08,
10859
+ "logits/chosen": -1.0932207107543945,
10860
+ "logits/rejected": -1.1693406105041504,
10861
+ "logps/chosen": -126.78893280029297,
10862
+ "logps/rejected": -140.98338317871094,
10863
+ "loss": 0.4273,
10864
+ "rewards/accuracies": 0.6875,
10865
+ "rewards/chosen": -0.1916692703962326,
10866
+ "rewards/margins": 0.7681133151054382,
10867
+ "rewards/rejected": -0.959782600402832,
10868
+ "step": 1418
10869
+ },
10870
+ {
10871
+ "epoch": 1.6192716128572446,
10872
+ "grad_norm": 61.40277169360943,
10873
+ "learning_rate": 1.9277520480202203e-08,
10874
+ "logits/chosen": -1.3312891721725464,
10875
+ "logits/rejected": -1.2821072340011597,
10876
+ "logps/chosen": -151.71487426757812,
10877
+ "logps/rejected": -167.5812225341797,
10878
+ "loss": 0.4401,
10879
+ "rewards/accuracies": 0.9375,
10880
+ "rewards/chosen": -0.4683433473110199,
10881
+ "rewards/margins": 1.1215391159057617,
10882
+ "rewards/rejected": -1.589882493019104,
10883
+ "step": 1420
10884
+ },
10885
+ {
10886
+ "epoch": 1.6215522771007056,
10887
+ "grad_norm": 54.3969390887107,
10888
+ "learning_rate": 1.9053612548353803e-08,
10889
+ "logits/chosen": -1.3286281824111938,
10890
+ "logits/rejected": -1.467494249343872,
10891
+ "logps/chosen": -185.03489685058594,
10892
+ "logps/rejected": -245.1704559326172,
10893
+ "loss": 0.3985,
10894
+ "rewards/accuracies": 0.84375,
10895
+ "rewards/chosen": -0.6573659181594849,
10896
+ "rewards/margins": 1.2262755632400513,
10897
+ "rewards/rejected": -1.8836416006088257,
10898
+ "step": 1422
10899
+ },
10900
+ {
10901
+ "epoch": 1.6238329413441663,
10902
+ "grad_norm": 54.259477393424476,
10903
+ "learning_rate": 1.8830875561780902e-08,
10904
+ "logits/chosen": -1.2293699979782104,
10905
+ "logits/rejected": -1.3463877439498901,
10906
+ "logps/chosen": -140.27874755859375,
10907
+ "logps/rejected": -198.82611083984375,
10908
+ "loss": 0.3951,
10909
+ "rewards/accuracies": 0.8125,
10910
+ "rewards/chosen": -0.30968916416168213,
10911
+ "rewards/margins": 0.9538030624389648,
10912
+ "rewards/rejected": -1.2634921073913574,
10913
+ "step": 1424
10914
+ },
10915
+ {
10916
+ "epoch": 1.6261136055876273,
10917
+ "grad_norm": 61.69027813672776,
10918
+ "learning_rate": 1.8609312742527493e-08,
10919
+ "logits/chosen": -1.2697855234146118,
10920
+ "logits/rejected": -1.2990537881851196,
10921
+ "logps/chosen": -155.84657287597656,
10922
+ "logps/rejected": -212.16445922851562,
10923
+ "loss": 0.4447,
10924
+ "rewards/accuracies": 0.78125,
10925
+ "rewards/chosen": -0.38760051131248474,
10926
+ "rewards/margins": 0.9349652528762817,
10927
+ "rewards/rejected": -1.3225656747817993,
10928
+ "step": 1426
10929
+ },
10930
+ {
10931
+ "epoch": 1.6283942698310883,
10932
+ "grad_norm": 54.55311985521747,
10933
+ "learning_rate": 1.8388927295652446e-08,
10934
+ "logits/chosen": -1.285940170288086,
10935
+ "logits/rejected": -1.2539726495742798,
10936
+ "logps/chosen": -165.970947265625,
10937
+ "logps/rejected": -213.34823608398438,
10938
+ "loss": 0.399,
10939
+ "rewards/accuracies": 0.90625,
10940
+ "rewards/chosen": -0.4254089295864105,
10941
+ "rewards/margins": 1.3802815675735474,
10942
+ "rewards/rejected": -1.8056904077529907,
10943
+ "step": 1428
10944
+ },
10945
+ {
10946
+ "epoch": 1.6306749340745492,
10947
+ "grad_norm": 76.43067475489244,
10948
+ "learning_rate": 1.81697224091831e-08,
10949
+ "logits/chosen": -1.3153434991836548,
10950
+ "logits/rejected": -1.3224899768829346,
10951
+ "logps/chosen": -164.8282470703125,
10952
+ "logps/rejected": -173.127685546875,
10953
+ "loss": 0.4394,
10954
+ "rewards/accuracies": 0.875,
10955
+ "rewards/chosen": -0.13270916044712067,
10956
+ "rewards/margins": 0.787804901599884,
10957
+ "rewards/rejected": -0.9205139875411987,
10958
+ "step": 1430
10959
+ },
10960
+ {
10961
+ "epoch": 1.6329555983180102,
10962
+ "grad_norm": 63.28082306297566,
10963
+ "learning_rate": 1.7951701254069208e-08,
10964
+ "logits/chosen": -1.2579729557037354,
10965
+ "logits/rejected": -1.3529393672943115,
10966
+ "logps/chosen": -172.70582580566406,
10967
+ "logps/rejected": -199.5102081298828,
10968
+ "loss": 0.4179,
10969
+ "rewards/accuracies": 0.875,
10970
+ "rewards/chosen": -0.26907747983932495,
10971
+ "rewards/margins": 0.9893758296966553,
10972
+ "rewards/rejected": -1.258453369140625,
10973
+ "step": 1432
10974
+ },
10975
+ {
10976
+ "epoch": 1.6352362625614711,
10977
+ "grad_norm": 66.45448871459682,
10978
+ "learning_rate": 1.773486698413701e-08,
10979
+ "logits/chosen": -1.321483850479126,
10980
+ "logits/rejected": -1.2988489866256714,
10981
+ "logps/chosen": -278.5541076660156,
10982
+ "logps/rejected": -317.18048095703125,
10983
+ "loss": 0.4615,
10984
+ "rewards/accuracies": 0.6875,
10985
+ "rewards/chosen": -0.9866227507591248,
10986
+ "rewards/margins": 1.5252459049224854,
10987
+ "rewards/rejected": -2.511868715286255,
10988
+ "step": 1434
10989
+ },
10990
+ {
10991
+ "epoch": 1.6375169268049319,
10992
+ "grad_norm": 61.23114152941888,
10993
+ "learning_rate": 1.751922273604366e-08,
10994
+ "logits/chosen": -1.2703336477279663,
10995
+ "logits/rejected": -1.3587698936462402,
10996
+ "logps/chosen": -119.15101623535156,
10997
+ "logps/rejected": -158.48219299316406,
10998
+ "loss": 0.4167,
10999
+ "rewards/accuracies": 0.875,
11000
+ "rewards/chosen": -0.3894941508769989,
11001
+ "rewards/margins": 0.8275178074836731,
11002
+ "rewards/rejected": -1.2170119285583496,
11003
+ "step": 1436
11004
+ },
11005
+ {
11006
+ "epoch": 1.6397975910483928,
11007
+ "grad_norm": 55.95939622635403,
11008
+ "learning_rate": 1.7304771629231797e-08,
11009
+ "logits/chosen": -1.3791868686676025,
11010
+ "logits/rejected": -1.32236909866333,
11011
+ "logps/chosen": -139.13572692871094,
11012
+ "logps/rejected": -141.41555786132812,
11013
+ "loss": 0.4648,
11014
+ "rewards/accuracies": 0.84375,
11015
+ "rewards/chosen": -0.30967453122138977,
11016
+ "rewards/margins": 0.7233924269676208,
11017
+ "rewards/rejected": -1.033066987991333,
11018
+ "step": 1438
11019
+ },
11020
+ {
11021
+ "epoch": 1.6420782552918538,
11022
+ "grad_norm": 65.12174124488925,
11023
+ "learning_rate": 1.709151676588446e-08,
11024
+ "logits/chosen": -1.3115848302841187,
11025
+ "logits/rejected": -1.3686813116073608,
11026
+ "logps/chosen": -187.90061950683594,
11027
+ "logps/rejected": -219.3175506591797,
11028
+ "loss": 0.4063,
11029
+ "rewards/accuracies": 0.8125,
11030
+ "rewards/chosen": -0.2774468958377838,
11031
+ "rewards/margins": 1.2934999465942383,
11032
+ "rewards/rejected": -1.5709468126296997,
11033
+ "step": 1440
11034
+ },
11035
+ {
11036
+ "epoch": 1.6443589195353145,
11037
+ "grad_norm": 63.019089490249065,
11038
+ "learning_rate": 1.687946123088021e-08,
11039
+ "logits/chosen": -1.148698091506958,
11040
+ "logits/rejected": -1.2162479162216187,
11041
+ "logps/chosen": -128.07449340820312,
11042
+ "logps/rejected": -160.94265747070312,
11043
+ "loss": 0.3965,
11044
+ "rewards/accuracies": 0.78125,
11045
+ "rewards/chosen": -0.44137704372406006,
11046
+ "rewards/margins": 0.892095685005188,
11047
+ "rewards/rejected": -1.3334728479385376,
11048
+ "step": 1442
11049
+ },
11050
+ {
11051
+ "epoch": 1.6466395837787755,
11052
+ "grad_norm": 53.81256325707842,
11053
+ "learning_rate": 1.6668608091748494e-08,
11054
+ "logits/chosen": -1.3144904375076294,
11055
+ "logits/rejected": -1.3196699619293213,
11056
+ "logps/chosen": -147.97418212890625,
11057
+ "logps/rejected": -184.45281982421875,
11058
+ "loss": 0.4082,
11059
+ "rewards/accuracies": 0.78125,
11060
+ "rewards/chosen": -0.3717328608036041,
11061
+ "rewards/margins": 1.0338945388793945,
11062
+ "rewards/rejected": -1.4056274890899658,
11063
+ "step": 1444
11064
+ },
11065
+ {
11066
+ "epoch": 1.6489202480222365,
11067
+ "grad_norm": 58.43605410338442,
11068
+ "learning_rate": 1.6458960398625288e-08,
11069
+ "logits/chosen": -1.3495270013809204,
11070
+ "logits/rejected": -1.3430432081222534,
11071
+ "logps/chosen": -221.313232421875,
11072
+ "logps/rejected": -240.23922729492188,
11073
+ "loss": 0.4216,
11074
+ "rewards/accuracies": 0.875,
11075
+ "rewards/chosen": -0.4131534695625305,
11076
+ "rewards/margins": 1.1750929355621338,
11077
+ "rewards/rejected": -1.588246464729309,
11078
+ "step": 1446
11079
+ },
11080
+ {
11081
+ "epoch": 1.6512009122656974,
11082
+ "grad_norm": 62.53542903290901,
11083
+ "learning_rate": 1.6250521184208888e-08,
11084
+ "logits/chosen": -1.2720297574996948,
11085
+ "logits/rejected": -1.2884955406188965,
11086
+ "logps/chosen": -170.361328125,
11087
+ "logps/rejected": -195.9408721923828,
11088
+ "loss": 0.4433,
11089
+ "rewards/accuracies": 0.84375,
11090
+ "rewards/chosen": -0.25558391213417053,
11091
+ "rewards/margins": 0.8461555242538452,
11092
+ "rewards/rejected": -1.1017394065856934,
11093
+ "step": 1448
11094
+ },
11095
+ {
11096
+ "epoch": 1.6534815765091584,
11097
+ "grad_norm": 59.021319623999084,
11098
+ "learning_rate": 1.60432934637162e-08,
11099
+ "logits/chosen": -1.2962281703948975,
11100
+ "logits/rejected": -1.3130199909210205,
11101
+ "logps/chosen": -220.19346618652344,
11102
+ "logps/rejected": -234.11521911621094,
11103
+ "loss": 0.3885,
11104
+ "rewards/accuracies": 0.9375,
11105
+ "rewards/chosen": -0.5436195731163025,
11106
+ "rewards/margins": 0.8701571226119995,
11107
+ "rewards/rejected": -1.4137766361236572,
11108
+ "step": 1450
11109
+ },
11110
+ {
11111
+ "epoch": 1.6557622407526194,
11112
+ "grad_norm": 65.21677604603016,
11113
+ "learning_rate": 1.5837280234839012e-08,
11114
+ "logits/chosen": -1.258408546447754,
11115
+ "logits/rejected": -1.2448734045028687,
11116
+ "logps/chosen": -189.3031005859375,
11117
+ "logps/rejected": -186.64773559570312,
11118
+ "loss": 0.4388,
11119
+ "rewards/accuracies": 0.75,
11120
+ "rewards/chosen": -0.5159119367599487,
11121
+ "rewards/margins": 0.5587047338485718,
11122
+ "rewards/rejected": -1.0746166706085205,
11123
+ "step": 1452
11124
+ },
11125
+ {
11126
+ "epoch": 1.65804290499608,
11127
+ "grad_norm": 56.877107879066756,
11128
+ "learning_rate": 1.5632484477700635e-08,
11129
+ "logits/chosen": -1.3145238161087036,
11130
+ "logits/rejected": -1.3254127502441406,
11131
+ "logps/chosen": -225.0052490234375,
11132
+ "logps/rejected": -260.56292724609375,
11133
+ "loss": 0.4098,
11134
+ "rewards/accuracies": 0.84375,
11135
+ "rewards/chosen": -0.6009418964385986,
11136
+ "rewards/margins": 1.1089880466461182,
11137
+ "rewards/rejected": -1.7099300622940063,
11138
+ "step": 1454
11139
+ },
11140
+ {
11141
+ "epoch": 1.660323569239541,
11142
+ "grad_norm": 68.97770948668654,
11143
+ "learning_rate": 1.542890915481282e-08,
11144
+ "logits/chosen": -1.2907415628433228,
11145
+ "logits/rejected": -1.2642382383346558,
11146
+ "logps/chosen": -122.03399658203125,
11147
+ "logps/rejected": -136.55223083496094,
11148
+ "loss": 0.4316,
11149
+ "rewards/accuracies": 0.90625,
11150
+ "rewards/chosen": -0.2291896939277649,
11151
+ "rewards/margins": 0.7178550362586975,
11152
+ "rewards/rejected": -0.9470447897911072,
11153
+ "step": 1456
11154
+ },
11155
+ {
11156
+ "epoch": 1.6626042334830018,
11157
+ "grad_norm": 68.21135169118118,
11158
+ "learning_rate": 1.5226557211032908e-08,
11159
+ "logits/chosen": -1.2149584293365479,
11160
+ "logits/rejected": -1.2694858312606812,
11161
+ "logps/chosen": -163.82122802734375,
11162
+ "logps/rejected": -217.37213134765625,
11163
+ "loss": 0.4581,
11164
+ "rewards/accuracies": 0.78125,
11165
+ "rewards/chosen": -0.4557928442955017,
11166
+ "rewards/margins": 0.9242541193962097,
11167
+ "rewards/rejected": -1.3800469636917114,
11168
+ "step": 1458
11169
+ },
11170
+ {
11171
+ "epoch": 1.6648848977264628,
11172
+ "grad_norm": 58.468740016047576,
11173
+ "learning_rate": 1.5025431573521207e-08,
11174
+ "logits/chosen": -1.3966403007507324,
11175
+ "logits/rejected": -1.395541787147522,
11176
+ "logps/chosen": -136.16111755371094,
11177
+ "logps/rejected": -158.5441436767578,
11178
+ "loss": 0.4215,
11179
+ "rewards/accuracies": 0.875,
11180
+ "rewards/chosen": -0.3195006549358368,
11181
+ "rewards/margins": 0.7229774594306946,
11182
+ "rewards/rejected": -1.042478084564209,
11183
+ "step": 1460
11184
+ },
11185
+ {
11186
+ "epoch": 1.6671655619699237,
11187
+ "grad_norm": 56.594315997333034,
11188
+ "learning_rate": 1.4825535151698653e-08,
11189
+ "logits/chosen": -1.3145086765289307,
11190
+ "logits/rejected": -1.3318628072738647,
11191
+ "logps/chosen": -218.93898010253906,
11192
+ "logps/rejected": -239.53721618652344,
11193
+ "loss": 0.4041,
11194
+ "rewards/accuracies": 0.8125,
11195
+ "rewards/chosen": -0.4259910583496094,
11196
+ "rewards/margins": 0.9239952564239502,
11197
+ "rewards/rejected": -1.34998619556427,
11198
+ "step": 1462
11199
+ },
11200
+ {
11201
+ "epoch": 1.6694462262133847,
11202
+ "grad_norm": 55.89445679724181,
11203
+ "learning_rate": 1.4626870837204775e-08,
11204
+ "logits/chosen": -1.379180908203125,
11205
+ "logits/rejected": -1.4072362184524536,
11206
+ "logps/chosen": -153.11817932128906,
11207
+ "logps/rejected": -182.13790893554688,
11208
+ "loss": 0.4185,
11209
+ "rewards/accuracies": 0.75,
11210
+ "rewards/chosen": -0.38719913363456726,
11211
+ "rewards/margins": 0.9092380404472351,
11212
+ "rewards/rejected": -1.2964370250701904,
11213
+ "step": 1464
11214
+ },
11215
+ {
11216
+ "epoch": 1.6717268904568456,
11217
+ "grad_norm": 64.47758334862267,
11218
+ "learning_rate": 1.4429441503855722e-08,
11219
+ "logits/chosen": -1.3387432098388672,
11220
+ "logits/rejected": -1.436204433441162,
11221
+ "logps/chosen": -214.61618041992188,
11222
+ "logps/rejected": -248.8955078125,
11223
+ "loss": 0.4463,
11224
+ "rewards/accuracies": 0.84375,
11225
+ "rewards/chosen": -0.5446640849113464,
11226
+ "rewards/margins": 1.1796414852142334,
11227
+ "rewards/rejected": -1.7243056297302246,
11228
+ "step": 1466
11229
+ },
11230
+ {
11231
+ "epoch": 1.6740075547003066,
11232
+ "grad_norm": 59.93784188124436,
11233
+ "learning_rate": 1.4233250007602871e-08,
11234
+ "logits/chosen": -1.1467586755752563,
11235
+ "logits/rejected": -1.1875630617141724,
11236
+ "logps/chosen": -198.951416015625,
11237
+ "logps/rejected": -230.94252014160156,
11238
+ "loss": 0.4599,
11239
+ "rewards/accuracies": 0.8125,
11240
+ "rewards/chosen": -0.7742232084274292,
11241
+ "rewards/margins": 1.2332063913345337,
11242
+ "rewards/rejected": -2.007429599761963,
11243
+ "step": 1468
11244
+ },
11245
+ {
11246
+ "epoch": 1.6762882189437673,
11247
+ "grad_norm": 62.25982751081324,
11248
+ "learning_rate": 1.4038299186491442e-08,
11249
+ "logits/chosen": -1.1409118175506592,
11250
+ "logits/rejected": -1.2762134075164795,
11251
+ "logps/chosen": -144.74981689453125,
11252
+ "logps/rejected": -227.7673797607422,
11253
+ "loss": 0.4217,
11254
+ "rewards/accuracies": 0.6875,
11255
+ "rewards/chosen": -0.43610525131225586,
11256
+ "rewards/margins": 1.684708833694458,
11257
+ "rewards/rejected": -2.120814085006714,
11258
+ "step": 1470
11259
+ },
11260
+ {
11261
+ "epoch": 1.6785688831872283,
11262
+ "grad_norm": 49.66084963571848,
11263
+ "learning_rate": 1.3844591860619382e-08,
11264
+ "logits/chosen": -1.3702046871185303,
11265
+ "logits/rejected": -1.4003832340240479,
11266
+ "logps/chosen": -164.3496856689453,
11267
+ "logps/rejected": -176.85682678222656,
11268
+ "loss": 0.4076,
11269
+ "rewards/accuracies": 0.71875,
11270
+ "rewards/chosen": -0.23472319543361664,
11271
+ "rewards/margins": 0.8279229402542114,
11272
+ "rewards/rejected": -1.0626461505889893,
11273
+ "step": 1472
11274
+ },
11275
+ {
11276
+ "epoch": 1.680849547430689,
11277
+ "grad_norm": 52.98373196788465,
11278
+ "learning_rate": 1.3652130832096653e-08,
11279
+ "logits/chosen": -1.1784981489181519,
11280
+ "logits/rejected": -1.2945374250411987,
11281
+ "logps/chosen": -185.53562927246094,
11282
+ "logps/rejected": -221.29660034179688,
11283
+ "loss": 0.4167,
11284
+ "rewards/accuracies": 0.9375,
11285
+ "rewards/chosen": -0.5693493485450745,
11286
+ "rewards/margins": 1.0049147605895996,
11287
+ "rewards/rejected": -1.5742641687393188,
11288
+ "step": 1474
11289
+ },
11290
+ {
11291
+ "epoch": 1.68313021167415,
11292
+ "grad_norm": 76.34629112032515,
11293
+ "learning_rate": 1.3460918885004658e-08,
11294
+ "logits/chosen": -1.2638037204742432,
11295
+ "logits/rejected": -1.3342554569244385,
11296
+ "logps/chosen": -177.4965362548828,
11297
+ "logps/rejected": -229.96853637695312,
11298
+ "loss": 0.4293,
11299
+ "rewards/accuracies": 0.8125,
11300
+ "rewards/chosen": -0.4774022698402405,
11301
+ "rewards/margins": 1.2457822561264038,
11302
+ "rewards/rejected": -1.723184585571289,
11303
+ "step": 1476
11304
+ },
11305
+ {
11306
+ "epoch": 1.685410875917611,
11307
+ "grad_norm": 66.57532589289495,
11308
+ "learning_rate": 1.3270958785355979e-08,
11309
+ "logits/chosen": -1.2002267837524414,
11310
+ "logits/rejected": -1.2736998796463013,
11311
+ "logps/chosen": -172.23741149902344,
11312
+ "logps/rejected": -227.90280151367188,
11313
+ "loss": 0.4668,
11314
+ "rewards/accuracies": 0.84375,
11315
+ "rewards/chosen": -0.5302340984344482,
11316
+ "rewards/margins": 1.142404556274414,
11317
+ "rewards/rejected": -1.6726385354995728,
11318
+ "step": 1478
11319
+ },
11320
+ {
11321
+ "epoch": 1.687691540161072,
11322
+ "grad_norm": 69.95077709646252,
11323
+ "learning_rate": 1.308225328105439e-08,
11324
+ "logits/chosen": -1.376049518585205,
11325
+ "logits/rejected": -1.4532899856567383,
11326
+ "logps/chosen": -178.05099487304688,
11327
+ "logps/rejected": -243.27200317382812,
11328
+ "loss": 0.4221,
11329
+ "rewards/accuracies": 0.875,
11330
+ "rewards/chosen": -0.3648741543292999,
11331
+ "rewards/margins": 1.095149040222168,
11332
+ "rewards/rejected": -1.460023045539856,
11333
+ "step": 1480
11334
+ },
11335
+ {
11336
+ "epoch": 1.6899722044045329,
11337
+ "grad_norm": 64.49691609932415,
11338
+ "learning_rate": 1.2894805101854989e-08,
11339
+ "logits/chosen": -1.3072996139526367,
11340
+ "logits/rejected": -1.2989863157272339,
11341
+ "logps/chosen": -166.4363555908203,
11342
+ "logps/rejected": -176.7548828125,
11343
+ "loss": 0.4753,
11344
+ "rewards/accuracies": 0.78125,
11345
+ "rewards/chosen": -0.39158201217651367,
11346
+ "rewards/margins": 0.8482180833816528,
11347
+ "rewards/rejected": -1.239800214767456,
11348
+ "step": 1482
11349
+ },
11350
+ {
11351
+ "epoch": 1.6922528686479938,
11352
+ "grad_norm": 59.42081736231207,
11353
+ "learning_rate": 1.270861695932489e-08,
11354
+ "logits/chosen": -1.3978495597839355,
11355
+ "logits/rejected": -1.4046682119369507,
11356
+ "logps/chosen": -214.11187744140625,
11357
+ "logps/rejected": -258.4549865722656,
11358
+ "loss": 0.4494,
11359
+ "rewards/accuracies": 0.625,
11360
+ "rewards/chosen": -0.5993155241012573,
11361
+ "rewards/margins": 0.9518192410469055,
11362
+ "rewards/rejected": -1.5511348247528076,
11363
+ "step": 1484
11364
+ },
11365
+ {
11366
+ "epoch": 1.6945335328914546,
11367
+ "grad_norm": 56.004460149491955,
11368
+ "learning_rate": 1.2523691546803872e-08,
11369
+ "logits/chosen": -1.1605501174926758,
11370
+ "logits/rejected": -1.2564440965652466,
11371
+ "logps/chosen": -139.624267578125,
11372
+ "logps/rejected": -178.62393188476562,
11373
+ "loss": 0.4557,
11374
+ "rewards/accuracies": 0.71875,
11375
+ "rewards/chosen": -0.27934566140174866,
11376
+ "rewards/margins": 0.7967109680175781,
11377
+ "rewards/rejected": -1.0760565996170044,
11378
+ "step": 1486
11379
+ },
11380
+ {
11381
+ "epoch": 1.6968141971349155,
11382
+ "grad_norm": 53.58208037132539,
11383
+ "learning_rate": 1.234003153936548e-08,
11384
+ "logits/chosen": -1.1957886219024658,
11385
+ "logits/rejected": -1.3672311305999756,
11386
+ "logps/chosen": -163.6760711669922,
11387
+ "logps/rejected": -232.0875244140625,
11388
+ "loss": 0.4126,
11389
+ "rewards/accuracies": 0.84375,
11390
+ "rewards/chosen": -0.378174364566803,
11391
+ "rewards/margins": 1.1063618659973145,
11392
+ "rewards/rejected": -1.4845364093780518,
11393
+ "step": 1488
11394
+ },
11395
+ {
11396
+ "epoch": 1.6990948613783763,
11397
+ "grad_norm": 75.62123712030618,
11398
+ "learning_rate": 1.2157639593778268e-08,
11399
+ "logits/chosen": -1.249180555343628,
11400
+ "logits/rejected": -1.257821798324585,
11401
+ "logps/chosen": -143.6417236328125,
11402
+ "logps/rejected": -205.5145721435547,
11403
+ "loss": 0.4895,
11404
+ "rewards/accuracies": 0.78125,
11405
+ "rewards/chosen": -0.3994945287704468,
11406
+ "rewards/margins": 1.2723394632339478,
11407
+ "rewards/rejected": -1.671833872795105,
11408
+ "step": 1490
11409
+ },
11410
+ {
11411
+ "epoch": 1.7013755256218372,
11412
+ "grad_norm": 64.99974592800548,
11413
+ "learning_rate": 1.1976518348467424e-08,
11414
+ "logits/chosen": -1.2328625917434692,
11415
+ "logits/rejected": -1.2472069263458252,
11416
+ "logps/chosen": -199.8415069580078,
11417
+ "logps/rejected": -222.4342803955078,
11418
+ "loss": 0.4444,
11419
+ "rewards/accuracies": 0.84375,
11420
+ "rewards/chosen": -0.544438898563385,
11421
+ "rewards/margins": 1.3196830749511719,
11422
+ "rewards/rejected": -1.8641220331192017,
11423
+ "step": 1492
11424
+ },
11425
+ {
11426
+ "epoch": 1.7036561898652982,
11427
+ "grad_norm": 66.84615124590252,
11428
+ "learning_rate": 1.1796670423476574e-08,
11429
+ "logits/chosen": -1.364052414894104,
11430
+ "logits/rejected": -1.3560256958007812,
11431
+ "logps/chosen": -133.53671264648438,
11432
+ "logps/rejected": -152.27175903320312,
11433
+ "loss": 0.4364,
11434
+ "rewards/accuracies": 0.75,
11435
+ "rewards/chosen": -0.3934406042098999,
11436
+ "rewards/margins": 0.749383270740509,
11437
+ "rewards/rejected": -1.1428238153457642,
11438
+ "step": 1494
11439
+ },
11440
+ {
11441
+ "epoch": 1.7059368541087592,
11442
+ "grad_norm": 63.13360469846886,
11443
+ "learning_rate": 1.1618098420429879e-08,
11444
+ "logits/chosen": -1.2544901371002197,
11445
+ "logits/rejected": -1.303771734237671,
11446
+ "logps/chosen": -166.77374267578125,
11447
+ "logps/rejected": -190.30552673339844,
11448
+ "loss": 0.4538,
11449
+ "rewards/accuracies": 0.84375,
11450
+ "rewards/chosen": -0.4510793685913086,
11451
+ "rewards/margins": 0.7863295674324036,
11452
+ "rewards/rejected": -1.2374088764190674,
11453
+ "step": 1496
11454
+ },
11455
+ {
11456
+ "epoch": 1.7082175183522201,
11457
+ "grad_norm": 50.3590196948944,
11458
+ "learning_rate": 1.1440804922494441e-08,
11459
+ "logits/chosen": -1.352832317352295,
11460
+ "logits/rejected": -1.357743263244629,
11461
+ "logps/chosen": -193.49298095703125,
11462
+ "logps/rejected": -207.95184326171875,
11463
+ "loss": 0.3787,
11464
+ "rewards/accuracies": 0.875,
11465
+ "rewards/chosen": -0.36873170733451843,
11466
+ "rewards/margins": 1.0819979906082153,
11467
+ "rewards/rejected": -1.4507297277450562,
11468
+ "step": 1498
11469
+ },
11470
+ {
11471
+ "epoch": 1.710498182595681,
11472
+ "grad_norm": 59.01395721554923,
11473
+ "learning_rate": 1.1264792494342856e-08,
11474
+ "logits/chosen": -1.2877607345581055,
11475
+ "logits/rejected": -1.3114897012710571,
11476
+ "logps/chosen": -158.8022003173828,
11477
+ "logps/rejected": -197.77500915527344,
11478
+ "loss": 0.4406,
11479
+ "rewards/accuracies": 0.875,
11480
+ "rewards/chosen": -0.48249971866607666,
11481
+ "rewards/margins": 0.8965498208999634,
11482
+ "rewards/rejected": -1.37904953956604,
11483
+ "step": 1500
11484
+ },
11485
+ {
11486
+ "epoch": 1.710498182595681,
11487
+ "eval_logits/chosen": -1.3420703411102295,
11488
+ "eval_logits/rejected": -1.3242188692092896,
11489
+ "eval_logps/chosen": -132.36380004882812,
11490
+ "eval_logps/rejected": -138.97621154785156,
11491
+ "eval_loss": 0.5477466583251953,
11492
+ "eval_rewards/accuracies": 0.7200000286102295,
11493
+ "eval_rewards/chosen": -0.24658337235450745,
11494
+ "eval_rewards/margins": 0.44476309418678284,
11495
+ "eval_rewards/rejected": -0.6913464069366455,
11496
+ "eval_runtime": 21.5135,
11497
+ "eval_samples_per_second": 4.648,
11498
+ "eval_steps_per_second": 1.162,
11499
+ "step": 1500
11500
  }
11501
  ],
11502
  "logging_steps": 2,