RyanYr committed
Commit bc751d0
1 Parent(s): d910325

Training in progress, step 1500, checkpoint

Browse files
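This push updates the last-checkpoint/ folder at global step 1500; the previous push (parent d910325) was at step 1200. The training script itself is not part of the commit, so the exact settings are unknown. The sketch below is an assumed Hugging Face Trainer configuration that produces this kind of rolling checkpoint push; the output directory and the 300-step save interval are inferred from the step gap, not confirmed by the repo.

```python
# Assumed configuration (not from this repo): with hub_strategy="checkpoint" the Trainer
# mirrors its newest checkpoint into a "last-checkpoint/" folder on the Hub, which is the
# path prefix seen in this commit. save_steps=300 matches the 1200 -> 1500 gap between
# pushes; eval_steps=100 matches "eval_steps" in trainer_state.json further down.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="last-checkpoint-run",   # illustrative local directory
    push_to_hub=True,
    hub_strategy="checkpoint",          # mirror the latest checkpoint under last-checkpoint/
    save_strategy="steps",
    save_steps=300,
    eval_strategy="steps",              # named evaluation_strategy on older transformers releases
    eval_steps=100,
    # The bf16 ZeRO shards below additionally suggest bf16=True plus a DeepSpeed ZeRO config
    # (deepspeed="ds_config.json"); both are omitted here because they are hardware-dependent.
)
```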
last-checkpoint/global_step1500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4a5116dee8628efc144342ca4a6de9fe3583889a8ccce615c6b0a81dbeaed3c7
+ size 24090788996
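Each ADDED entry in this commit is a three-line Git LFS pointer rather than the binary itself: version names the pointer spec, oid is the SHA-256 of the stored object, and size is its length in bytes (about 24 GB per optimizer shard here). As a rough, self-contained illustration with hypothetical file names, a downloaded blob can be checked against its pointer like this:

```python
# Minimal sketch: check a downloaded blob against the three-line LFS pointer shown above.
# Both paths are hypothetical placeholders.
import hashlib
from pathlib import Path

def parse_pointer(pointer_path: str) -> tuple[str, int]:
    """Return (sha256_hex, size_in_bytes) from a Git LFS pointer file."""
    fields = dict(line.split(" ", 1) for line in Path(pointer_path).read_text().splitlines() if line)
    return fields["oid"].removeprefix("sha256:"), int(fields["size"])

def matches_pointer(blob_path: str, pointer_path: str) -> bool:
    expected_oid, expected_size = parse_pointer(pointer_path)
    digest = hashlib.sha256()
    with open(blob_path, "rb") as blob:
        for chunk in iter(lambda: blob.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_oid and Path(blob_path).stat().st_size == expected_size
```

In practice git lfs or the Hub client performs this check automatically; the sketch only spells out what the pointer fields mean.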
last-checkpoint/global_step1500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8cd980325951d8231000ff67edace7a12644e719c501ac636a0ce98206dbb63c
+ size 24090788996
last-checkpoint/global_step1500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c527bb7cf51aca43a29bba25edc1d4ae9e7da745bea4785d0690e02b42d02dc4
+ size 24090788996
last-checkpoint/global_step1500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3f0838a35df82986324cb75bb54a537d38f605c4a6e5e657ffc60e9f19ed81d2
+ size 24090788996
last-checkpoint/global_step1500/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfc8c9c9a7d2fcfa2e8fa40312006739b2c850b9a5ecfacd34f5a60173282093
+ size 150693
last-checkpoint/global_step1500/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:259ffd7944316a19d3cd4a130f207a063579c77fad5d447554aeff12156085d6
+ size 150693
last-checkpoint/global_step1500/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d29c1fe957781c3a62402feef79dfb38cf56b8fddb352092f6ffd32c6211e320
+ size 150693
last-checkpoint/global_step1500/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:921a755d22277c2d84c9363b6dc0c6e459c1e6fbaddb89bc814ee9c5db4f54fb
+ size 150693
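The eight global_step1500/ files above are DeepSpeed ZeRO partitions: one bf16 optimizer-state shard (~24 GB) plus one small model-states file per data-parallel rank, four ranks in total, kept so training can resume exactly. The consolidated model weights live in the safetensors shards that follow. If only the ZeRO shards were available, DeepSpeed's bundled helper could rebuild a single fp32 state dict. A minimal sketch, assuming deepspeed is installed and the last-checkpoint/ folder has been downloaded locally (paths are illustrative):

```python
# Sketch: reconstruct full fp32 weights from the ZeRO shards in last-checkpoint/global_step1500.
# "last-checkpoint" is assumed to be a local copy of this repository's checkpoint folder.
import torch
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

state_dict = get_fp32_state_dict_from_zero_checkpoint("last-checkpoint", tag="global_step1500")
torch.save(state_dict, "consolidated_fp32.bin")  # illustrative output file name
```

DeepSpeed checkpoints usually also ship a zero_to_fp32.py script that wraps this same call for command-line use.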
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step1200
+ global_step1500
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:697f068ea638078ac4d7214af8b8dd705b457d98098f64c5634dce4d0735e056
+ oid sha256:99de85720481c98cc093f3faf5805a4ff05d5df419d49b8575ed63ce236d5815
  size 4976698672
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:09c701e1adfd6fc4140fc4da095a0e17303fc0cc082c0f4ecd1689c370de3a5b
+ oid sha256:368f761161ebad7292a8dbdeca4656fb602262d1f2495446f32f49896062f7dc
  size 4999802720
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9ff6e53692a0195a93ee13f1bc9a625025940f8ed925febbf99d8230cdcc8c75
+ oid sha256:306982a5a2f0fc8003fdb3eebf34d6850d83379bebc04fbe40d7a6bb9f8b6a5c
  size 4915916176
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5c0882d342ba96f75d9c6510a1ca27d6114e6289a81c60de6d812f5fb437ba5c
+ oid sha256:fa2c5d55d66df705380544c1b8076cf199a0a6e6da3583e847a01a69fbf8edb4
  size 1168138808
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8044e4c53158c210a17648ba8f2dc2d25a25bbfc55f686015542618eb652a33e
+ oid sha256:b4f003069486a57c6ac033f30cf4c4213eb6b7d659bab68a5a50fdb8da7c4118
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4cd85d7fa425e7888c973f1c2985ac15ca21b5e6171fe140a401c2bc75ca46ff
+ oid sha256:a016ef89b4392d083b2c15a7cf06a39bc61a759f648cf6dc03f1c32b89a526aa
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d7915667371a58f1598639e0d1c20a0c59c783c14580cd040a6631eb4ea2311e
+ oid sha256:9b56fe0893036dc052d18d90feba4328b90ea71561942150b07406ac3d7a700e
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:35dd78929ad7f0fbf37fdb1284e8edf0424350f6e6ce1cd5a3ee78979af3d3cb
+ oid sha256:c0c203d12c2c308dab785ed672c9ca27fb6a2f72acd1e1552d1516c7b0006013
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:685c1f811cfe9c198dd728b594559640ab3fa0f3d97d3259ff1cb0dd7cc548e1
+ oid sha256:b0eed8fec4102664205a804b0fbc28ba65f44e3fb811cdaf695f0e9321c6fe0b
  size 1064
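The last file in the diff, last-checkpoint/trainer_state.json, is the Trainer's bookkeeping record: top-level fields such as epoch and global_step (updated from 1200 to 1500 below) plus a log_history list whose entries hold the DPO-style metrics visible in the added lines (loss, rewards/margins, eval_loss, and so on). A hedged sketch of inspecting it after download follows; the key names are taken from the diff below, while the file path is assumed to be a local copy.

```python
# Sketch: summarize the log history recorded in trainer_state.json.
import json

with open("trainer_state.json") as f:  # assumed local copy of last-checkpoint/trainer_state.json
    state = json.load(f)

print(state["epoch"], state["global_step"])  # 1.737..., 1500 after this commit

train_logs = [entry for entry in state["log_history"] if "loss" in entry]      # per-step training metrics
eval_logs = [entry for entry in state["log_history"] if "eval_loss" in entry]  # entries written every eval_steps

latest = train_logs[-1]
print(f"step {latest['step']}: loss={latest['loss']}, reward margin={latest['rewards/margins']}")
if eval_logs:
    latest_eval = eval_logs[-1]
    print(f"eval @ step {latest_eval['step']}: loss={latest_eval['eval_loss']}, "
          f"accuracy={latest_eval['eval_rewards/accuracies']}")
```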
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 1.3897937024972855,
  "eval_steps": 100,
- "global_step": 1200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -9199,6 +9199,2304 @@
  "eval_samples_per_second": 3.581,
  "eval_steps_per_second": 0.895,
  "step": 1200
  }
  ],
  "logging_steps": 2,

  {
  "best_metric": null,
  "best_model_checkpoint": null,
+ "epoch": 1.7372421281216068,
  "eval_steps": 100,
+ "global_step": 1500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,

  "eval_samples_per_second": 3.581,
  "eval_steps_per_second": 0.895,
  "step": 1200
9202
+ },
9203
+ {
9204
+ "epoch": 1.3921100253347811,
9205
+ "grad_norm": 65.19042296707491,
9206
+ "learning_rate": 4.70189974343236e-08,
9207
+ "logits/chosen": -1.2034111022949219,
9208
+ "logits/rejected": -1.2350322008132935,
9209
+ "logps/chosen": -121.24800872802734,
9210
+ "logps/rejected": -157.0181427001953,
9211
+ "loss": 0.389,
9212
+ "rewards/accuracies": 0.875,
9213
+ "rewards/chosen": -0.05601517856121063,
9214
+ "rewards/margins": 1.3547430038452148,
9215
+ "rewards/rejected": -1.4107582569122314,
9216
+ "step": 1202
9217
+ },
9218
+ {
9219
+ "epoch": 1.3944263481722765,
9220
+ "grad_norm": 53.022218131457315,
9221
+ "learning_rate": 4.669166489076283e-08,
9222
+ "logits/chosen": -1.1960185766220093,
9223
+ "logits/rejected": -1.2297847270965576,
9224
+ "logps/chosen": -147.0485382080078,
9225
+ "logps/rejected": -177.79165649414062,
9226
+ "loss": 0.3619,
9227
+ "rewards/accuracies": 0.84375,
9228
+ "rewards/chosen": -0.17010337114334106,
9229
+ "rewards/margins": 1.6398181915283203,
9230
+ "rewards/rejected": -1.8099215030670166,
9231
+ "step": 1204
9232
+ },
9233
+ {
9234
+ "epoch": 1.396742671009772,
9235
+ "grad_norm": 51.968933285305766,
9236
+ "learning_rate": 4.636512834750479e-08,
9237
+ "logits/chosen": -1.1823878288269043,
9238
+ "logits/rejected": -1.1940593719482422,
9239
+ "logps/chosen": -165.0301055908203,
9240
+ "logps/rejected": -192.39288330078125,
9241
+ "loss": 0.3919,
9242
+ "rewards/accuracies": 0.8125,
9243
+ "rewards/chosen": -0.2851025462150574,
9244
+ "rewards/margins": 1.3472487926483154,
9245
+ "rewards/rejected": -1.6323513984680176,
9246
+ "step": 1206
9247
+ },
9248
+ {
9249
+ "epoch": 1.3990589938472675,
9250
+ "grad_norm": 70.67490625711265,
9251
+ "learning_rate": 4.6039392680394705e-08,
9252
+ "logits/chosen": -1.1791980266571045,
9253
+ "logits/rejected": -1.2245995998382568,
9254
+ "logps/chosen": -144.21392822265625,
9255
+ "logps/rejected": -177.3154754638672,
9256
+ "loss": 0.4364,
9257
+ "rewards/accuracies": 0.78125,
9258
+ "rewards/chosen": -0.42649248242378235,
9259
+ "rewards/margins": 1.543526291847229,
9260
+ "rewards/rejected": -1.9700188636779785,
9261
+ "step": 1208
9262
+ },
9263
+ {
9264
+ "epoch": 1.401375316684763,
9265
+ "grad_norm": 73.92072443542028,
9266
+ "learning_rate": 4.5714462753319025e-08,
9267
+ "logits/chosen": -1.1179996728897095,
9268
+ "logits/rejected": -1.1189749240875244,
9269
+ "logps/chosen": -176.80593872070312,
9270
+ "logps/rejected": -192.028564453125,
9271
+ "loss": 0.3727,
9272
+ "rewards/accuracies": 0.8125,
9273
+ "rewards/chosen": -1.0178598165512085,
9274
+ "rewards/margins": 1.1851011514663696,
9275
+ "rewards/rejected": -2.202960968017578,
9276
+ "step": 1210
9277
+ },
9278
+ {
9279
+ "epoch": 1.4036916395222585,
9280
+ "grad_norm": 77.10308260730022,
9281
+ "learning_rate": 4.539034341813285e-08,
9282
+ "logits/chosen": -1.1620545387268066,
9283
+ "logits/rejected": -1.1280796527862549,
9284
+ "logps/chosen": -148.05978393554688,
9285
+ "logps/rejected": -170.8760986328125,
9286
+ "loss": 0.4417,
9287
+ "rewards/accuracies": 0.84375,
9288
+ "rewards/chosen": -0.6205931901931763,
9289
+ "rewards/margins": 0.9719365835189819,
9290
+ "rewards/rejected": -1.5925298929214478,
9291
+ "step": 1212
9292
+ },
9293
+ {
9294
+ "epoch": 1.406007962359754,
9295
+ "grad_norm": 50.96217046302329,
9296
+ "learning_rate": 4.50670395145876e-08,
9297
+ "logits/chosen": -1.2354716062545776,
9298
+ "logits/rejected": -1.2715418338775635,
9299
+ "logps/chosen": -197.6702423095703,
9300
+ "logps/rejected": -234.24781799316406,
9301
+ "loss": 0.416,
9302
+ "rewards/accuracies": 0.8125,
9303
+ "rewards/chosen": -0.38070574402809143,
9304
+ "rewards/margins": 1.9455530643463135,
9305
+ "rewards/rejected": -2.326258897781372,
9306
+ "step": 1214
9307
+ },
9308
+ {
9309
+ "epoch": 1.4083242851972493,
9310
+ "grad_norm": 60.92763813478579,
9311
+ "learning_rate": 4.474455587025869e-08,
9312
+ "logits/chosen": -1.2651827335357666,
9313
+ "logits/rejected": -1.2328460216522217,
9314
+ "logps/chosen": -184.45912170410156,
9315
+ "logps/rejected": -197.0801239013672,
9316
+ "loss": 0.3824,
9317
+ "rewards/accuracies": 0.875,
9318
+ "rewards/chosen": -0.584815502166748,
9319
+ "rewards/margins": 1.4052482843399048,
9320
+ "rewards/rejected": -1.9900637865066528,
9321
+ "step": 1216
9322
+ },
9323
+ {
9324
+ "epoch": 1.410640608034745,
9325
+ "grad_norm": 63.32401043366279,
9326
+ "learning_rate": 4.4422897300473315e-08,
9327
+ "logits/chosen": -1.3017557859420776,
9328
+ "logits/rejected": -1.2864493131637573,
9329
+ "logps/chosen": -124.99530792236328,
9330
+ "logps/rejected": -148.06082153320312,
9331
+ "loss": 0.3893,
9332
+ "rewards/accuracies": 0.875,
9333
+ "rewards/chosen": -0.2559690475463867,
9334
+ "rewards/margins": 1.1134750843048096,
9335
+ "rewards/rejected": -1.3694441318511963,
9336
+ "step": 1218
9337
+ },
9338
+ {
9339
+ "epoch": 1.4129569308722403,
9340
+ "grad_norm": 53.98313466834894,
9341
+ "learning_rate": 4.4102068608238685e-08,
9342
+ "logits/chosen": -1.230201244354248,
9343
+ "logits/rejected": -1.2914016246795654,
9344
+ "logps/chosen": -171.6476287841797,
9345
+ "logps/rejected": -189.03538513183594,
9346
+ "loss": 0.4396,
9347
+ "rewards/accuracies": 0.90625,
9348
+ "rewards/chosen": -0.6881774067878723,
9349
+ "rewards/margins": 0.9222534894943237,
9350
+ "rewards/rejected": -1.6104308366775513,
9351
+ "step": 1220
9352
+ },
9353
+ {
9354
+ "epoch": 1.415273253709736,
9355
+ "grad_norm": 62.04575224759104,
9356
+ "learning_rate": 4.3782074584170346e-08,
9357
+ "logits/chosen": -1.214889645576477,
9358
+ "logits/rejected": -1.1926887035369873,
9359
+ "logps/chosen": -92.45348358154297,
9360
+ "logps/rejected": -105.89907836914062,
9361
+ "loss": 0.4592,
9362
+ "rewards/accuracies": 0.71875,
9363
+ "rewards/chosen": -0.3113905191421509,
9364
+ "rewards/margins": 0.9316319823265076,
9365
+ "rewards/rejected": -1.2430225610733032,
9366
+ "step": 1222
9367
+ },
9368
+ {
9369
+ "epoch": 1.4175895765472313,
9370
+ "grad_norm": 71.40412638311601,
9371
+ "learning_rate": 4.34629200064205e-08,
9372
+ "logits/chosen": -1.1961030960083008,
9373
+ "logits/rejected": -1.2155812978744507,
9374
+ "logps/chosen": -170.02508544921875,
9375
+ "logps/rejected": -210.41592407226562,
9376
+ "loss": 0.4078,
9377
+ "rewards/accuracies": 0.84375,
9378
+ "rewards/chosen": -0.856181263923645,
9379
+ "rewards/margins": 1.6567095518112183,
9380
+ "rewards/rejected": -2.5128908157348633,
9381
+ "step": 1224
9382
+ },
9383
+ {
9384
+ "epoch": 1.4199058993847267,
9385
+ "grad_norm": 57.3723784957176,
9386
+ "learning_rate": 4.314460964060672e-08,
9387
+ "logits/chosen": -1.1995205879211426,
9388
+ "logits/rejected": -1.306661605834961,
9389
+ "logps/chosen": -158.38504028320312,
9390
+ "logps/rejected": -204.92669677734375,
9391
+ "loss": 0.4169,
9392
+ "rewards/accuracies": 0.75,
9393
+ "rewards/chosen": -0.4841935634613037,
9394
+ "rewards/margins": 1.3874504566192627,
9395
+ "rewards/rejected": -1.8716439008712769,
9396
+ "step": 1226
9397
+ },
9398
+ {
9399
+ "epoch": 1.4222222222222223,
9400
+ "grad_norm": 51.26821420581517,
9401
+ "learning_rate": 4.2827148239740875e-08,
9402
+ "logits/chosen": -1.1911481618881226,
9403
+ "logits/rejected": -1.2252691984176636,
9404
+ "logps/chosen": -149.15687561035156,
9405
+ "logps/rejected": -168.63299560546875,
9406
+ "loss": 0.3545,
9407
+ "rewards/accuracies": 0.84375,
9408
+ "rewards/chosen": 0.06090724095702171,
9409
+ "rewards/margins": 1.1758480072021484,
9410
+ "rewards/rejected": -1.114940881729126,
9411
+ "step": 1228
9412
+ },
9413
+ {
9414
+ "epoch": 1.4245385450597177,
9415
+ "grad_norm": 59.18176805705484,
9416
+ "learning_rate": 4.251054054415808e-08,
9417
+ "logits/chosen": -1.2053039073944092,
9418
+ "logits/rejected": -1.26601243019104,
9419
+ "logps/chosen": -124.14604187011719,
9420
+ "logps/rejected": -133.4225616455078,
9421
+ "loss": 0.5098,
9422
+ "rewards/accuracies": 0.78125,
9423
+ "rewards/chosen": -0.6462215781211853,
9424
+ "rewards/margins": 0.5801703333854675,
9425
+ "rewards/rejected": -1.2263920307159424,
9426
+ "step": 1230
9427
+ },
9428
+ {
9429
+ "epoch": 1.4268548678972133,
9430
+ "grad_norm": 59.54196160924442,
9431
+ "learning_rate": 4.219479128144583e-08,
9432
+ "logits/chosen": -1.1135673522949219,
9433
+ "logits/rejected": -1.1362190246582031,
9434
+ "logps/chosen": -168.93687438964844,
9435
+ "logps/rejected": -226.7947235107422,
9436
+ "loss": 0.4097,
9437
+ "rewards/accuracies": 0.8125,
9438
+ "rewards/chosen": -0.9602434039115906,
9439
+ "rewards/margins": 2.3180015087127686,
9440
+ "rewards/rejected": -3.278245210647583,
9441
+ "step": 1232
9442
+ },
9443
+ {
9444
+ "epoch": 1.4291711907347087,
9445
+ "grad_norm": 56.10855119361768,
9446
+ "learning_rate": 4.187990516637361e-08,
9447
+ "logits/chosen": -1.3297936916351318,
9448
+ "logits/rejected": -1.3411719799041748,
9449
+ "logps/chosen": -160.88560485839844,
9450
+ "logps/rejected": -200.78402709960938,
9451
+ "loss": 0.4441,
9452
+ "rewards/accuracies": 0.6875,
9453
+ "rewards/chosen": -0.3891071081161499,
9454
+ "rewards/margins": 1.3050543069839478,
9455
+ "rewards/rejected": -1.6941611766815186,
9456
+ "step": 1234
9457
+ },
9458
+ {
9459
+ "epoch": 1.431487513572204,
9460
+ "grad_norm": 63.476091563634505,
9461
+ "learning_rate": 4.156588690082229e-08,
9462
+ "logits/chosen": -1.2897419929504395,
9463
+ "logits/rejected": -1.2645704746246338,
9464
+ "logps/chosen": -194.49856567382812,
9465
+ "logps/rejected": -206.52557373046875,
9466
+ "loss": 0.4213,
9467
+ "rewards/accuracies": 0.875,
9468
+ "rewards/chosen": -0.4005884528160095,
9469
+ "rewards/margins": 1.0420472621917725,
9470
+ "rewards/rejected": -1.4426357746124268,
9471
+ "step": 1236
9472
+ },
9473
+ {
9474
+ "epoch": 1.4338038364096997,
9475
+ "grad_norm": 64.48058226856965,
9476
+ "learning_rate": 4.125274117371401e-08,
9477
+ "logits/chosen": -1.2089612483978271,
9478
+ "logits/rejected": -1.1553493738174438,
9479
+ "logps/chosen": -168.6042938232422,
9480
+ "logps/rejected": -183.2893524169922,
9481
+ "loss": 0.4148,
9482
+ "rewards/accuracies": 0.75,
9483
+ "rewards/chosen": -0.5347051024436951,
9484
+ "rewards/margins": 1.0337334871292114,
9485
+ "rewards/rejected": -1.5684385299682617,
9486
+ "step": 1238
9487
+ },
9488
+ {
9489
+ "epoch": 1.436120159247195,
9490
+ "grad_norm": 66.24493235516152,
9491
+ "learning_rate": 4.094047266094225e-08,
9492
+ "logits/chosen": -1.1589419841766357,
9493
+ "logits/rejected": -1.2252132892608643,
9494
+ "logps/chosen": -121.9848403930664,
9495
+ "logps/rejected": -142.36962890625,
9496
+ "loss": 0.4173,
9497
+ "rewards/accuracies": 0.8125,
9498
+ "rewards/chosen": -0.47534143924713135,
9499
+ "rewards/margins": 0.8780463933944702,
9500
+ "rewards/rejected": -1.353387713432312,
9501
+ "step": 1240
9502
+ },
9503
+ {
9504
+ "epoch": 1.4384364820846907,
9505
+ "grad_norm": 67.05029588369739,
9506
+ "learning_rate": 4.062908602530186e-08,
9507
+ "logits/chosen": -1.2876590490341187,
9508
+ "logits/rejected": -1.2815279960632324,
9509
+ "logps/chosen": -125.93046569824219,
9510
+ "logps/rejected": -150.712890625,
9511
+ "loss": 0.4631,
9512
+ "rewards/accuracies": 0.78125,
9513
+ "rewards/chosen": -0.10989043861627579,
9514
+ "rewards/margins": 1.1261409521102905,
9515
+ "rewards/rejected": -1.2360315322875977,
9516
+ "step": 1242
9517
+ },
9518
+ {
9519
+ "epoch": 1.440752804922186,
9520
+ "grad_norm": 72.90739630648197,
9521
+ "learning_rate": 4.031858591641948e-08,
9522
+ "logits/chosen": -1.2130520343780518,
9523
+ "logits/rejected": -1.2716223001480103,
9524
+ "logps/chosen": -173.3944549560547,
9525
+ "logps/rejected": -214.772705078125,
9526
+ "loss": 0.4677,
9527
+ "rewards/accuracies": 0.90625,
9528
+ "rewards/chosen": -0.10449859499931335,
9529
+ "rewards/margins": 1.398302435874939,
9530
+ "rewards/rejected": -1.5028009414672852,
9531
+ "step": 1244
9532
+ },
9533
+ {
9534
+ "epoch": 1.4430691277596814,
9535
+ "grad_norm": 53.298120215279184,
9536
+ "learning_rate": 4.000897697068417e-08,
9537
+ "logits/chosen": -1.2028003931045532,
9538
+ "logits/rejected": -1.2138316631317139,
9539
+ "logps/chosen": -153.28302001953125,
9540
+ "logps/rejected": -213.52073669433594,
9541
+ "loss": 0.4146,
9542
+ "rewards/accuracies": 0.8125,
9543
+ "rewards/chosen": -0.801752507686615,
9544
+ "rewards/margins": 2.9270172119140625,
9545
+ "rewards/rejected": -3.7287697792053223,
9546
+ "step": 1246
9547
+ },
9548
+ {
9549
+ "epoch": 1.445385450597177,
9550
+ "grad_norm": 65.76302946420111,
9551
+ "learning_rate": 3.970026381117813e-08,
9552
+ "logits/chosen": -1.2451378107070923,
9553
+ "logits/rejected": -1.2875595092773438,
9554
+ "logps/chosen": -148.5320587158203,
9555
+ "logps/rejected": -175.6725616455078,
9556
+ "loss": 0.4043,
9557
+ "rewards/accuracies": 0.84375,
9558
+ "rewards/chosen": -0.14613214135169983,
9559
+ "rewards/margins": 1.1353800296783447,
9560
+ "rewards/rejected": -1.2815121412277222,
9561
+ "step": 1248
9562
+ },
9563
+ {
9564
+ "epoch": 1.4477017734346724,
9565
+ "grad_norm": 60.49195421908293,
9566
+ "learning_rate": 3.93924510476076e-08,
9567
+ "logits/chosen": -1.1492172479629517,
9568
+ "logits/rejected": -1.1965818405151367,
9569
+ "logps/chosen": -182.40252685546875,
9570
+ "logps/rejected": -212.2118682861328,
9571
+ "loss": 0.3858,
9572
+ "rewards/accuracies": 0.90625,
9573
+ "rewards/chosen": -0.37888991832733154,
9574
+ "rewards/margins": 1.5161809921264648,
9575
+ "rewards/rejected": -1.8950707912445068,
9576
+ "step": 1250
9577
+ },
9578
+ {
9579
+ "epoch": 1.450018096272168,
9580
+ "grad_norm": 58.233537771407626,
9581
+ "learning_rate": 3.9085543276234246e-08,
9582
+ "logits/chosen": -1.165425419807434,
9583
+ "logits/rejected": -1.2350255250930786,
9584
+ "logps/chosen": -173.421630859375,
9585
+ "logps/rejected": -217.11502075195312,
9586
+ "loss": 0.395,
9587
+ "rewards/accuracies": 0.875,
9588
+ "rewards/chosen": -0.4715797007083893,
9589
+ "rewards/margins": 1.3812612295150757,
9590
+ "rewards/rejected": -1.8528410196304321,
9591
+ "step": 1252
9592
+ },
9593
+ {
9594
+ "epoch": 1.4523344191096634,
9595
+ "grad_norm": 56.04571021565187,
9596
+ "learning_rate": 3.8779545079806244e-08,
9597
+ "logits/chosen": -1.2306987047195435,
9598
+ "logits/rejected": -1.2661701440811157,
9599
+ "logps/chosen": -158.64395141601562,
9600
+ "logps/rejected": -163.93408203125,
9601
+ "loss": 0.4244,
9602
+ "rewards/accuracies": 0.78125,
9603
+ "rewards/chosen": -0.8128874897956848,
9604
+ "rewards/margins": 1.2007758617401123,
9605
+ "rewards/rejected": -2.0136635303497314,
9606
+ "step": 1254
9607
+ },
9608
+ {
9609
+ "epoch": 1.4546507419471588,
9610
+ "grad_norm": 56.87831566114625,
9611
+ "learning_rate": 3.847446102749009e-08,
9612
+ "logits/chosen": -1.2190110683441162,
9613
+ "logits/rejected": -1.2493897676467896,
9614
+ "logps/chosen": -165.40530395507812,
9615
+ "logps/rejected": -196.738037109375,
9616
+ "loss": 0.4215,
9617
+ "rewards/accuracies": 0.875,
9618
+ "rewards/chosen": -0.03989005833864212,
9619
+ "rewards/margins": 1.3989439010620117,
9620
+ "rewards/rejected": -1.4388341903686523,
9621
+ "step": 1256
9622
+ },
9623
+ {
9624
+ "epoch": 1.4569670647846544,
9625
+ "grad_norm": 50.40656011526863,
9626
+ "learning_rate": 3.817029567480228e-08,
9627
+ "logits/chosen": -1.1830198764801025,
9628
+ "logits/rejected": -1.1742397546768188,
9629
+ "logps/chosen": -136.02699279785156,
9630
+ "logps/rejected": -155.1636199951172,
9631
+ "loss": 0.4128,
9632
+ "rewards/accuracies": 0.90625,
9633
+ "rewards/chosen": -0.21887874603271484,
9634
+ "rewards/margins": 1.521628737449646,
9635
+ "rewards/rejected": -1.7405076026916504,
9636
+ "step": 1258
9637
+ },
9638
+ {
9639
+ "epoch": 1.4592833876221498,
9640
+ "grad_norm": 57.01905107655371,
9641
+ "learning_rate": 3.7867053563541195e-08,
9642
+ "logits/chosen": -0.9882857799530029,
9643
+ "logits/rejected": -0.9833186864852905,
9644
+ "logps/chosen": -144.3942413330078,
9645
+ "logps/rejected": -162.635498046875,
9646
+ "loss": 0.3927,
9647
+ "rewards/accuracies": 0.9375,
9648
+ "rewards/chosen": -0.3512459993362427,
9649
+ "rewards/margins": 1.1930773258209229,
9650
+ "rewards/rejected": -1.544323205947876,
9651
+ "step": 1260
9652
+ },
9653
+ {
9654
+ "epoch": 1.4615997104596454,
9655
+ "grad_norm": 59.931801696827705,
9656
+ "learning_rate": 3.756473922171941e-08,
9657
+ "logits/chosen": -1.2637214660644531,
9658
+ "logits/rejected": -1.3037135601043701,
9659
+ "logps/chosen": -177.24203491210938,
9660
+ "logps/rejected": -203.71690368652344,
9661
+ "loss": 0.4226,
9662
+ "rewards/accuracies": 0.8125,
9663
+ "rewards/chosen": -0.2562026083469391,
9664
+ "rewards/margins": 1.4607383012771606,
9665
+ "rewards/rejected": -1.716940999031067,
9666
+ "step": 1262
9667
+ },
9668
+ {
9669
+ "epoch": 1.4639160332971408,
9670
+ "grad_norm": 61.29727113734488,
9671
+ "learning_rate": 3.726335716349611e-08,
9672
+ "logits/chosen": -1.0998203754425049,
9673
+ "logits/rejected": -1.1926376819610596,
9674
+ "logps/chosen": -140.4119415283203,
9675
+ "logps/rejected": -204.82289123535156,
9676
+ "loss": 0.3865,
9677
+ "rewards/accuracies": 0.875,
9678
+ "rewards/chosen": -0.2265455424785614,
9679
+ "rewards/margins": 1.8351831436157227,
9680
+ "rewards/rejected": -2.0617284774780273,
9681
+ "step": 1264
9682
+ },
9683
+ {
9684
+ "epoch": 1.4662323561346362,
9685
+ "grad_norm": 52.33658884441956,
9686
+ "learning_rate": 3.696291188910954e-08,
9687
+ "logits/chosen": -1.334247350692749,
9688
+ "logits/rejected": -1.2990857362747192,
9689
+ "logps/chosen": -211.4185791015625,
9690
+ "logps/rejected": -239.36732482910156,
9691
+ "loss": 0.4207,
9692
+ "rewards/accuracies": 0.8125,
9693
+ "rewards/chosen": -0.521153450012207,
9694
+ "rewards/margins": 1.7276406288146973,
9695
+ "rewards/rejected": -2.2487940788269043,
9696
+ "step": 1266
9697
+ },
9698
+ {
9699
+ "epoch": 1.4685486789721318,
9700
+ "grad_norm": 55.99376810708936,
9701
+ "learning_rate": 3.666340788480986e-08,
9702
+ "logits/chosen": -1.2197870016098022,
9703
+ "logits/rejected": -1.2413604259490967,
9704
+ "logps/chosen": -152.36419677734375,
9705
+ "logps/rejected": -199.79225158691406,
9706
+ "loss": 0.4336,
9707
+ "rewards/accuracies": 0.78125,
9708
+ "rewards/chosen": -0.41305410861968994,
9709
+ "rewards/margins": 1.4322149753570557,
9710
+ "rewards/rejected": -1.845268964767456,
9711
+ "step": 1268
9712
+ },
9713
+ {
9714
+ "epoch": 1.4708650018096272,
9715
+ "grad_norm": 48.41879744298975,
9716
+ "learning_rate": 3.636484962279226e-08,
9717
+ "logits/chosen": -1.319197416305542,
9718
+ "logits/rejected": -1.4153599739074707,
9719
+ "logps/chosen": -138.763427734375,
9720
+ "logps/rejected": -166.7461395263672,
9721
+ "loss": 0.3822,
9722
+ "rewards/accuracies": 0.875,
9723
+ "rewards/chosen": 0.17914807796478271,
9724
+ "rewards/margins": 1.2840946912765503,
9725
+ "rewards/rejected": -1.1049466133117676,
9726
+ "step": 1270
9727
+ },
9728
+ {
9729
+ "epoch": 1.4731813246471228,
9730
+ "grad_norm": 61.194603768661054,
9731
+ "learning_rate": 3.6067241561130114e-08,
9732
+ "logits/chosen": -1.2439590692520142,
9733
+ "logits/rejected": -1.183296799659729,
9734
+ "logps/chosen": -141.58709716796875,
9735
+ "logps/rejected": -141.97996520996094,
9736
+ "loss": 0.409,
9737
+ "rewards/accuracies": 0.8125,
9738
+ "rewards/chosen": 0.017182359471917152,
9739
+ "rewards/margins": 0.84832763671875,
9740
+ "rewards/rejected": -0.8311452269554138,
9741
+ "step": 1272
9742
+ },
9743
+ {
9744
+ "epoch": 1.4754976474846182,
9745
+ "grad_norm": 77.92386005527986,
9746
+ "learning_rate": 3.5770588143708315e-08,
9747
+ "logits/chosen": -1.2311725616455078,
9748
+ "logits/rejected": -1.2159764766693115,
9749
+ "logps/chosen": -155.29443359375,
9750
+ "logps/rejected": -178.33035278320312,
9751
+ "loss": 0.4345,
9752
+ "rewards/accuracies": 0.90625,
9753
+ "rewards/chosen": 0.0964183509349823,
9754
+ "rewards/margins": 1.3752447366714478,
9755
+ "rewards/rejected": -1.278826355934143,
9756
+ "step": 1274
9757
+ },
9758
+ {
9759
+ "epoch": 1.4778139703221136,
9760
+ "grad_norm": 59.69529021775264,
9761
+ "learning_rate": 3.5474893800157e-08,
9762
+ "logits/chosen": -1.2245632410049438,
9763
+ "logits/rejected": -1.216321349143982,
9764
+ "logps/chosen": -146.5887451171875,
9765
+ "logps/rejected": -148.22378540039062,
9766
+ "loss": 0.4262,
9767
+ "rewards/accuracies": 0.78125,
9768
+ "rewards/chosen": -0.34651532769203186,
9769
+ "rewards/margins": 0.8327122926712036,
9770
+ "rewards/rejected": -1.179227590560913,
9771
+ "step": 1276
9772
+ },
9773
+ {
9774
+ "epoch": 1.4801302931596092,
9775
+ "grad_norm": 75.38031016102305,
9776
+ "learning_rate": 3.5180162945785554e-08,
9777
+ "logits/chosen": -1.2859201431274414,
9778
+ "logits/rejected": -1.3334230184555054,
9779
+ "logps/chosen": -122.11463928222656,
9780
+ "logps/rejected": -150.13040161132812,
9781
+ "loss": 0.4738,
9782
+ "rewards/accuracies": 0.875,
9783
+ "rewards/chosen": -0.1191844493150711,
9784
+ "rewards/margins": 1.2066011428833008,
9785
+ "rewards/rejected": -1.3257856369018555,
9786
+ "step": 1278
9787
+ },
9788
+ {
9789
+ "epoch": 1.4824466159971046,
9790
+ "grad_norm": 70.9685656111324,
9791
+ "learning_rate": 3.488639998151633e-08,
9792
+ "logits/chosen": -1.1641169786453247,
9793
+ "logits/rejected": -1.2118412256240845,
9794
+ "logps/chosen": -177.6825714111328,
9795
+ "logps/rejected": -220.609619140625,
9796
+ "loss": 0.452,
9797
+ "rewards/accuracies": 0.8125,
9798
+ "rewards/chosen": -0.5036740303039551,
9799
+ "rewards/margins": 2.158677101135254,
9800
+ "rewards/rejected": -2.662351131439209,
9801
+ "step": 1280
9802
+ },
9803
+ {
9804
+ "epoch": 1.4847629388346002,
9805
+ "grad_norm": 56.844036643554055,
9806
+ "learning_rate": 3.45936092938193e-08,
9807
+ "logits/chosen": -1.2509068250656128,
9808
+ "logits/rejected": -1.3152053356170654,
9809
+ "logps/chosen": -156.49288940429688,
9810
+ "logps/rejected": -175.0799102783203,
9811
+ "loss": 0.4025,
9812
+ "rewards/accuracies": 0.8125,
9813
+ "rewards/chosen": -0.10208474099636078,
9814
+ "rewards/margins": 1.4737757444381714,
9815
+ "rewards/rejected": -1.5758603811264038,
9816
+ "step": 1282
9817
+ },
9818
+ {
9819
+ "epoch": 1.4870792616720956,
9820
+ "grad_norm": 63.04162868208357,
9821
+ "learning_rate": 3.4301795254646396e-08,
9822
+ "logits/chosen": -1.1289265155792236,
9823
+ "logits/rejected": -1.1296483278274536,
9824
+ "logps/chosen": -92.31997680664062,
9825
+ "logps/rejected": -125.47240447998047,
9826
+ "loss": 0.4319,
9827
+ "rewards/accuracies": 0.9375,
9828
+ "rewards/chosen": -0.2308368980884552,
9829
+ "rewards/margins": 1.562235951423645,
9830
+ "rewards/rejected": -1.7930728197097778,
9831
+ "step": 1284
9832
+ },
9833
+ {
9834
+ "epoch": 1.489395584509591,
9835
+ "grad_norm": 53.7512285130868,
9836
+ "learning_rate": 3.4010962221366125e-08,
9837
+ "logits/chosen": -1.2739626169204712,
9838
+ "logits/rejected": -1.3384690284729004,
9839
+ "logps/chosen": -133.05880737304688,
9840
+ "logps/rejected": -184.07568359375,
9841
+ "loss": 0.4018,
9842
+ "rewards/accuracies": 0.875,
9843
+ "rewards/chosen": -0.1107388436794281,
9844
+ "rewards/margins": 1.6195769309997559,
9845
+ "rewards/rejected": -1.7303158044815063,
9846
+ "step": 1286
9847
+ },
9848
+ {
9849
+ "epoch": 1.4917119073470864,
9850
+ "grad_norm": 69.01008947683725,
9851
+ "learning_rate": 3.3721114536698635e-08,
9852
+ "logits/chosen": -1.3090903759002686,
9853
+ "logits/rejected": -1.3190556764602661,
9854
+ "logps/chosen": -157.00119018554688,
9855
+ "logps/rejected": -156.64285278320312,
9856
+ "loss": 0.435,
9857
+ "rewards/accuracies": 0.84375,
9858
+ "rewards/chosen": -0.31143975257873535,
9859
+ "rewards/margins": 0.8066626787185669,
9860
+ "rewards/rejected": -1.1181025505065918,
9861
+ "step": 1288
9862
+ },
9863
+ {
9864
+ "epoch": 1.494028230184582,
9865
+ "grad_norm": 50.25709967553361,
9866
+ "learning_rate": 3.343225652865095e-08,
9867
+ "logits/chosen": -1.1199434995651245,
9868
+ "logits/rejected": -1.2268508672714233,
9869
+ "logps/chosen": -122.3958740234375,
9870
+ "logps/rejected": -160.63177490234375,
9871
+ "loss": 0.3879,
9872
+ "rewards/accuracies": 0.84375,
9873
+ "rewards/chosen": -0.30211785435676575,
9874
+ "rewards/margins": 1.6221270561218262,
9875
+ "rewards/rejected": -1.9242448806762695,
9876
+ "step": 1290
9877
+ },
9878
+ {
9879
+ "epoch": 1.4963445530220776,
9880
+ "grad_norm": 52.88445623952638,
9881
+ "learning_rate": 3.3144392510452125e-08,
9882
+ "logits/chosen": -1.2343542575836182,
9883
+ "logits/rejected": -1.3168511390686035,
9884
+ "logps/chosen": -92.0683822631836,
9885
+ "logps/rejected": -115.30619812011719,
9886
+ "loss": 0.4227,
9887
+ "rewards/accuracies": 0.8125,
9888
+ "rewards/chosen": -0.022456973791122437,
9889
+ "rewards/margins": 1.0297763347625732,
9890
+ "rewards/rejected": -1.052233338356018,
9891
+ "step": 1292
9892
+ },
9893
+ {
9894
+ "epoch": 1.498660875859573,
9895
+ "grad_norm": 58.861501596082704,
9896
+ "learning_rate": 3.285752678048892e-08,
9897
+ "logits/chosen": -1.1001089811325073,
9898
+ "logits/rejected": -1.200326919555664,
9899
+ "logps/chosen": -122.45906829833984,
9900
+ "logps/rejected": -142.00970458984375,
9901
+ "loss": 0.3886,
9902
+ "rewards/accuracies": 0.75,
9903
+ "rewards/chosen": -0.28310656547546387,
9904
+ "rewards/margins": 0.9431065320968628,
9905
+ "rewards/rejected": -1.226212978363037,
9906
+ "step": 1294
9907
+ },
9908
+ {
9909
+ "epoch": 1.5009771986970684,
9910
+ "grad_norm": 56.84546595956147,
9911
+ "learning_rate": 3.2571663622241875e-08,
9912
+ "logits/chosen": -1.2022103071212769,
9913
+ "logits/rejected": -1.2175830602645874,
9914
+ "logps/chosen": -152.65174865722656,
9915
+ "logps/rejected": -191.4637908935547,
9916
+ "loss": 0.4328,
9917
+ "rewards/accuracies": 0.84375,
9918
+ "rewards/chosen": -0.23669162392616272,
9919
+ "rewards/margins": 1.503944993019104,
9920
+ "rewards/rejected": -1.7406367063522339,
9921
+ "step": 1296
9922
+ },
9923
+ {
9924
+ "epoch": 1.5032935215345637,
9925
+ "grad_norm": 75.28276112082713,
9926
+ "learning_rate": 3.2286807304220874e-08,
9927
+ "logits/chosen": -1.2572470903396606,
9928
+ "logits/rejected": -1.3082858324050903,
9929
+ "logps/chosen": -178.1489715576172,
9930
+ "logps/rejected": -204.2378692626953,
9931
+ "loss": 0.3727,
9932
+ "rewards/accuracies": 0.875,
9933
+ "rewards/chosen": -0.29775533080101013,
9934
+ "rewards/margins": 1.6656044721603394,
9935
+ "rewards/rejected": -1.9633598327636719,
9936
+ "step": 1298
9937
+ },
9938
+ {
9939
+ "epoch": 1.5056098443720594,
9940
+ "grad_norm": 58.03906016771635,
9941
+ "learning_rate": 3.200296207990174e-08,
9942
+ "logits/chosen": -1.234593391418457,
9943
+ "logits/rejected": -1.1559195518493652,
9944
+ "logps/chosen": -146.87179565429688,
9945
+ "logps/rejected": -168.06884765625,
9946
+ "loss": 0.4228,
9947
+ "rewards/accuracies": 0.75,
9948
+ "rewards/chosen": -0.2712811231613159,
9949
+ "rewards/margins": 1.2952864170074463,
9950
+ "rewards/rejected": -1.5665674209594727,
9951
+ "step": 1300
9952
+ },
9953
+ {
9954
+ "epoch": 1.5056098443720594,
9955
+ "eval_logits/chosen": -1.2275954484939575,
9956
+ "eval_logits/rejected": -1.222235918045044,
9957
+ "eval_logps/chosen": -142.21490478515625,
9958
+ "eval_logps/rejected": -147.02261352539062,
9959
+ "eval_loss": 0.5889570713043213,
9960
+ "eval_rewards/accuracies": 0.7599999904632568,
9961
+ "eval_rewards/chosen": -0.6583734154701233,
9962
+ "eval_rewards/margins": 0.6485320329666138,
9963
+ "eval_rewards/rejected": -1.3069055080413818,
9964
+ "eval_runtime": 28.0973,
9965
+ "eval_samples_per_second": 3.559,
9966
+ "eval_steps_per_second": 0.89,
9967
+ "step": 1300
9968
+ },
9969
+ {
9970
+ "epoch": 1.507926167209555,
9971
+ "grad_norm": 53.8054068622811,
9972
+ "learning_rate": 3.172013218766273e-08,
9973
+ "logits/chosen": -1.1949838399887085,
9974
+ "logits/rejected": -1.1276732683181763,
9975
+ "logps/chosen": -116.6605453491211,
9976
+ "logps/rejected": -129.42701721191406,
9977
+ "loss": 0.4054,
9978
+ "rewards/accuracies": 0.84375,
9979
+ "rewards/chosen": -0.26029157638549805,
9980
+ "rewards/margins": 0.894656777381897,
9981
+ "rewards/rejected": -1.1549484729766846,
9982
+ "step": 1302
9983
+ },
9984
+ {
9985
+ "epoch": 1.5102424900470504,
9986
+ "grad_norm": 53.44469670494882,
9987
+ "learning_rate": 3.143832185072103e-08,
9988
+ "logits/chosen": -1.330610752105713,
9989
+ "logits/rejected": -1.3352289199829102,
9990
+ "logps/chosen": -129.06997680664062,
9991
+ "logps/rejected": -138.2757568359375,
9992
+ "loss": 0.4228,
9993
+ "rewards/accuracies": 0.8125,
9994
+ "rewards/chosen": -0.19665758311748505,
9995
+ "rewards/margins": 0.8899365067481995,
9996
+ "rewards/rejected": -1.0865941047668457,
9997
+ "step": 1304
9998
+ },
9999
+ {
10000
+ "epoch": 1.5125588128845457,
10001
+ "grad_norm": 52.12869653110471,
10002
+ "learning_rate": 3.115753527706986e-08,
10003
+ "logits/chosen": -1.2492622137069702,
10004
+ "logits/rejected": -1.2376924753189087,
10005
+ "logps/chosen": -176.64134216308594,
10006
+ "logps/rejected": -197.2509307861328,
10007
+ "loss": 0.3503,
10008
+ "rewards/accuracies": 0.90625,
10009
+ "rewards/chosen": -0.33254703879356384,
10010
+ "rewards/margins": 1.2227458953857422,
10011
+ "rewards/rejected": -1.5552929639816284,
10012
+ "step": 1306
10013
+ },
10014
+ {
10015
+ "epoch": 1.5148751357220411,
10016
+ "grad_norm": 76.85376628040396,
10017
+ "learning_rate": 3.087777665941565e-08,
10018
+ "logits/chosen": -1.0722814798355103,
10019
+ "logits/rejected": -1.1189197301864624,
10020
+ "logps/chosen": -142.5889892578125,
10021
+ "logps/rejected": -179.30929565429688,
10022
+ "loss": 0.425,
10023
+ "rewards/accuracies": 0.9375,
10024
+ "rewards/chosen": -0.607629120349884,
10025
+ "rewards/margins": 1.3540990352630615,
10026
+ "rewards/rejected": -1.9617282152175903,
10027
+ "step": 1308
10028
+ },
10029
+ {
10030
+ "epoch": 1.5171914585595367,
10031
+ "grad_norm": 53.40350152738603,
10032
+ "learning_rate": 3.059905017511536e-08,
10033
+ "logits/chosen": -1.289185881614685,
10034
+ "logits/rejected": -1.3002314567565918,
10035
+ "logps/chosen": -155.68594360351562,
10036
+ "logps/rejected": -181.0437774658203,
10037
+ "loss": 0.4321,
10038
+ "rewards/accuracies": 0.78125,
10039
+ "rewards/chosen": -0.1934920847415924,
10040
+ "rewards/margins": 1.0485948324203491,
10041
+ "rewards/rejected": -1.2420868873596191,
10042
+ "step": 1310
10043
+ },
10044
+ {
10045
+ "epoch": 1.5195077813970324,
10046
+ "grad_norm": 62.10414133443873,
10047
+ "learning_rate": 3.032135998611409e-08,
10048
+ "logits/chosen": -1.2625949382781982,
10049
+ "logits/rejected": -1.2948625087738037,
10050
+ "logps/chosen": -120.27640533447266,
10051
+ "logps/rejected": -124.78179931640625,
10052
+ "loss": 0.4706,
10053
+ "rewards/accuracies": 0.75,
10054
+ "rewards/chosen": -0.41021931171417236,
10055
+ "rewards/margins": 0.7083292603492737,
10056
+ "rewards/rejected": -1.1185486316680908,
10057
+ "step": 1312
10058
+ },
10059
+ {
10060
+ "epoch": 1.5218241042345277,
10061
+ "grad_norm": 53.355625531453235,
10062
+ "learning_rate": 3.004471023888307e-08,
10063
+ "logits/chosen": -1.202606201171875,
10064
+ "logits/rejected": -1.2765260934829712,
10065
+ "logps/chosen": -154.8223876953125,
10066
+ "logps/rejected": -174.3974609375,
10067
+ "loss": 0.3724,
10068
+ "rewards/accuracies": 0.90625,
10069
+ "rewards/chosen": -0.02067536488175392,
10070
+ "rewards/margins": 1.3669226169586182,
10071
+ "rewards/rejected": -1.3875980377197266,
10072
+ "step": 1314
10073
+ },
10074
+ {
10075
+ "epoch": 1.5241404270720231,
10076
+ "grad_norm": 59.47198871147131,
10077
+ "learning_rate": 2.9769105064357537e-08,
10078
+ "logits/chosen": -1.3064639568328857,
10079
+ "logits/rejected": -1.2530848979949951,
10080
+ "logps/chosen": -183.5660858154297,
10081
+ "logps/rejected": -209.7610626220703,
10082
+ "loss": 0.4216,
10083
+ "rewards/accuracies": 0.84375,
10084
+ "rewards/chosen": -0.21822071075439453,
10085
+ "rewards/margins": 1.0935778617858887,
10086
+ "rewards/rejected": -1.3117986917495728,
10087
+ "step": 1316
10088
+ },
10089
+ {
10090
+ "epoch": 1.5264567499095185,
10091
+ "grad_norm": 65.91690062559164,
10092
+ "learning_rate": 2.949454857787519e-08,
10093
+ "logits/chosen": -1.238956093788147,
10094
+ "logits/rejected": -1.218425989151001,
10095
+ "logps/chosen": -214.0928955078125,
10096
+ "logps/rejected": -242.11489868164062,
10097
+ "loss": 0.431,
10098
+ "rewards/accuracies": 0.78125,
10099
+ "rewards/chosen": -0.3267192244529724,
10100
+ "rewards/margins": 2.120006799697876,
10101
+ "rewards/rejected": -2.446725606918335,
10102
+ "step": 1318
10103
+ },
10104
+ {
10105
+ "epoch": 1.5287730727470141,
10106
+ "grad_norm": 79.84542515208742,
10107
+ "learning_rate": 2.9221044879114775e-08,
10108
+ "logits/chosen": -1.251328468322754,
10109
+ "logits/rejected": -1.3467867374420166,
10110
+ "logps/chosen": -177.4748077392578,
10111
+ "logps/rejected": -217.42910766601562,
10112
+ "loss": 0.4328,
10113
+ "rewards/accuracies": 0.90625,
10114
+ "rewards/chosen": -0.36167412996292114,
10115
+ "rewards/margins": 1.5050268173217773,
10116
+ "rewards/rejected": -1.8667008876800537,
10117
+ "step": 1320
10118
+ },
10119
+ {
10120
+ "epoch": 1.5310893955845097,
10121
+ "grad_norm": 49.694028003635175,
10122
+ "learning_rate": 2.8948598052034777e-08,
10123
+ "logits/chosen": -1.3321678638458252,
10124
+ "logits/rejected": -1.336784839630127,
10125
+ "logps/chosen": -151.92041015625,
10126
+ "logps/rejected": -168.43971252441406,
10127
+ "loss": 0.4527,
10128
+ "rewards/accuracies": 0.8125,
10129
+ "rewards/chosen": -0.07541098445653915,
10130
+ "rewards/margins": 1.295732021331787,
10131
+ "rewards/rejected": -1.371143102645874,
10132
+ "step": 1322
10133
+ },
10134
+ {
10135
+ "epoch": 1.5334057184220051,
10136
+ "grad_norm": 53.56450173273162,
10137
+ "learning_rate": 2.867721216481246e-08,
10138
+ "logits/chosen": -1.278252363204956,
10139
+ "logits/rejected": -1.380322813987732,
10140
+ "logps/chosen": -117.78822326660156,
10141
+ "logps/rejected": -142.52127075195312,
10142
+ "loss": 0.4433,
10143
+ "rewards/accuracies": 0.8125,
10144
+ "rewards/chosen": -0.0004923827946186066,
10145
+ "rewards/margins": 0.94923996925354,
10146
+ "rewards/rejected": -0.94973224401474,
10147
+ "step": 1324
10148
+ },
10149
+ {
10150
+ "epoch": 1.5357220412595005,
10151
+ "grad_norm": 61.396961223172646,
10152
+ "learning_rate": 2.8406891269783073e-08,
10153
+ "logits/chosen": -1.1902800798416138,
10154
+ "logits/rejected": -1.3021633625030518,
10155
+ "logps/chosen": -158.06736755371094,
10156
+ "logps/rejected": -229.27200317382812,
10157
+ "loss": 0.4586,
10158
+ "rewards/accuracies": 0.8125,
10159
+ "rewards/chosen": -0.1848677396774292,
10160
+ "rewards/margins": 1.0514723062515259,
10161
+ "rewards/rejected": -1.236340045928955,
10162
+ "step": 1326
10163
+ },
10164
+ {
10165
+ "epoch": 1.538038364096996,
10166
+ "grad_norm": 60.56277160405794,
10167
+ "learning_rate": 2.813763940337952e-08,
10168
+ "logits/chosen": -1.2507346868515015,
10169
+ "logits/rejected": -1.2003321647644043,
10170
+ "logps/chosen": -127.8934097290039,
10171
+ "logps/rejected": -149.8028564453125,
10172
+ "loss": 0.4295,
10173
+ "rewards/accuracies": 0.78125,
10174
+ "rewards/chosen": -0.22713351249694824,
10175
+ "rewards/margins": 1.3372572660446167,
10176
+ "rewards/rejected": -1.5643908977508545,
10177
+ "step": 1328
10178
+ },
10179
+ {
10180
+ "epoch": 1.5403546869344915,
10181
+ "grad_norm": 49.96943093434472,
10182
+ "learning_rate": 2.7869460586071868e-08,
10183
+ "logits/chosen": -1.290654182434082,
10184
+ "logits/rejected": -1.226252794265747,
10185
+ "logps/chosen": -154.7710418701172,
10186
+ "logps/rejected": -160.32904052734375,
10187
+ "loss": 0.4118,
10188
+ "rewards/accuracies": 0.96875,
10189
+ "rewards/chosen": 0.027480699121952057,
10190
+ "rewards/margins": 0.9481416940689087,
10191
+ "rewards/rejected": -0.9206609129905701,
10192
+ "step": 1330
10193
+ },
10194
+ {
10195
+ "epoch": 1.5426710097719871,
10196
+ "grad_norm": 59.55494034761672,
10197
+ "learning_rate": 2.7602358822307413e-08,
10198
+ "logits/chosen": -1.2469313144683838,
10199
+ "logits/rejected": -1.2422947883605957,
10200
+ "logps/chosen": -134.33392333984375,
10201
+ "logps/rejected": -147.20509338378906,
10202
+ "loss": 0.4157,
10203
+ "rewards/accuracies": 0.875,
10204
+ "rewards/chosen": -0.2177230417728424,
10205
+ "rewards/margins": 1.3250346183776855,
10206
+ "rewards/rejected": -1.5427578687667847,
10207
+ "step": 1332
10208
+ },
10209
+ {
10210
+ "epoch": 1.5449873326094825,
10211
+ "grad_norm": 50.53246486906999,
10212
+ "learning_rate": 2.733633810045094e-08,
10213
+ "logits/chosen": -1.2038668394088745,
10214
+ "logits/rejected": -1.2241549491882324,
10215
+ "logps/chosen": -134.41952514648438,
10216
+ "logps/rejected": -164.3434600830078,
10217
+ "loss": 0.3798,
10218
+ "rewards/accuracies": 0.90625,
10219
+ "rewards/chosen": -0.30086490511894226,
10220
+ "rewards/margins": 1.5149168968200684,
10221
+ "rewards/rejected": -1.8157817125320435,
10222
+ "step": 1334
10223
+ },
10224
+ {
10225
+ "epoch": 1.547303655446978,
10226
+ "grad_norm": 64.60352169707365,
10227
+ "learning_rate": 2.7071402392725096e-08,
10228
+ "logits/chosen": -1.223114013671875,
10229
+ "logits/rejected": -1.2082433700561523,
10230
+ "logps/chosen": -165.63157653808594,
10231
+ "logps/rejected": -190.38706970214844,
10232
+ "loss": 0.4127,
10233
+ "rewards/accuracies": 0.875,
10234
+ "rewards/chosen": -0.4103352725505829,
10235
+ "rewards/margins": 1.6495842933654785,
10236
+ "rewards/rejected": -2.0599193572998047,
10237
+ "step": 1336
10238
+ },
10239
+ {
10240
+ "epoch": 1.5496199782844733,
10241
+ "grad_norm": 49.10542820839143,
10242
+ "learning_rate": 2.6807555655151025e-08,
10243
+ "logits/chosen": -1.2764735221862793,
10244
+ "logits/rejected": -1.3327206373214722,
10245
+ "logps/chosen": -134.23565673828125,
10246
+ "logps/rejected": -163.20474243164062,
10247
+ "loss": 0.3816,
10248
+ "rewards/accuracies": 0.78125,
10249
+ "rewards/chosen": -0.16112415492534637,
10250
+ "rewards/margins": 1.4172645807266235,
10251
+ "rewards/rejected": -1.5783886909484863,
10252
+ "step": 1338
10253
+ },
10254
+ {
10255
+ "epoch": 1.551936301121969,
10256
+ "grad_norm": 50.20237542030147,
10257
+ "learning_rate": 2.6544801827489482e-08,
10258
+ "logits/chosen": -1.2196974754333496,
10259
+ "logits/rejected": -1.1965646743774414,
10260
+ "logps/chosen": -129.3689422607422,
10261
+ "logps/rejected": -142.672119140625,
10262
+ "loss": 0.367,
10263
+ "rewards/accuracies": 0.84375,
10264
+ "rewards/chosen": -0.257286012172699,
10265
+ "rewards/margins": 1.1139978170394897,
10266
+ "rewards/rejected": -1.3712838888168335,
10267
+ "step": 1340
10268
+ },
10269
+ {
10270
+ "epoch": 1.5542526239594645,
10271
+ "grad_norm": 59.30212350967455,
10272
+ "learning_rate": 2.6283144833181782e-08,
10273
+ "logits/chosen": -1.1602783203125,
10274
+ "logits/rejected": -1.1242191791534424,
10275
+ "logps/chosen": -120.19819641113281,
10276
+ "logps/rejected": -154.9192352294922,
10277
+ "loss": 0.44,
10278
+ "rewards/accuracies": 0.875,
10279
+ "rewards/chosen": -0.2868664264678955,
10280
+ "rewards/margins": 1.309116244316101,
10281
+ "rewards/rejected": -1.5959827899932861,
10282
+ "step": 1342
10283
+ },
10284
+ {
10285
+ "epoch": 1.55656894679696,
10286
+ "grad_norm": 55.629592680909475,
10287
+ "learning_rate": 2.6022588579291327e-08,
10288
+ "logits/chosen": -1.1773267984390259,
10289
+ "logits/rejected": -1.1578972339630127,
10290
+ "logps/chosen": -120.31849670410156,
10291
+ "logps/rejected": -141.1858367919922,
10292
+ "loss": 0.4489,
10293
+ "rewards/accuracies": 0.84375,
10294
+ "rewards/chosen": -0.2234707921743393,
10295
+ "rewards/margins": 0.9735670685768127,
10296
+ "rewards/rejected": -1.1970378160476685,
10297
+ "step": 1344
10298
+ },
10299
+ {
10300
+ "epoch": 1.5588852696344553,
10301
+ "grad_norm": 58.52065186564036,
10302
+ "learning_rate": 2.5763136956445342e-08,
10303
+ "logits/chosen": -1.13753342628479,
10304
+ "logits/rejected": -1.1701133251190186,
10305
+ "logps/chosen": -139.2621612548828,
10306
+ "logps/rejected": -194.79722595214844,
10307
+ "loss": 0.4361,
10308
+ "rewards/accuracies": 0.75,
10309
+ "rewards/chosen": -0.3876231908798218,
10310
+ "rewards/margins": 1.8029227256774902,
10311
+ "rewards/rejected": -2.1905460357666016,
10312
+ "step": 1346
10313
+ },
10314
+ {
10315
+ "epoch": 1.5612015924719507,
10316
+ "grad_norm": 53.922591259946095,
10317
+ "learning_rate": 2.5504793838776582e-08,
10318
+ "logits/chosen": -1.2545125484466553,
10319
+ "logits/rejected": -1.330094814300537,
10320
+ "logps/chosen": -150.107421875,
10321
+ "logps/rejected": -178.18336486816406,
10322
+ "loss": 0.3999,
10323
+ "rewards/accuracies": 0.8125,
10324
+ "rewards/chosen": -0.33850181102752686,
10325
+ "rewards/margins": 1.4819968938827515,
10326
+ "rewards/rejected": -1.8204987049102783,
10327
+ "step": 1348
10328
+ },
10329
+ {
10330
+ "epoch": 1.5635179153094463,
10331
+ "grad_norm": 50.688082246559354,
10332
+ "learning_rate": 2.5247563083865697e-08,
10333
+ "logits/chosen": -1.2270935773849487,
10334
+ "logits/rejected": -1.2281591892242432,
10335
+ "logps/chosen": -151.96878051757812,
10336
+ "logps/rejected": -163.12692260742188,
10337
+ "loss": 0.3785,
10338
+ "rewards/accuracies": 0.78125,
10339
+ "rewards/chosen": -0.06753317266702652,
10340
+ "rewards/margins": 0.8644936084747314,
10341
+ "rewards/rejected": -0.9320268630981445,
10342
+ "step": 1350
10343
+ },
10344
+ {
10345
+ "epoch": 1.565834238146942,
10346
+ "grad_norm": 48.380975770041516,
10347
+ "learning_rate": 2.4991448532683525e-08,
10348
+ "logits/chosen": -1.08161461353302,
10349
+ "logits/rejected": -1.0994572639465332,
10350
+ "logps/chosen": -72.54817962646484,
10351
+ "logps/rejected": -86.65010833740234,
10352
+ "loss": 0.4118,
10353
+ "rewards/accuracies": 0.8125,
10354
+ "rewards/chosen": 0.03128306567668915,
10355
+ "rewards/margins": 1.0805275440216064,
10356
+ "rewards/rejected": -1.0492445230484009,
10357
+ "step": 1352
10358
+ },
10359
+ {
10360
+ "epoch": 1.5681505609844373,
10361
+ "grad_norm": 56.228286429005685,
10362
+ "learning_rate": 2.4736454009533657e-08,
10363
+ "logits/chosen": -1.110878586769104,
10364
+ "logits/rejected": -1.1308159828186035,
10365
+ "logps/chosen": -91.99187469482422,
10366
+ "logps/rejected": -103.77259826660156,
10367
+ "loss": 0.4426,
10368
+ "rewards/accuracies": 0.71875,
10369
+ "rewards/chosen": -0.12472671270370483,
10370
+ "rewards/margins": 0.7570998668670654,
10371
+ "rewards/rejected": -0.8818265199661255,
10372
+ "step": 1354
10373
+ },
10374
+ {
10375
+ "epoch": 1.5704668838219327,
10376
+ "grad_norm": 45.1944314609051,
10377
+ "learning_rate": 2.4482583321995476e-08,
10378
+ "logits/chosen": -1.1095668077468872,
10379
+ "logits/rejected": -1.0675170421600342,
10380
+ "logps/chosen": -136.01248168945312,
10381
+ "logps/rejected": -142.30926513671875,
10382
+ "loss": 0.4013,
10383
+ "rewards/accuracies": 0.78125,
10384
+ "rewards/chosen": -0.3576383888721466,
10385
+ "rewards/margins": 1.211026668548584,
10386
+ "rewards/rejected": -1.5686650276184082,
10387
+ "step": 1356
10388
+ },
10389
+ {
10390
+ "epoch": 1.572783206659428,
10391
+ "grad_norm": 69.47752257662052,
10392
+ "learning_rate": 2.4229840260867286e-08,
10393
+ "logits/chosen": -1.1537913084030151,
10394
+ "logits/rejected": -1.1009234189987183,
10395
+ "logps/chosen": -154.23377990722656,
10396
+ "logps/rejected": -169.52256774902344,
10397
+ "loss": 0.4161,
10398
+ "rewards/accuracies": 0.84375,
10399
+ "rewards/chosen": -0.404860258102417,
10400
+ "rewards/margins": 1.2964788675308228,
10401
+ "rewards/rejected": -1.7013392448425293,
10402
+ "step": 1358
10403
+ },
10404
+ {
10405
+ "epoch": 1.5750995294969237,
10406
+ "grad_norm": 57.67713141201682,
10407
+ "learning_rate": 2.3978228600109563e-08,
10408
+ "logits/chosen": -1.2760729789733887,
10409
+ "logits/rejected": -1.3226953744888306,
10410
+ "logps/chosen": -157.44998168945312,
10411
+ "logps/rejected": -199.5008087158203,
10412
+ "loss": 0.4445,
10413
+ "rewards/accuracies": 0.84375,
10414
+ "rewards/chosen": -0.27148348093032837,
10415
+ "rewards/margins": 1.2586216926574707,
10416
+ "rewards/rejected": -1.5301051139831543,
10417
+ "step": 1360
10418
+ },
10419
+ {
10420
+ "epoch": 1.577415852334419,
10421
+ "grad_norm": 63.00178299040625,
10422
+ "learning_rate": 2.372775209678881e-08,
10423
+ "logits/chosen": -1.2917158603668213,
10424
+ "logits/rejected": -1.2442728281021118,
10425
+ "logps/chosen": -135.3437957763672,
10426
+ "logps/rejected": -150.4008026123047,
10427
+ "loss": 0.4729,
10428
+ "rewards/accuracies": 0.75,
10429
+ "rewards/chosen": -0.21689726412296295,
10430
+ "rewards/margins": 0.914188802242279,
10431
+ "rewards/rejected": -1.1310861110687256,
10432
+ "step": 1362
10433
+ },
10434
+ {
10435
+ "epoch": 1.5797321751719147,
10436
+ "grad_norm": 54.9545294199578,
10437
+ "learning_rate": 2.347841449102136e-08,
10438
+ "logits/chosen": -1.16354501247406,
10439
+ "logits/rejected": -1.2029650211334229,
10440
+ "logps/chosen": -127.01541137695312,
10441
+ "logps/rejected": -160.94837951660156,
10442
+ "loss": 0.4415,
10443
+ "rewards/accuracies": 0.84375,
10444
+ "rewards/chosen": -0.2984340488910675,
10445
+ "rewards/margins": 1.5368034839630127,
10446
+ "rewards/rejected": -1.8352375030517578,
10447
+ "step": 1364
10448
+ },
10449
+ {
10450
+ "epoch": 1.58204849800941,
10451
+ "grad_norm": 59.15647045553974,
10452
+ "learning_rate": 2.3230219505917424e-08,
10453
+ "logits/chosen": -1.1245296001434326,
10454
+ "logits/rejected": -1.1090185642242432,
10455
+ "logps/chosen": -88.6761245727539,
10456
+ "logps/rejected": -99.77947235107422,
10457
+ "loss": 0.4674,
10458
+ "rewards/accuracies": 0.8125,
10459
+ "rewards/chosen": -0.2688758969306946,
10460
+ "rewards/margins": 1.0267162322998047,
10461
+ "rewards/rejected": -1.295592188835144,
10462
+ "step": 1366
10463
+ },
10464
+ {
10465
+ "epoch": 1.5843648208469054,
10466
+ "grad_norm": 95.242652077602,
10467
+ "learning_rate": 2.2983170847525635e-08,
10468
+ "logits/chosen": -1.2544560432434082,
10469
+ "logits/rejected": -1.334200382232666,
10470
+ "logps/chosen": -132.92308044433594,
10471
+ "logps/rejected": -153.16336059570312,
10472
+ "loss": 0.4863,
10473
+ "rewards/accuracies": 0.78125,
10474
+ "rewards/chosen": -0.2278953194618225,
10475
+ "rewards/margins": 0.9455510973930359,
10476
+ "rewards/rejected": -1.1734462976455688,
10477
+ "step": 1368
10478
+ },
10479
+ {
10480
+ "epoch": 1.586681143684401,
10481
+ "grad_norm": 81.42436059360928,
10482
+ "learning_rate": 2.2737272204777737e-08,
10483
+ "logits/chosen": -1.0738383531570435,
10484
+ "logits/rejected": -1.143731951713562,
10485
+ "logps/chosen": -123.22402954101562,
10486
+ "logps/rejected": -167.40036010742188,
10487
+ "loss": 0.4226,
10488
+ "rewards/accuracies": 0.9375,
10489
+ "rewards/chosen": -0.27396196126937866,
10490
+ "rewards/margins": 1.5770013332366943,
10491
+ "rewards/rejected": -1.8509632349014282,
10492
+ "step": 1370
10493
+ },
10494
+ {
10495
+ "epoch": 1.5889974665218964,
10496
+ "grad_norm": 55.49979161745584,
10497
+ "learning_rate": 2.249252724943336e-08,
10498
+ "logits/chosen": -1.2299567461013794,
10499
+ "logits/rejected": -1.2440650463104248,
10500
+ "logps/chosen": -123.21504974365234,
10501
+ "logps/rejected": -143.9616241455078,
10502
+ "loss": 0.4755,
10503
+ "rewards/accuracies": 0.75,
10504
+ "rewards/chosen": -0.491690993309021,
10505
+ "rewards/margins": 0.894086480140686,
10506
+ "rewards/rejected": -1.385777473449707,
10507
+ "step": 1372
10508
+ },
10509
+ {
10510
+ "epoch": 1.591313789359392,
10511
+ "grad_norm": 54.18985696863257,
10512
+ "learning_rate": 2.2248939636025264e-08,
10513
+ "logits/chosen": -1.203713297843933,
10514
+ "logits/rejected": -1.2125518321990967,
10515
+ "logps/chosen": -176.7722930908203,
10516
+ "logps/rejected": -228.38929748535156,
10517
+ "loss": 0.408,
10518
+ "rewards/accuracies": 0.875,
10519
+ "rewards/chosen": -0.5514904260635376,
10520
+ "rewards/margins": 3.2685635089874268,
10521
+ "rewards/rejected": -3.820053815841675,
10522
+ "step": 1374
10523
+ },
10524
+ {
10525
+ "epoch": 1.5936301121968874,
10526
+ "grad_norm": 54.36252764320043,
10527
+ "learning_rate": 2.200651300180483e-08,
10528
+ "logits/chosen": -1.2815027236938477,
10529
+ "logits/rejected": -1.318671703338623,
10530
+ "logps/chosen": -130.49078369140625,
10531
+ "logps/rejected": -142.1308135986328,
10532
+ "loss": 0.3952,
10533
+ "rewards/accuracies": 0.90625,
10534
+ "rewards/chosen": -0.029716283082962036,
10535
+ "rewards/margins": 0.9699075222015381,
10536
+ "rewards/rejected": -0.999623715877533,
10537
+ "step": 1376
10538
+ },
10539
+ {
10540
+ "epoch": 1.5959464350343828,
10541
+ "grad_norm": 96.21574776555832,
10542
+ "learning_rate": 2.1765250966687687e-08,
10543
+ "logits/chosen": -1.3336846828460693,
10544
+ "logits/rejected": -1.3228704929351807,
10545
+ "logps/chosen": -176.54737854003906,
10546
+ "logps/rejected": -209.42909240722656,
10547
+ "loss": 0.4583,
10548
+ "rewards/accuracies": 0.71875,
10549
+ "rewards/chosen": -0.7003276348114014,
10550
+ "rewards/margins": 1.229347586631775,
10551
+ "rewards/rejected": -1.9296752214431763,
10552
+ "step": 1378
10553
+ },
10554
+ {
10555
+ "epoch": 1.5982627578718784,
10556
+ "grad_norm": 71.93512521949452,
10557
+ "learning_rate": 2.1525157133199633e-08,
10558
+ "logits/chosen": -1.08268404006958,
10559
+ "logits/rejected": -1.1782095432281494,
10560
+ "logps/chosen": -122.80490112304688,
10561
+ "logps/rejected": -159.9678955078125,
10562
+ "loss": 0.4574,
10563
+ "rewards/accuracies": 0.8125,
10564
+ "rewards/chosen": -0.40625882148742676,
10565
+ "rewards/margins": 1.3748208284378052,
10566
+ "rewards/rejected": -1.7810795307159424,
10567
+ "step": 1380
10568
+ },
10569
+ {
10570
+ "epoch": 1.6005790807093738,
10571
+ "grad_norm": 70.14564165621101,
10572
+ "learning_rate": 2.1286235086422843e-08,
10573
+ "logits/chosen": -1.3746612071990967,
10574
+ "logits/rejected": -1.3713853359222412,
10575
+ "logps/chosen": -178.19586181640625,
10576
+ "logps/rejected": -212.018310546875,
10577
+ "loss": 0.3689,
10578
+ "rewards/accuracies": 0.96875,
10579
+ "rewards/chosen": -0.2453938126564026,
10580
+ "rewards/margins": 1.7820340394973755,
10581
+ "rewards/rejected": -2.027428150177002,
10582
+ "step": 1382
10583
+ },
10584
+ {
10585
+ "epoch": 1.6028954035468694,
10586
+ "grad_norm": 53.127835579709945,
10587
+ "learning_rate": 2.1048488393942455e-08,
10588
+ "logits/chosen": -1.17882239818573,
10589
+ "logits/rejected": -1.1624205112457275,
10590
+ "logps/chosen": -111.90765380859375,
10591
+ "logps/rejected": -135.91148376464844,
10592
+ "loss": 0.475,
10593
+ "rewards/accuracies": 0.78125,
10594
+ "rewards/chosen": -0.25234535336494446,
10595
+ "rewards/margins": 1.1534082889556885,
10596
+ "rewards/rejected": -1.4057536125183105,
10597
+ "step": 1384
10598
+ },
10599
+ {
10600
+ "epoch": 1.6052117263843648,
10601
+ "grad_norm": 59.635014558757035,
10602
+ "learning_rate": 2.0811920605793122e-08,
10603
+ "logits/chosen": -1.1906554698944092,
10604
+ "logits/rejected": -1.2808858156204224,
10605
+ "logps/chosen": -129.28529357910156,
10606
+ "logps/rejected": -157.04090881347656,
10607
+ "loss": 0.3961,
10608
+ "rewards/accuracies": 0.75,
10609
+ "rewards/chosen": 0.06318804621696472,
10610
+ "rewards/margins": 1.2911432981491089,
10611
+ "rewards/rejected": -1.2279552221298218,
10612
+ "step": 1386
10613
+ },
10614
+ {
10615
+ "epoch": 1.6075280492218602,
10616
+ "grad_norm": 57.41476924308661,
10617
+ "learning_rate": 2.0576535254406157e-08,
10618
+ "logits/chosen": -1.1566798686981201,
10619
+ "logits/rejected": -1.1978453397750854,
10620
+ "logps/chosen": -153.1495819091797,
10621
+ "logps/rejected": -175.12283325195312,
10622
+ "loss": 0.3893,
10623
+ "rewards/accuracies": 0.8125,
10624
+ "rewards/chosen": -0.4101862907409668,
10625
+ "rewards/margins": 1.2702761888504028,
10626
+ "rewards/rejected": -1.6804625988006592,
10627
+ "step": 1388
10628
+ },
10629
+ {
10630
+ "epoch": 1.6098443720593558,
10631
+ "grad_norm": 46.94707676426246,
10632
+ "learning_rate": 2.0342335854556736e-08,
10633
+ "logits/chosen": -1.3085883855819702,
10634
+ "logits/rejected": -1.2242193222045898,
10635
+ "logps/chosen": -173.11146545410156,
10636
+ "logps/rejected": -201.7664031982422,
10637
+ "loss": 0.3882,
10638
+ "rewards/accuracies": 0.90625,
10639
+ "rewards/chosen": -0.6821246147155762,
10640
+ "rewards/margins": 1.9655760526657104,
10641
+ "rewards/rejected": -2.6477010250091553,
10642
+ "step": 1390
10643
+ },
10644
+ {
10645
+ "epoch": 1.6121606948968512,
10646
+ "grad_norm": 51.39756905271222,
10647
+ "learning_rate": 2.0109325903311324e-08,
10648
+ "logits/chosen": -1.1785892248153687,
10649
+ "logits/rejected": -1.2349551916122437,
10650
+ "logps/chosen": -173.98049926757812,
10651
+ "logps/rejected": -225.72409057617188,
10652
+ "loss": 0.378,
10653
+ "rewards/accuracies": 0.875,
10654
+ "rewards/chosen": -0.16729062795639038,
10655
+ "rewards/margins": 1.8970539569854736,
10656
+ "rewards/rejected": -2.064344644546509,
10657
+ "step": 1392
10658
+ },
10659
+ {
10660
+ "epoch": 1.6144770177343468,
10661
+ "grad_norm": 75.45520614404798,
10662
+ "learning_rate": 1.9877508879975557e-08,
10663
+ "logits/chosen": -1.2368243932724,
10664
+ "logits/rejected": -1.1864349842071533,
10665
+ "logps/chosen": -157.86318969726562,
10666
+ "logps/rejected": -164.7779083251953,
10667
+ "loss": 0.4102,
10668
+ "rewards/accuracies": 0.875,
10669
+ "rewards/chosen": -0.416544646024704,
10670
+ "rewards/margins": 1.2882412672042847,
10671
+ "rewards/rejected": -1.7047858238220215,
10672
+ "step": 1394
10673
+ },
10674
+ {
10675
+ "epoch": 1.6167933405718422,
10676
+ "grad_norm": 54.69368738030066,
10677
+ "learning_rate": 1.9646888246042337e-08,
10678
+ "logits/chosen": -1.2882027626037598,
10679
+ "logits/rejected": -1.2957350015640259,
10680
+ "logps/chosen": -168.46693420410156,
10681
+ "logps/rejected": -184.96092224121094,
10682
+ "loss": 0.3796,
10683
+ "rewards/accuracies": 0.71875,
10684
+ "rewards/chosen": -0.5366595983505249,
10685
+ "rewards/margins": 1.4451313018798828,
10686
+ "rewards/rejected": -1.9817910194396973,
10687
+ "step": 1396
10688
+ },
10689
+ {
10690
+ "epoch": 1.6191096634093376,
10691
+ "grad_norm": 47.07150629441627,
10692
+ "learning_rate": 1.941746744513999e-08,
10693
+ "logits/chosen": -1.2016966342926025,
10694
+ "logits/rejected": -1.2090229988098145,
10695
+ "logps/chosen": -119.58673095703125,
10696
+ "logps/rejected": -141.86593627929688,
10697
+ "loss": 0.3931,
10698
+ "rewards/accuracies": 0.875,
10699
+ "rewards/chosen": -0.3934524655342102,
10700
+ "rewards/margins": 1.4040158987045288,
10701
+ "rewards/rejected": -1.7974684238433838,
10702
+ "step": 1398
10703
+ },
10704
+ {
10705
+ "epoch": 1.6214259862468332,
10706
+ "grad_norm": 62.032269869178876,
10707
+ "learning_rate": 1.918924990298091e-08,
10708
+ "logits/chosen": -1.2298343181610107,
10709
+ "logits/rejected": -1.233276128768921,
10710
+ "logps/chosen": -160.66769409179688,
10711
+ "logps/rejected": -165.26895141601562,
10712
+ "loss": 0.4199,
10713
+ "rewards/accuracies": 0.84375,
10714
+ "rewards/chosen": -0.6389051079750061,
10715
+ "rewards/margins": 0.8895078897476196,
10716
+ "rewards/rejected": -1.52841317653656,
10717
+ "step": 1400
10718
+ },
10719
+ {
10720
+ "epoch": 1.6214259862468332,
10721
+ "eval_logits/chosen": -1.2133427858352661,
10722
+ "eval_logits/rejected": -1.2084039449691772,
10723
+ "eval_logps/chosen": -144.74742126464844,
10724
+ "eval_logps/rejected": -149.5865478515625,
10725
+ "eval_loss": 0.6033037304878235,
10726
+ "eval_rewards/accuracies": 0.7200000286102295,
10727
+ "eval_rewards/chosen": -0.9116251468658447,
10728
+ "eval_rewards/margins": 0.6516737341880798,
10729
+ "eval_rewards/rejected": -1.5632988214492798,
10730
+ "eval_runtime": 24.3113,
10731
+ "eval_samples_per_second": 4.113,
10732
+ "eval_steps_per_second": 1.028,
10733
+ "step": 1400
10734
+ },
10735
+ {
10736
+ "epoch": 1.6237423090843286,
10737
+ "grad_norm": 86.9760098456171,
10738
+ "learning_rate": 1.8962239027310577e-08,
10739
+ "logits/chosen": -1.1873736381530762,
10740
+ "logits/rejected": -1.2712754011154175,
10741
+ "logps/chosen": -150.010498046875,
10742
+ "logps/rejected": -177.68890380859375,
10743
+ "loss": 0.4684,
10744
+ "rewards/accuracies": 0.78125,
10745
+ "rewards/chosen": -0.3790132403373718,
10746
+ "rewards/margins": 1.0307780504226685,
10747
+ "rewards/rejected": -1.409791350364685,
10748
+ "step": 1402
10749
+ },
10750
+ {
10751
+ "epoch": 1.6260586319218242,
10752
+ "grad_norm": 54.339526567430624,
10753
+ "learning_rate": 1.8736438207856377e-08,
10754
+ "logits/chosen": -1.346308708190918,
10755
+ "logits/rejected": -1.3275481462478638,
10756
+ "logps/chosen": -172.08343505859375,
10757
+ "logps/rejected": -188.81179809570312,
10758
+ "loss": 0.4124,
10759
+ "rewards/accuracies": 0.90625,
10760
+ "rewards/chosen": -0.4836081564426422,
10761
+ "rewards/margins": 1.5920302867889404,
10762
+ "rewards/rejected": -2.07563853263855,
10763
+ "step": 1404
10764
+ },
10765
+ {
10766
+ "epoch": 1.6283749547593196,
10767
+ "grad_norm": 50.44666894199177,
10768
+ "learning_rate": 1.851185081627714e-08,
10769
+ "logits/chosen": -1.313905954360962,
10770
+ "logits/rejected": -1.3108646869659424,
10771
+ "logps/chosen": -150.0095977783203,
10772
+ "logps/rejected": -163.9129180908203,
10773
+ "loss": 0.4744,
10774
+ "rewards/accuracies": 0.78125,
10775
+ "rewards/chosen": -0.6312530040740967,
10776
+ "rewards/margins": 0.8648273348808289,
10777
+ "rewards/rejected": -1.4960802793502808,
10778
+ "step": 1406
10779
+ },
10780
+ {
10781
+ "epoch": 1.630691277596815,
10782
+ "grad_norm": 63.4120821056028,
10783
+ "learning_rate": 1.8288480206112877e-08,
10784
+ "logits/chosen": -1.2368450164794922,
10785
+ "logits/rejected": -1.222330927848816,
10786
+ "logps/chosen": -114.20048522949219,
10787
+ "logps/rejected": -153.55780029296875,
10788
+ "loss": 0.4453,
10789
+ "rewards/accuracies": 0.75,
10790
+ "rewards/chosen": -0.13375352323055267,
10791
+ "rewards/margins": 1.81991446018219,
10792
+ "rewards/rejected": -1.9536678791046143,
10793
+ "step": 1408
10794
+ },
10795
+ {
10796
+ "epoch": 1.6330076004343104,
10797
+ "grad_norm": 57.27120326939087,
10798
+ "learning_rate": 1.806632971273454e-08,
10799
+ "logits/chosen": -1.3121931552886963,
10800
+ "logits/rejected": -1.3301172256469727,
10801
+ "logps/chosen": -140.49411010742188,
10802
+ "logps/rejected": -147.28729248046875,
10803
+ "loss": 0.4135,
10804
+ "rewards/accuracies": 0.84375,
10805
+ "rewards/chosen": -0.3088449537754059,
10806
+ "rewards/margins": 0.9551953673362732,
10807
+ "rewards/rejected": -1.264040470123291,
10808
+ "step": 1410
10809
+ },
10810
+ {
10811
+ "epoch": 1.635323923271806,
10812
+ "grad_norm": 58.835101317691525,
10813
+ "learning_rate": 1.7845402653294262e-08,
10814
+ "logits/chosen": -1.1533010005950928,
10815
+ "logits/rejected": -1.2099212408065796,
10816
+ "logps/chosen": -161.35955810546875,
10817
+ "logps/rejected": -182.40274047851562,
10818
+ "loss": 0.3941,
10819
+ "rewards/accuracies": 0.96875,
10820
+ "rewards/chosen": -0.4202643036842346,
10821
+ "rewards/margins": 1.3078231811523438,
10822
+ "rewards/rejected": -1.7280876636505127,
10823
+ "step": 1412
10824
+ },
10825
+ {
10826
+ "epoch": 1.6376402461093016,
10827
+ "grad_norm": 62.35716207894577,
10828
+ "learning_rate": 1.762570232667595e-08,
10829
+ "logits/chosen": -1.164352297782898,
10830
+ "logits/rejected": -1.2668792009353638,
10831
+ "logps/chosen": -121.197021484375,
10832
+ "logps/rejected": -175.3108367919922,
10833
+ "loss": 0.4048,
10834
+ "rewards/accuracies": 0.78125,
10835
+ "rewards/chosen": -0.2763165235519409,
10836
+ "rewards/margins": 1.5692354440689087,
10837
+ "rewards/rejected": -1.8455519676208496,
10838
+ "step": 1414
10839
+ },
10840
+ {
10841
+ "epoch": 1.639956568946797,
10842
+ "grad_norm": 72.03828023987269,
10843
+ "learning_rate": 1.7407232013445893e-08,
10844
+ "logits/chosen": -1.120769739151001,
10845
+ "logits/rejected": -1.203442096710205,
10846
+ "logps/chosen": -147.95274353027344,
10847
+ "logps/rejected": -192.7701873779297,
10848
+ "loss": 0.4188,
10849
+ "rewards/accuracies": 0.9375,
10850
+ "rewards/chosen": -0.37782442569732666,
10851
+ "rewards/margins": 1.473639965057373,
10852
+ "rewards/rejected": -1.8514643907546997,
10853
+ "step": 1416
10854
+ },
10855
+ {
10856
+ "epoch": 1.6422728917842924,
10857
+ "grad_norm": 66.4702023164338,
10858
+ "learning_rate": 1.7189994975803758e-08,
10859
+ "logits/chosen": -1.1487022638320923,
10860
+ "logits/rejected": -1.2365858554840088,
10861
+ "logps/chosen": -117.31167602539062,
10862
+ "logps/rejected": -146.18536376953125,
10863
+ "loss": 0.4393,
10864
+ "rewards/accuracies": 0.78125,
10865
+ "rewards/chosen": -0.3540950417518616,
10866
+ "rewards/margins": 0.7364134192466736,
10867
+ "rewards/rejected": -1.0905084609985352,
10868
+ "step": 1418
10869
+ },
10870
+ {
10871
+ "epoch": 1.6445892146217878,
10872
+ "grad_norm": 78.71556855680205,
10873
+ "learning_rate": 1.6973994457534023e-08,
10874
+ "logits/chosen": -1.2775179147720337,
10875
+ "logits/rejected": -1.3115909099578857,
10876
+ "logps/chosen": -164.73760986328125,
10877
+ "logps/rejected": -190.9739227294922,
10878
+ "loss": 0.506,
10879
+ "rewards/accuracies": 0.8125,
10880
+ "rewards/chosen": -0.6548792123794556,
10881
+ "rewards/margins": 1.3077069520950317,
10882
+ "rewards/rejected": -1.9625860452651978,
10883
+ "step": 1420
10884
+ },
10885
+ {
10886
+ "epoch": 1.6469055374592834,
10887
+ "grad_norm": 62.03650354576015,
10888
+ "learning_rate": 1.6759233683957396e-08,
10889
+ "logits/chosen": -1.280670166015625,
10890
+ "logits/rejected": -1.270959734916687,
10891
+ "logps/chosen": -174.10838317871094,
10892
+ "logps/rejected": -190.41744995117188,
10893
+ "loss": 0.417,
10894
+ "rewards/accuracies": 0.90625,
10895
+ "rewards/chosen": -0.3348293602466583,
10896
+ "rewards/margins": 1.4488041400909424,
10897
+ "rewards/rejected": -1.7836335897445679,
10898
+ "step": 1422
10899
+ },
10900
+ {
10901
+ "epoch": 1.649221860296779,
10902
+ "grad_norm": 57.86020225721166,
10903
+ "learning_rate": 1.6545715861882702e-08,
10904
+ "logits/chosen": -1.101415753364563,
10905
+ "logits/rejected": -1.1186178922653198,
10906
+ "logps/chosen": -142.97349548339844,
10907
+ "logps/rejected": -186.55322265625,
10908
+ "loss": 0.4078,
10909
+ "rewards/accuracies": 0.84375,
10910
+ "rewards/chosen": -0.5462976098060608,
10911
+ "rewards/margins": 2.0124125480651855,
10912
+ "rewards/rejected": -2.5587100982666016,
10913
+ "step": 1424
10914
+ },
10915
+ {
10916
+ "epoch": 1.6515381831342744,
10917
+ "grad_norm": 48.695250546687646,
10918
+ "learning_rate": 1.6333444179559074e-08,
10919
+ "logits/chosen": -1.2034971714019775,
10920
+ "logits/rejected": -1.2503241300582886,
10921
+ "logps/chosen": -169.5352020263672,
10922
+ "logps/rejected": -214.4050750732422,
10923
+ "loss": 0.4054,
10924
+ "rewards/accuracies": 0.875,
10925
+ "rewards/chosen": -0.5561625361442566,
10926
+ "rewards/margins": 2.379465341567993,
10927
+ "rewards/rejected": -2.9356279373168945,
10928
+ "step": 1426
10929
+ },
10930
+ {
10931
+ "epoch": 1.6538545059717698,
10932
+ "grad_norm": 58.54466931855239,
10933
+ "learning_rate": 1.6122421806628207e-08,
10934
+ "logits/chosen": -1.2484573125839233,
10935
+ "logits/rejected": -1.298370599746704,
10936
+ "logps/chosen": -224.2542266845703,
10937
+ "logps/rejected": -239.31918334960938,
10938
+ "loss": 0.3847,
10939
+ "rewards/accuracies": 0.84375,
10940
+ "rewards/chosen": -0.5975647568702698,
10941
+ "rewards/margins": 2.2055599689483643,
10942
+ "rewards/rejected": -2.8031251430511475,
10943
+ "step": 1428
10944
+ },
10945
+ {
10946
+ "epoch": 1.6561708288092651,
10947
+ "grad_norm": 59.92256675039104,
10948
+ "learning_rate": 1.5912651894077167e-08,
10949
+ "logits/chosen": -1.2150397300720215,
10950
+ "logits/rejected": -1.1968854665756226,
10951
+ "logps/chosen": -150.77381896972656,
10952
+ "logps/rejected": -189.05838012695312,
10953
+ "loss": 0.4165,
10954
+ "rewards/accuracies": 0.9375,
10955
+ "rewards/chosen": -0.49205654859542847,
10956
+ "rewards/margins": 2.0640861988067627,
10957
+ "rewards/rejected": -2.556142807006836,
10958
+ "step": 1430
10959
+ },
10960
+ {
10961
+ "epoch": 1.6584871516467607,
10962
+ "grad_norm": 59.42932647345947,
10963
+ "learning_rate": 1.57041375741912e-08,
10964
+ "logits/chosen": -1.196043610572815,
10965
+ "logits/rejected": -1.2252073287963867,
10966
+ "logps/chosen": -145.3686065673828,
10967
+ "logps/rejected": -144.02952575683594,
10968
+ "loss": 0.389,
10969
+ "rewards/accuracies": 0.8125,
10970
+ "rewards/chosen": -0.924052894115448,
10971
+ "rewards/margins": 0.9492592215538025,
10972
+ "rewards/rejected": -1.8733121156692505,
10973
+ "step": 1432
10974
+ },
10975
+ {
10976
+ "epoch": 1.6608034744842564,
10977
+ "grad_norm": 59.90616287675461,
10978
+ "learning_rate": 1.5496881960507124e-08,
10979
+ "logits/chosen": -1.1822700500488281,
10980
+ "logits/rejected": -1.1750373840332031,
10981
+ "logps/chosen": -134.07861328125,
10982
+ "logps/rejected": -201.8026123046875,
10983
+ "loss": 0.382,
10984
+ "rewards/accuracies": 0.875,
10985
+ "rewards/chosen": -0.24189743399620056,
10986
+ "rewards/margins": 2.8620426654815674,
10987
+ "rewards/rejected": -3.103940010070801,
10988
+ "step": 1434
10989
+ },
10990
+ {
10991
+ "epoch": 1.6631197973217517,
10992
+ "grad_norm": 57.85512148826699,
10993
+ "learning_rate": 1.529088814776668e-08,
10994
+ "logits/chosen": -1.2793397903442383,
10995
+ "logits/rejected": -1.2699894905090332,
10996
+ "logps/chosen": -182.90478515625,
10997
+ "logps/rejected": -223.45468139648438,
10998
+ "loss": 0.3792,
10999
+ "rewards/accuracies": 0.78125,
11000
+ "rewards/chosen": -0.6292093992233276,
11001
+ "rewards/margins": 1.7043105363845825,
11002
+ "rewards/rejected": -2.33351993560791,
11003
+ "step": 1436
11004
+ },
11005
+ {
11006
+ "epoch": 1.6654361201592471,
11007
+ "grad_norm": 80.74607876180737,
11008
+ "learning_rate": 1.508615921187044e-08,
11009
+ "logits/chosen": -1.049709677696228,
11010
+ "logits/rejected": -1.1322901248931885,
11011
+ "logps/chosen": -109.7282943725586,
11012
+ "logps/rejected": -159.00778198242188,
11013
+ "loss": 0.3868,
11014
+ "rewards/accuracies": 0.90625,
11015
+ "rewards/chosen": -0.34554052352905273,
11016
+ "rewards/margins": 1.7658944129943848,
11017
+ "rewards/rejected": -2.1114349365234375,
11018
+ "step": 1438
11019
+ },
11020
+ {
11021
+ "epoch": 1.6677524429967425,
11022
+ "grad_norm": 66.72584214795384,
11023
+ "learning_rate": 1.4882698209831779e-08,
11024
+ "logits/chosen": -1.1424648761749268,
11025
+ "logits/rejected": -1.1974608898162842,
11026
+ "logps/chosen": -97.99867248535156,
11027
+ "logps/rejected": -134.96340942382812,
11028
+ "loss": 0.4193,
11029
+ "rewards/accuracies": 0.71875,
11030
+ "rewards/chosen": -0.35685765743255615,
11031
+ "rewards/margins": 1.5133432149887085,
11032
+ "rewards/rejected": -1.8702008724212646,
11033
+ "step": 1440
11034
+ },
11035
+ {
11036
+ "epoch": 1.6700687658342381,
11037
+ "grad_norm": 78.22654470786401,
11038
+ "learning_rate": 1.4680508179731343e-08,
11039
+ "logits/chosen": -1.2903565168380737,
11040
+ "logits/rejected": -1.3427797555923462,
11041
+ "logps/chosen": -164.58184814453125,
11042
+ "logps/rejected": -196.1396484375,
11043
+ "loss": 0.4565,
11044
+ "rewards/accuracies": 0.8125,
11045
+ "rewards/chosen": -0.2896607220172882,
11046
+ "rewards/margins": 1.235891580581665,
11047
+ "rewards/rejected": -1.5255522727966309,
11048
+ "step": 1442
11049
+ },
11050
+ {
11051
+ "epoch": 1.6723850886717337,
11052
+ "grad_norm": 64.64622163037477,
11053
+ "learning_rate": 1.4479592140671548e-08,
11054
+ "logits/chosen": -1.3046828508377075,
11055
+ "logits/rejected": -1.2628744840621948,
11056
+ "logps/chosen": -194.83673095703125,
11057
+ "logps/rejected": -228.6671905517578,
11058
+ "loss": 0.4481,
11059
+ "rewards/accuracies": 0.78125,
11060
+ "rewards/chosen": -0.3258281648159027,
11061
+ "rewards/margins": 1.5874714851379395,
11062
+ "rewards/rejected": -1.913299560546875,
11063
+ "step": 1444
11064
+ },
11065
+ {
11066
+ "epoch": 1.6747014115092291,
11067
+ "grad_norm": 54.707446026319836,
11068
+ "learning_rate": 1.4279953092731633e-08,
11069
+ "logits/chosen": -1.3320201635360718,
11070
+ "logits/rejected": -1.3739019632339478,
11071
+ "logps/chosen": -184.18093872070312,
11072
+ "logps/rejected": -212.85275268554688,
11073
+ "loss": 0.3619,
11074
+ "rewards/accuracies": 0.90625,
11075
+ "rewards/chosen": -0.21321099996566772,
11076
+ "rewards/margins": 1.5299046039581299,
11077
+ "rewards/rejected": -1.7431155443191528,
11078
+ "step": 1446
11079
+ },
11080
+ {
11081
+ "epoch": 1.6770177343467245,
11082
+ "grad_norm": 82.01457178127649,
11083
+ "learning_rate": 1.4081594016922772e-08,
11084
+ "logits/chosen": -1.3037813901901245,
11085
+ "logits/rejected": -1.2591630220413208,
11086
+ "logps/chosen": -190.23208618164062,
11087
+ "logps/rejected": -201.10235595703125,
11088
+ "loss": 0.4678,
11089
+ "rewards/accuracies": 0.6875,
11090
+ "rewards/chosen": -0.4632962942123413,
11091
+ "rewards/margins": 0.94648677110672,
11092
+ "rewards/rejected": -1.409783124923706,
11093
+ "step": 1448
11094
+ },
11095
+ {
11096
+ "epoch": 1.67933405718422,
11097
+ "grad_norm": 55.804214360937905,
11098
+ "learning_rate": 1.3884517875143542e-08,
11099
+ "logits/chosen": -1.2149088382720947,
11100
+ "logits/rejected": -1.2506321668624878,
11101
+ "logps/chosen": -134.70565795898438,
11102
+ "logps/rejected": -170.83775329589844,
11103
+ "loss": 0.4304,
11104
+ "rewards/accuracies": 0.84375,
11105
+ "rewards/chosen": -0.13964088261127472,
11106
+ "rewards/margins": 1.4484151601791382,
11107
+ "rewards/rejected": -1.588055968284607,
11108
+ "step": 1450
11109
+ },
11110
+ {
11111
+ "epoch": 1.6816503800217155,
11112
+ "grad_norm": 45.37776187937181,
11113
+ "learning_rate": 1.3688727610135841e-08,
11114
+ "logits/chosen": -1.182690978050232,
11115
+ "logits/rejected": -1.128598928451538,
11116
+ "logps/chosen": -166.2299041748047,
11117
+ "logps/rejected": -189.45960998535156,
11118
+ "loss": 0.3818,
11119
+ "rewards/accuracies": 0.90625,
11120
+ "rewards/chosen": -0.45779427886009216,
11121
+ "rewards/margins": 1.7794036865234375,
11122
+ "rewards/rejected": -2.2371981143951416,
11123
+ "step": 1452
11124
+ },
11125
+ {
11126
+ "epoch": 1.6839667028592111,
11127
+ "grad_norm": 52.071114969595605,
11128
+ "learning_rate": 1.3494226145440767e-08,
11129
+ "logits/chosen": -1.129225492477417,
11130
+ "logits/rejected": -1.1527059078216553,
11131
+ "logps/chosen": -131.0524139404297,
11132
+ "logps/rejected": -152.54283142089844,
11133
+ "loss": 0.3594,
11134
+ "rewards/accuracies": 0.84375,
11135
+ "rewards/chosen": -0.4113074541091919,
11136
+ "rewards/margins": 1.4839246273040771,
11137
+ "rewards/rejected": -1.8952319622039795,
11138
+ "step": 1454
11139
+ },
11140
+ {
11141
+ "epoch": 1.6862830256967065,
11142
+ "grad_norm": 47.42528149711889,
11143
+ "learning_rate": 1.3301016385355091e-08,
11144
+ "logits/chosen": -1.2042808532714844,
11145
+ "logits/rejected": -1.194934368133545,
11146
+ "logps/chosen": -156.91098022460938,
11147
+ "logps/rejected": -188.40354919433594,
11148
+ "loss": 0.3751,
11149
+ "rewards/accuracies": 0.84375,
11150
+ "rewards/chosen": -0.7726644277572632,
11151
+ "rewards/margins": 1.8851245641708374,
11152
+ "rewards/rejected": -2.6577892303466797,
11153
+ "step": 1456
11154
+ },
11155
+ {
11156
+ "epoch": 1.688599348534202,
11157
+ "grad_norm": 61.34540856220831,
11158
+ "learning_rate": 1.3109101214887864e-08,
11159
+ "logits/chosen": -1.2197272777557373,
11160
+ "logits/rejected": -1.1938438415527344,
11161
+ "logps/chosen": -133.5988311767578,
11162
+ "logps/rejected": -192.86639404296875,
11163
+ "loss": 0.41,
11164
+ "rewards/accuracies": 0.875,
11165
+ "rewards/chosen": -0.4758758246898651,
11166
+ "rewards/margins": 3.0053532123565674,
11167
+ "rewards/rejected": -3.4812285900115967,
11168
+ "step": 1458
11169
+ },
11170
+ {
11171
+ "epoch": 1.6909156713716973,
11172
+ "grad_norm": 54.8216979898567,
11173
+ "learning_rate": 1.2918483499717237e-08,
11174
+ "logits/chosen": -1.1394593715667725,
11175
+ "logits/rejected": -1.1835401058197021,
11176
+ "logps/chosen": -122.25736999511719,
11177
+ "logps/rejected": -133.27230834960938,
11178
+ "loss": 0.3597,
11179
+ "rewards/accuracies": 0.90625,
11180
+ "rewards/chosen": -0.4267653822898865,
11181
+ "rewards/margins": 1.0661195516586304,
11182
+ "rewards/rejected": -1.4928849935531616,
11183
+ "step": 1460
11184
+ },
11185
+ {
11186
+ "epoch": 1.693231994209193,
11187
+ "grad_norm": 62.86307508634661,
11188
+ "learning_rate": 1.2729166086147803e-08,
11189
+ "logits/chosen": -1.2270219326019287,
11190
+ "logits/rejected": -1.2750426530838013,
11191
+ "logps/chosen": -128.8585968017578,
11192
+ "logps/rejected": -152.87161254882812,
11193
+ "loss": 0.4384,
11194
+ "rewards/accuracies": 0.84375,
11195
+ "rewards/chosen": -0.016907572746276855,
11196
+ "rewards/margins": 1.5406163930892944,
11197
+ "rewards/rejected": -1.5575240850448608,
11198
+ "step": 1462
11199
+ },
11200
+ {
11201
+ "epoch": 1.6955483170466885,
11202
+ "grad_norm": 87.68154889445682,
11203
+ "learning_rate": 1.2541151801068072e-08,
11204
+ "logits/chosen": -1.1000306606292725,
11205
+ "logits/rejected": -1.1639959812164307,
11206
+ "logps/chosen": -173.75588989257812,
11207
+ "logps/rejected": -201.65650939941406,
11208
+ "loss": 0.4866,
11209
+ "rewards/accuracies": 0.78125,
11210
+ "rewards/chosen": -1.0096687078475952,
11211
+ "rewards/margins": 1.2543641328811646,
11212
+ "rewards/rejected": -2.2640328407287598,
11213
+ "step": 1464
11214
+ },
11215
+ {
11216
+ "epoch": 1.697864639884184,
11217
+ "grad_norm": 52.24912842261691,
11218
+ "learning_rate": 1.2354443451908202e-08,
11219
+ "logits/chosen": -1.2294087409973145,
11220
+ "logits/rejected": -1.2400881052017212,
11221
+ "logps/chosen": -153.24412536621094,
11222
+ "logps/rejected": -166.8737030029297,
11223
+ "loss": 0.3901,
11224
+ "rewards/accuracies": 0.78125,
11225
+ "rewards/chosen": -0.4041752815246582,
11226
+ "rewards/margins": 1.1100343465805054,
11227
+ "rewards/rejected": -1.5142096281051636,
11228
+ "step": 1466
11229
+ },
11230
+ {
11231
+ "epoch": 1.7001809627216793,
11232
+ "grad_norm": 59.60290932317476,
11233
+ "learning_rate": 1.2169043826598057e-08,
11234
+ "logits/chosen": -1.1557788848876953,
11235
+ "logits/rejected": -1.2175214290618896,
11236
+ "logps/chosen": -141.86160278320312,
11237
+ "logps/rejected": -166.4970245361328,
11238
+ "loss": 0.4357,
11239
+ "rewards/accuracies": 0.8125,
11240
+ "rewards/chosen": -0.4277328848838806,
11241
+ "rewards/margins": 1.5622481107711792,
11242
+ "rewards/rejected": -1.989980936050415,
11243
+ "step": 1468
11244
+ },
11245
+ {
11246
+ "epoch": 1.7024972855591747,
11247
+ "grad_norm": 56.1236013000123,
11248
+ "learning_rate": 1.1984955693525788e-08,
11249
+ "logits/chosen": -1.224461317062378,
11250
+ "logits/rejected": -1.1498862504959106,
11251
+ "logps/chosen": -118.77024841308594,
11252
+ "logps/rejected": -134.83253479003906,
11253
+ "loss": 0.4148,
11254
+ "rewards/accuracies": 0.9375,
11255
+ "rewards/chosen": -0.11248160153627396,
11256
+ "rewards/margins": 1.6419700384140015,
11257
+ "rewards/rejected": -1.7544519901275635,
11258
+ "step": 1470
11259
+ },
11260
+ {
11261
+ "epoch": 1.7048136083966703,
11262
+ "grad_norm": 74.87499789321072,
11263
+ "learning_rate": 1.180218180149617e-08,
11264
+ "logits/chosen": -1.3031408786773682,
11265
+ "logits/rejected": -1.2794002294540405,
11266
+ "logps/chosen": -166.36143493652344,
11267
+ "logps/rejected": -182.16787719726562,
11268
+ "loss": 0.4709,
11269
+ "rewards/accuracies": 0.75,
11270
+ "rewards/chosen": -0.30539318919181824,
11271
+ "rewards/margins": 1.1033700704574585,
11272
+ "rewards/rejected": -1.4087631702423096,
11273
+ "step": 1472
11274
+ },
11275
+ {
11276
+ "epoch": 1.707129931234166,
11277
+ "grad_norm": 55.679405729529364,
11278
+ "learning_rate": 1.1620724879689791e-08,
11279
+ "logits/chosen": -1.2027887105941772,
11280
+ "logits/rejected": -1.2204790115356445,
11281
+ "logps/chosen": -131.13787841796875,
11282
+ "logps/rejected": -150.597900390625,
11283
+ "loss": 0.3978,
11284
+ "rewards/accuracies": 0.71875,
11285
+ "rewards/chosen": -0.2811795473098755,
11286
+ "rewards/margins": 0.9655717015266418,
11287
+ "rewards/rejected": -1.2467511892318726,
11288
+ "step": 1474
11289
+ },
11290
+ {
11291
+ "epoch": 1.7094462540716613,
11292
+ "grad_norm": 59.35657673584767,
11293
+ "learning_rate": 1.1440587637622256e-08,
11294
+ "logits/chosen": -1.2350002527236938,
11295
+ "logits/rejected": -1.3663004636764526,
11296
+ "logps/chosen": -94.2022705078125,
11297
+ "logps/rejected": -113.4736099243164,
11298
+ "loss": 0.4273,
11299
+ "rewards/accuracies": 0.8125,
11300
+ "rewards/chosen": -0.37293869256973267,
11301
+ "rewards/margins": 0.5190171599388123,
11302
+ "rewards/rejected": -0.8919559121131897,
11303
+ "step": 1476
11304
+ },
11305
+ {
11306
+ "epoch": 1.7117625769091567,
11307
+ "grad_norm": 58.75339969893179,
11308
+ "learning_rate": 1.1261772765103682e-08,
11309
+ "logits/chosen": -1.2917990684509277,
11310
+ "logits/rejected": -1.3643466234207153,
11311
+ "logps/chosen": -115.38711547851562,
11312
+ "logps/rejected": -147.71717834472656,
11313
+ "loss": 0.4022,
11314
+ "rewards/accuracies": 0.875,
11315
+ "rewards/chosen": -0.19546058773994446,
11316
+ "rewards/margins": 1.1290830373764038,
11317
+ "rewards/rejected": -1.3245434761047363,
11318
+ "step": 1478
11319
+ },
11320
+ {
11321
+ "epoch": 1.714078899746652,
11322
+ "grad_norm": 65.37238691562818,
11323
+ "learning_rate": 1.108428293219854e-08,
11324
+ "logits/chosen": -1.2615394592285156,
11325
+ "logits/rejected": -1.2974615097045898,
11326
+ "logps/chosen": -134.23512268066406,
11327
+ "logps/rejected": -218.10336303710938,
11328
+ "loss": 0.414,
11329
+ "rewards/accuracies": 0.90625,
11330
+ "rewards/chosen": -0.6633899807929993,
11331
+ "rewards/margins": 2.063319206237793,
11332
+ "rewards/rejected": -2.7267091274261475,
11333
+ "step": 1480
11334
+ },
11335
+ {
11336
+ "epoch": 1.7163952225841477,
11337
+ "grad_norm": 46.08904659289323,
11338
+ "learning_rate": 1.0908120789185837e-08,
11339
+ "logits/chosen": -1.1299887895584106,
11340
+ "logits/rejected": -1.0999367237091064,
11341
+ "logps/chosen": -147.2964324951172,
11342
+ "logps/rejected": -166.0971221923828,
11343
+ "loss": 0.3567,
11344
+ "rewards/accuracies": 0.84375,
11345
+ "rewards/chosen": -0.3920423686504364,
11346
+ "rewards/margins": 1.2661586999893188,
11347
+ "rewards/rejected": -1.658200979232788,
11348
+ "step": 1482
11349
+ },
11350
+ {
11351
+ "epoch": 1.7187115454216433,
11352
+ "grad_norm": 62.032458754200285,
11353
+ "learning_rate": 1.0733288966519516e-08,
11354
+ "logits/chosen": -1.258570909500122,
11355
+ "logits/rejected": -1.253230333328247,
11356
+ "logps/chosen": -97.19083404541016,
11357
+ "logps/rejected": -109.28732299804688,
11358
+ "loss": 0.4554,
11359
+ "rewards/accuracies": 0.75,
11360
+ "rewards/chosen": 0.02817530930042267,
11361
+ "rewards/margins": 0.9206870794296265,
11362
+ "rewards/rejected": -0.8925117254257202,
11363
+ "step": 1484
11364
+ },
11365
+ {
11366
+ "epoch": 1.7210278682591387,
11367
+ "grad_norm": 75.38001959898925,
11368
+ "learning_rate": 1.0559790074789132e-08,
11369
+ "logits/chosen": -1.2418440580368042,
11370
+ "logits/rejected": -1.3091630935668945,
11371
+ "logps/chosen": -140.07681274414062,
11372
+ "logps/rejected": -164.11033630371094,
11373
+ "loss": 0.4497,
11374
+ "rewards/accuracies": 0.84375,
11375
+ "rewards/chosen": -0.10434143245220184,
11376
+ "rewards/margins": 1.5909518003463745,
11377
+ "rewards/rejected": -1.6952931880950928,
11378
+ "step": 1486
11379
+ },
11380
+ {
11381
+ "epoch": 1.723344191096634,
11382
+ "grad_norm": 60.00004527174649,
11383
+ "learning_rate": 1.0387626704680896e-08,
11384
+ "logits/chosen": -1.1610029935836792,
11385
+ "logits/rejected": -1.1764705181121826,
11386
+ "logps/chosen": -153.56924438476562,
11387
+ "logps/rejected": -186.0706329345703,
11388
+ "loss": 0.4105,
11389
+ "rewards/accuracies": 0.875,
11390
+ "rewards/chosen": -0.09610500931739807,
11391
+ "rewards/margins": 1.3976409435272217,
11392
+ "rewards/rejected": -1.4937461614608765,
11393
+ "step": 1488
11394
+ },
11395
+ {
11396
+ "epoch": 1.7256605139341294,
11397
+ "grad_norm": 54.45806934963145,
11398
+ "learning_rate": 1.0216801426939093e-08,
11399
+ "logits/chosen": -1.230536699295044,
11400
+ "logits/rejected": -1.2105457782745361,
11401
+ "logps/chosen": -138.13816833496094,
11402
+ "logps/rejected": -160.49887084960938,
11403
+ "loss": 0.451,
11404
+ "rewards/accuracies": 0.8125,
11405
+ "rewards/chosen": -0.30948516726493835,
11406
+ "rewards/margins": 1.360965371131897,
11407
+ "rewards/rejected": -1.6704505681991577,
11408
+ "step": 1490
11409
+ },
11410
+ {
11411
+ "epoch": 1.727976836771625,
11412
+ "grad_norm": 82.82636273947331,
11413
+ "learning_rate": 1.0047316792327498e-08,
11414
+ "logits/chosen": -1.2326574325561523,
11415
+ "logits/rejected": -1.3276634216308594,
11416
+ "logps/chosen": -173.7947235107422,
11417
+ "logps/rejected": -209.5412139892578,
11418
+ "loss": 0.3827,
11419
+ "rewards/accuracies": 0.875,
11420
+ "rewards/chosen": -0.604800820350647,
11421
+ "rewards/margins": 1.53623366355896,
11422
+ "rewards/rejected": -2.1410343647003174,
11423
+ "step": 1492
11424
+ },
11425
+ {
11426
+ "epoch": 1.7302931596091207,
11427
+ "grad_norm": 69.61022757488912,
11428
+ "learning_rate": 9.879175331591472e-09,
11429
+ "logits/chosen": -1.1758224964141846,
11430
+ "logits/rejected": -1.201228380203247,
11431
+ "logps/chosen": -147.6432342529297,
11432
+ "logps/rejected": -187.960693359375,
11433
+ "loss": 0.3883,
11434
+ "rewards/accuracies": 0.78125,
11435
+ "rewards/chosen": -0.6493701934814453,
11436
+ "rewards/margins": 1.607360601425171,
11437
+ "rewards/rejected": -2.256730794906616,
11438
+ "step": 1494
11439
+ },
11440
+ {
11441
+ "epoch": 1.732609482446616,
11442
+ "grad_norm": 46.06958168770852,
11443
+ "learning_rate": 9.712379555420092e-09,
11444
+ "logits/chosen": -1.1795316934585571,
11445
+ "logits/rejected": -1.285264492034912,
11446
+ "logps/chosen": -119.38116455078125,
11447
+ "logps/rejected": -151.4436798095703,
11448
+ "loss": 0.403,
11449
+ "rewards/accuracies": 0.8125,
11450
+ "rewards/chosen": -0.10696488618850708,
11451
+ "rewards/margins": 1.2390841245651245,
11452
+ "rewards/rejected": -1.3460490703582764,
11453
+ "step": 1496
11454
+ },
11455
+ {
11456
+ "epoch": 1.7349258052841114,
11457
+ "grad_norm": 57.95589326263925,
11458
+ "learning_rate": 9.546931954408621e-09,
11459
+ "logits/chosen": -1.1415185928344727,
11460
+ "logits/rejected": -1.1725091934204102,
11461
+ "logps/chosen": -115.87825775146484,
11462
+ "logps/rejected": -149.04075622558594,
11463
+ "loss": 0.5099,
11464
+ "rewards/accuracies": 0.875,
11465
+ "rewards/chosen": -0.27941668033599854,
11466
+ "rewards/margins": 1.3693090677261353,
11467
+ "rewards/rejected": -1.6487256288528442,
11468
+ "step": 1498
11469
+ },
11470
+ {
11471
+ "epoch": 1.7372421281216068,
11472
+ "grad_norm": 55.43462235715989,
11473
+ "learning_rate": 9.382834999021372e-09,
11474
+ "logits/chosen": -1.2606816291809082,
11475
+ "logits/rejected": -1.2870656251907349,
11476
+ "logps/chosen": -125.43834686279297,
11477
+ "logps/rejected": -157.20016479492188,
11478
+ "loss": 0.4188,
11479
+ "rewards/accuracies": 0.75,
11480
+ "rewards/chosen": -0.31777307391166687,
11481
+ "rewards/margins": 0.9490638375282288,
11482
+ "rewards/rejected": -1.2668367624282837,
11483
+ "step": 1500
11484
+ },
11485
+ {
11486
+ "epoch": 1.7372421281216068,
11487
+ "eval_logits/chosen": -1.2174957990646362,
11488
+ "eval_logits/rejected": -1.2125576734542847,
11489
+ "eval_logps/chosen": -143.9083251953125,
11490
+ "eval_logps/rejected": -148.98919677734375,
11491
+ "eval_loss": 0.5947905778884888,
11492
+ "eval_rewards/accuracies": 0.7200000286102295,
11493
+ "eval_rewards/chosen": -0.8277125358581543,
11494
+ "eval_rewards/margins": 0.675851047039032,
11495
+ "eval_rewards/rejected": -1.5035635232925415,
11496
+ "eval_runtime": 23.0512,
11497
+ "eval_samples_per_second": 4.338,
11498
+ "eval_steps_per_second": 1.085,
11499
+ "step": 1500
11500
  }
11501
  ],
11502
  "logging_steps": 2,