RyanYr committed on
Commit
185045f
1 Parent(s): 632211e

Training in progress, step 1500, checkpoint

last-checkpoint/global_step1500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0e8b57e22ea9a31f314b682e8603b72abb5c7c1059d93c7b9649175fc2b99d90
+ size 24090788996
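
The `.pt` shards in this commit are tracked with Git LFS, so the repository stores only small pointer files (`version`, `oid`, `size`) while the ~24 GB objects live in LFS storage. As a minimal illustration (not part of this commit), a pointer like the one just shown can be parsed as follows; the path assumes a local clone where `git lfs pull` has not yet fetched the object:

```python
# Sketch: parse a Git LFS pointer file into its three fields. Assumes a local
# clone where the file still contains the pointer text shown above.
from pathlib import Path

def read_lfs_pointer(path: str) -> dict:
    """Return the version, oid and size recorded in an LFS pointer file."""
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    fields["size"] = int(fields["size"])  # byte count of the real object
    return fields

ptr = read_lfs_pointer(
    "last-checkpoint/global_step1500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt"
)
print(ptr["oid"], ptr["size"])  # sha256:0e8b... 24090788996
```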
last-checkpoint/global_step1500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0b9482d352dc81f94eeed704b766a97dfdcebdcdccef2cc7af14042e1308dcfc
+ size 24090788996
last-checkpoint/global_step1500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ed95171f1cb12f5fa7959a64791d8596d5411aa3997ae72be474532ec9531b98
+ size 24090788996
last-checkpoint/global_step1500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c2997615d0998626cf7a9b99bd9ef7c501b60db21a78a20a69bd0fb9bed800c4
+ size 24090788996
last-checkpoint/global_step1500/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfc8c9c9a7d2fcfa2e8fa40312006739b2c850b9a5ecfacd34f5a60173282093
+ size 150693
last-checkpoint/global_step1500/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:259ffd7944316a19d3cd4a130f207a063579c77fad5d447554aeff12156085d6
+ size 150693
last-checkpoint/global_step1500/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d29c1fe957781c3a62402feef79dfb38cf56b8fddb352092f6ffd32c6211e320
+ size 150693
last-checkpoint/global_step1500/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:921a755d22277c2d84c9363b6dc0c6e459c1e6fbaddb89bc814ee9c5db4f54fb
+ size 150693
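
The eight files above are the per-rank DeepSpeed ZeRO shards for this step: `bf16_zero_pp_rank_{0..3}_..._optim_states.pt` hold the partitioned optimizer state and `zero_pp_rank_{0..3}_..._model_states.pt` the per-rank model metadata, one pair per data-parallel rank. If a single consolidated fp32 state dict is wanted, DeepSpeed's `zero_to_fp32` utilities can merge them; a sketch under the assumption that `deepspeed` is installed and the LFS objects have actually been downloaded:

```python
# Sketch: merge the per-rank ZeRO shards under last-checkpoint/global_step1500
# into one fp32 state_dict. Assumes `deepspeed` is installed and the ~24 GB
# LFS objects have been fetched (the diff above only shows pointers).
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

state_dict = get_fp32_state_dict_from_zero_checkpoint(
    "last-checkpoint",      # directory that holds `latest` and global_step1500/
    tag="global_step1500",  # explicit tag; defaults to whatever `latest` names
)
print(f"consolidated {len(state_dict)} tensors")
```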
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step1200
+ global_step1500
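
`latest` is the small tag file DeepSpeed consults on resume to pick the `global_step*` directory; this commit advances it from `global_step1200` to `global_step1500`. A minimal sketch of how that tag resolves to the shard paths added above (assuming a local checkout of `last-checkpoint/`):

```python
# Sketch: resolve the DeepSpeed resume tag the way a restarted run would.
# `latest` names the shard directory; the per-rank files it points at are the
# ones added in this commit. Assumes last-checkpoint/ exists locally.
from pathlib import Path

ckpt_dir = Path("last-checkpoint")
tag = (ckpt_dir / "latest").read_text().strip()        # -> "global_step1500"
shards = sorted((ckpt_dir / tag).glob("*_states.pt"))  # optimizer + model shards
print(tag, [p.name for p in shards])
```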
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9cb0fd34a3ce140ea8b2609bdd946e63f0fdada580290ec49e87c5fd86c5d9cd
+ oid sha256:c232699cc7833ad1c4bc01be99aaf9576f52b50426c0dd90e77d56fe57003bd8
  size 4976698672
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e2efa57d079e5fb7cd4c9f36cc8ca312a81a3518c195550c8aaaff167ac4f2a2
+ oid sha256:bb26630d8f5249ea3099ea703a2b2c80769fc677b8ad9fdfaa85dcfc32419b8a
  size 4999802720
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8f63b2c2e9b763e70b3780c60812a03ae3b6a7f520d34967dc9b69ebed8fcd51
+ oid sha256:749813aeff58fe35f2eb5e71bddacb082d1bff06e7b90c4c6dda2ce1fe2792ae
  size 4915916176
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:125f33eb6fab681369f45b22b414cd937093a789c6ccc10a84267ec3b929fc7c
+ oid sha256:efe356faa4bc4c418b49ecbc85a6dd22d1f226c0c0fc5ed29f9e7b49217d392d
  size 1168138808
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8044e4c53158c210a17648ba8f2dc2d25a25bbfc55f686015542618eb652a33e
+ oid sha256:b4f003069486a57c6ac033f30cf4c4213eb6b7d659bab68a5a50fdb8da7c4118
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4cd85d7fa425e7888c973f1c2985ac15ca21b5e6171fe140a401c2bc75ca46ff
+ oid sha256:a016ef89b4392d083b2c15a7cf06a39bc61a759f648cf6dc03f1c32b89a526aa
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d7915667371a58f1598639e0d1c20a0c59c783c14580cd040a6631eb4ea2311e
+ oid sha256:9b56fe0893036dc052d18d90feba4328b90ea71561942150b07406ac3d7a700e
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:35dd78929ad7f0fbf37fdb1284e8edf0424350f6e6ce1cd5a3ee78979af3d3cb
+ oid sha256:c0c203d12c2c308dab785ed672c9ca27fb6a2f72acd1e1552d1516c7b0006013
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f602cb137b4f881491ae4ff5f7b87503ffe4d118e79329ce7c66a1f99a6b7554
+ oid sha256:542d0559a1accf1318fe1b90ae775b1a17df7be4c307e0080049ab5cb2d79573
  size 1064
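
The `trainer_state.json` diff below bumps the epoch/global_step header fields and appends the log entries for steps 1201-1500: a DPO-style record (`loss`, `grad_norm`, `learning_rate`, `logps/*`, `rewards/chosen`, `rewards/rejected`, `rewards/margins`, `rewards/accuracies`) every 2 steps, plus an `eval_*` block every 100 steps. A small sketch for inspecting the newly appended entries from a local checkout (the key names are the ones visible in the diff):

```python
# Sketch: load the updated trainer_state.json and pull out the loss and
# reward-margin values for the newly appended steps (>1200). Assumes a local
# checkout of the repository; key names match the entries in the diff below.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

new_logs = [entry for entry in state["log_history"]
            if entry.get("step", 0) > 1200 and "rewards/margins" in entry]

for entry in new_logs[:5]:  # first few of the appended training-log entries
    print(entry["step"], round(entry["loss"], 4), round(entry["rewards/margins"], 3))
```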
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
    "best_metric": null,
    "best_model_checkpoint": null,
-   "epoch": 1.3841828274817964,
+   "epoch": 1.7302285343522457,
    "eval_steps": 100,
-   "global_step": 1200,
+   "global_step": 1500,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
@@ -9199,6 +9199,2304 @@
    "eval_samples_per_second": 4.241,
    "eval_steps_per_second": 1.06,
    "step": 1200
9202
+ },
9203
+ {
9204
+ "epoch": 1.3864897988609328,
9205
+ "grad_norm": 44.142526060771885,
9206
+ "learning_rate": 4.768450588515978e-08,
9207
+ "logits/chosen": -1.3997114896774292,
9208
+ "logits/rejected": -1.4610910415649414,
9209
+ "logps/chosen": -131.3092041015625,
9210
+ "logps/rejected": -175.20504760742188,
9211
+ "loss": 0.3635,
9212
+ "rewards/accuracies": 0.96875,
9213
+ "rewards/chosen": -0.47151079773902893,
9214
+ "rewards/margins": 1.4366676807403564,
9215
+ "rewards/rejected": -1.9081785678863525,
9216
+ "step": 1202
9217
+ },
9218
+ {
9219
+ "epoch": 1.3887967702400692,
9220
+ "grad_norm": 71.5222774685512,
9221
+ "learning_rate": 4.7356783712264406e-08,
9222
+ "logits/chosen": -1.2650327682495117,
9223
+ "logits/rejected": -1.266175627708435,
9224
+ "logps/chosen": -227.5773162841797,
9225
+ "logps/rejected": -301.1935119628906,
9226
+ "loss": 0.4205,
9227
+ "rewards/accuracies": 0.8125,
9228
+ "rewards/chosen": -1.2289221286773682,
9229
+ "rewards/margins": 1.7615307569503784,
9230
+ "rewards/rejected": -2.990453004837036,
9231
+ "step": 1204
9232
+ },
9233
+ {
9234
+ "epoch": 1.3911037416192056,
9235
+ "grad_norm": 49.387995302632135,
9236
+ "learning_rate": 4.7029841838829265e-08,
9237
+ "logits/chosen": -1.2980482578277588,
9238
+ "logits/rejected": -1.3223190307617188,
9239
+ "logps/chosen": -119.88655090332031,
9240
+ "logps/rejected": -152.50636291503906,
9241
+ "loss": 0.4627,
9242
+ "rewards/accuracies": 0.71875,
9243
+ "rewards/chosen": -0.888391375541687,
9244
+ "rewards/margins": 0.6946803331375122,
9245
+ "rewards/rejected": -1.5830717086791992,
9246
+ "step": 1206
9247
+ },
9248
+ {
9249
+ "epoch": 1.393410712998342,
9250
+ "grad_norm": 45.81858789813819,
9251
+ "learning_rate": 4.670368511092165e-08,
9252
+ "logits/chosen": -1.4421995878219604,
9253
+ "logits/rejected": -1.441506266593933,
9254
+ "logps/chosen": -104.14234161376953,
9255
+ "logps/rejected": -150.188720703125,
9256
+ "loss": 0.4279,
9257
+ "rewards/accuracies": 0.84375,
9258
+ "rewards/chosen": -0.8347434401512146,
9259
+ "rewards/margins": 1.1854957342147827,
9260
+ "rewards/rejected": -2.0202391147613525,
9261
+ "step": 1208
9262
+ },
9263
+ {
9264
+ "epoch": 1.3957176843774781,
9265
+ "grad_norm": 60.579372765659905,
9266
+ "learning_rate": 4.6378318362971024e-08,
9267
+ "logits/chosen": -1.5403013229370117,
9268
+ "logits/rejected": -1.49015474319458,
9269
+ "logps/chosen": -235.0990447998047,
9270
+ "logps/rejected": -266.61248779296875,
9271
+ "loss": 0.3995,
9272
+ "rewards/accuracies": 0.8125,
9273
+ "rewards/chosen": -1.1586740016937256,
9274
+ "rewards/margins": 1.0637941360473633,
9275
+ "rewards/rejected": -2.2224678993225098,
9276
+ "step": 1210
9277
+ },
9278
+ {
9279
+ "epoch": 1.3980246557566145,
9280
+ "grad_norm": 60.06295691045147,
9281
+ "learning_rate": 4.605374641769751e-08,
9282
+ "logits/chosen": -1.290532112121582,
9283
+ "logits/rejected": -1.332384467124939,
9284
+ "logps/chosen": -156.14129638671875,
9285
+ "logps/rejected": -215.77317810058594,
9286
+ "loss": 0.4003,
9287
+ "rewards/accuracies": 0.84375,
9288
+ "rewards/chosen": -1.1309702396392822,
9289
+ "rewards/margins": 1.253702163696289,
9290
+ "rewards/rejected": -2.3846724033355713,
9291
+ "step": 1212
9292
+ },
9293
+ {
9294
+ "epoch": 1.4003316271357509,
9295
+ "grad_norm": 51.192598664527935,
9296
+ "learning_rate": 4.572997408604035e-08,
9297
+ "logits/chosen": -1.3888226747512817,
9298
+ "logits/rejected": -1.3567888736724854,
9299
+ "logps/chosen": -143.24537658691406,
9300
+ "logps/rejected": -161.50547790527344,
9301
+ "loss": 0.398,
9302
+ "rewards/accuracies": 0.75,
9303
+ "rewards/chosen": -0.7951155304908752,
9304
+ "rewards/margins": 0.9197046756744385,
9305
+ "rewards/rejected": -1.714820146560669,
9306
+ "step": 1214
9307
+ },
9308
+ {
9309
+ "epoch": 1.402638598514887,
9310
+ "grad_norm": 53.52691613905668,
9311
+ "learning_rate": 4.540700616708657e-08,
9312
+ "logits/chosen": -1.395592212677002,
9313
+ "logits/rejected": -1.339430570602417,
9314
+ "logps/chosen": -173.7642822265625,
9315
+ "logps/rejected": -199.0121612548828,
9316
+ "loss": 0.3771,
9317
+ "rewards/accuracies": 0.71875,
9318
+ "rewards/chosen": -0.9041624665260315,
9319
+ "rewards/margins": 1.4243049621582031,
9320
+ "rewards/rejected": -2.32846736907959,
9321
+ "step": 1216
9322
+ },
9323
+ {
9324
+ "epoch": 1.4049455698940236,
9325
+ "grad_norm": 52.455537691348205,
9326
+ "learning_rate": 4.5084847447999786e-08,
9327
+ "logits/chosen": -1.2974121570587158,
9328
+ "logits/rejected": -1.327599048614502,
9329
+ "logps/chosen": -140.09539794921875,
9330
+ "logps/rejected": -200.61834716796875,
9331
+ "loss": 0.4076,
9332
+ "rewards/accuracies": 0.78125,
9333
+ "rewards/chosen": -0.8636670708656311,
9334
+ "rewards/margins": 1.1840355396270752,
9335
+ "rewards/rejected": -2.0477025508880615,
9336
+ "step": 1218
9337
+ },
9338
+ {
9339
+ "epoch": 1.4072525412731598,
9340
+ "grad_norm": 54.793630592320525,
9341
+ "learning_rate": 4.476350270394942e-08,
9342
+ "logits/chosen": -1.3982492685317993,
9343
+ "logits/rejected": -1.4513742923736572,
9344
+ "logps/chosen": -146.10171508789062,
9345
+ "logps/rejected": -177.4093780517578,
9346
+ "loss": 0.4384,
9347
+ "rewards/accuracies": 0.8125,
9348
+ "rewards/chosen": -0.7920992970466614,
9349
+ "rewards/margins": 1.0156512260437012,
9350
+ "rewards/rejected": -1.8077504634857178,
9351
+ "step": 1220
9352
+ },
9353
+ {
9354
+ "epoch": 1.4095595126522962,
9355
+ "grad_norm": 51.63343493220621,
9356
+ "learning_rate": 4.44429766980398e-08,
9357
+ "logits/chosen": -1.4327033758163452,
9358
+ "logits/rejected": -1.3436576128005981,
9359
+ "logps/chosen": -204.16064453125,
9360
+ "logps/rejected": -280.8416748046875,
9361
+ "loss": 0.3825,
9362
+ "rewards/accuracies": 0.84375,
9363
+ "rewards/chosen": -1.0972062349319458,
9364
+ "rewards/margins": 1.7975908517837524,
9365
+ "rewards/rejected": -2.8947970867156982,
9366
+ "step": 1222
9367
+ },
9368
+ {
9369
+ "epoch": 1.4118664840314326,
9370
+ "grad_norm": 41.36257251797993,
9371
+ "learning_rate": 4.412327418123951e-08,
9372
+ "logits/chosen": -1.5090527534484863,
9373
+ "logits/rejected": -1.410886287689209,
9374
+ "logps/chosen": -151.58053588867188,
9375
+ "logps/rejected": -172.71141052246094,
9376
+ "loss": 0.3836,
9377
+ "rewards/accuracies": 0.75,
9378
+ "rewards/chosen": -0.9768989086151123,
9379
+ "rewards/margins": 1.0158425569534302,
9380
+ "rewards/rejected": -1.992741584777832,
9381
+ "step": 1224
9382
+ },
9383
+ {
9384
+ "epoch": 1.4141734554105687,
9385
+ "grad_norm": 56.70346907426679,
9386
+ "learning_rate": 4.3804399892311105e-08,
9387
+ "logits/chosen": -1.4848518371582031,
9388
+ "logits/rejected": -1.4474397897720337,
9389
+ "logps/chosen": -146.97665405273438,
9390
+ "logps/rejected": -157.70277404785156,
9391
+ "loss": 0.4153,
9392
+ "rewards/accuracies": 0.8125,
9393
+ "rewards/chosen": -0.8606613874435425,
9394
+ "rewards/margins": 0.8153095841407776,
9395
+ "rewards/rejected": -1.6759709119796753,
9396
+ "step": 1226
9397
+ },
9398
+ {
9399
+ "epoch": 1.416480426789705,
9400
+ "grad_norm": 47.688929811885686,
9401
+ "learning_rate": 4.348635855774081e-08,
9402
+ "logits/chosen": -1.419485092163086,
9403
+ "logits/rejected": -1.461479902267456,
9404
+ "logps/chosen": -150.58755493164062,
9405
+ "logps/rejected": -242.1259765625,
9406
+ "loss": 0.4167,
9407
+ "rewards/accuracies": 0.9375,
9408
+ "rewards/chosen": -0.9045516848564148,
9409
+ "rewards/margins": 1.710062861442566,
9410
+ "rewards/rejected": -2.614614725112915,
9411
+ "step": 1228
9412
+ },
9413
+ {
9414
+ "epoch": 1.4187873981688415,
9415
+ "grad_norm": 45.84951550965181,
9416
+ "learning_rate": 4.316915489166846e-08,
9417
+ "logits/chosen": -1.3644304275512695,
9418
+ "logits/rejected": -1.4508379697799683,
9419
+ "logps/chosen": -203.9729766845703,
9420
+ "logps/rejected": -246.74143981933594,
9421
+ "loss": 0.3521,
9422
+ "rewards/accuracies": 0.90625,
9423
+ "rewards/chosen": -1.0053611993789673,
9424
+ "rewards/margins": 1.194942593574524,
9425
+ "rewards/rejected": -2.200303792953491,
9426
+ "step": 1230
9427
+ },
9428
+ {
9429
+ "epoch": 1.4210943695479779,
9430
+ "grad_norm": 46.447372971104215,
9431
+ "learning_rate": 4.2852793595817524e-08,
9432
+ "logits/chosen": -1.2737727165222168,
9433
+ "logits/rejected": -1.3535902500152588,
9434
+ "logps/chosen": -154.6007080078125,
9435
+ "logps/rejected": -199.8658447265625,
9436
+ "loss": 0.416,
9437
+ "rewards/accuracies": 0.78125,
9438
+ "rewards/chosen": -0.9416995048522949,
9439
+ "rewards/margins": 1.36875319480896,
9440
+ "rewards/rejected": -2.310452699661255,
9441
+ "step": 1232
9442
+ },
9443
+ {
9444
+ "epoch": 1.4234013409271142,
9445
+ "grad_norm": 48.96355848399301,
9446
+ "learning_rate": 4.2537279359425625e-08,
9447
+ "logits/chosen": -1.3873982429504395,
9448
+ "logits/rejected": -1.4506531953811646,
9449
+ "logps/chosen": -153.49920654296875,
9450
+ "logps/rejected": -220.96954345703125,
9451
+ "loss": 0.3912,
9452
+ "rewards/accuracies": 0.90625,
9453
+ "rewards/chosen": -0.8012152910232544,
9454
+ "rewards/margins": 1.5085324048995972,
9455
+ "rewards/rejected": -2.3097476959228516,
9456
+ "step": 1234
9457
+ },
9458
+ {
9459
+ "epoch": 1.4257083123062504,
9460
+ "grad_norm": 51.877984061457596,
9461
+ "learning_rate": 4.2222616859174884e-08,
9462
+ "logits/chosen": -1.2629234790802002,
9463
+ "logits/rejected": -1.1391026973724365,
9464
+ "logps/chosen": -237.13059997558594,
9465
+ "logps/rejected": -254.82974243164062,
9466
+ "loss": 0.394,
9467
+ "rewards/accuracies": 0.90625,
9468
+ "rewards/chosen": -1.0531830787658691,
9469
+ "rewards/margins": 1.6561241149902344,
9470
+ "rewards/rejected": -2.7093071937561035,
9471
+ "step": 1236
9472
+ },
9473
+ {
9474
+ "epoch": 1.4280152836853868,
9475
+ "grad_norm": 45.68765126004196,
9476
+ "learning_rate": 4.190881075912268e-08,
9477
+ "logits/chosen": -1.3556292057037354,
9478
+ "logits/rejected": -1.374952793121338,
9479
+ "logps/chosen": -207.07798767089844,
9480
+ "logps/rejected": -324.4890441894531,
9481
+ "loss": 0.3957,
9482
+ "rewards/accuracies": 0.84375,
9483
+ "rewards/chosen": -1.2260664701461792,
9484
+ "rewards/margins": 1.6907085180282593,
9485
+ "rewards/rejected": -2.9167752265930176,
9486
+ "step": 1238
9487
+ },
9488
+ {
9489
+ "epoch": 1.4303222550645232,
9490
+ "grad_norm": 58.52244034620422,
9491
+ "learning_rate": 4.1595865710632364e-08,
9492
+ "logits/chosen": -1.4333430528640747,
9493
+ "logits/rejected": -1.5848240852355957,
9494
+ "logps/chosen": -171.97352600097656,
9495
+ "logps/rejected": -274.1302795410156,
9496
+ "loss": 0.3436,
9497
+ "rewards/accuracies": 0.90625,
9498
+ "rewards/chosen": -0.5987138748168945,
9499
+ "rewards/margins": 1.624565839767456,
9500
+ "rewards/rejected": -2.2232794761657715,
9501
+ "step": 1240
9502
+ },
9503
+ {
9504
+ "epoch": 1.4326292264436593,
9505
+ "grad_norm": 41.43586539854694,
9506
+ "learning_rate": 4.1283786352304494e-08,
9507
+ "logits/chosen": -1.37808358669281,
9508
+ "logits/rejected": -1.4364588260650635,
9509
+ "logps/chosen": -209.56759643554688,
9510
+ "logps/rejected": -294.7293395996094,
9511
+ "loss": 0.381,
9512
+ "rewards/accuracies": 0.90625,
9513
+ "rewards/chosen": -0.9219837188720703,
9514
+ "rewards/margins": 1.9135119915008545,
9515
+ "rewards/rejected": -2.835495710372925,
9516
+ "step": 1242
9517
+ },
9518
+ {
9519
+ "epoch": 1.4349361978227957,
9520
+ "grad_norm": 50.55832261861475,
9521
+ "learning_rate": 4.0972577309908054e-08,
9522
+ "logits/chosen": -1.5222772359848022,
9523
+ "logits/rejected": -1.4453645944595337,
9524
+ "logps/chosen": -186.14736938476562,
9525
+ "logps/rejected": -196.9586944580078,
9526
+ "loss": 0.421,
9527
+ "rewards/accuracies": 0.875,
9528
+ "rewards/chosen": -0.628247857093811,
9529
+ "rewards/margins": 1.214928388595581,
9530
+ "rewards/rejected": -1.8431761264801025,
9531
+ "step": 1244
9532
+ },
9533
+ {
9534
+ "epoch": 1.437243169201932,
9535
+ "grad_norm": 44.420648415221365,
9536
+ "learning_rate": 4.066224319631181e-08,
9537
+ "logits/chosen": -1.3419413566589355,
9538
+ "logits/rejected": -1.3609391450881958,
9539
+ "logps/chosen": -127.96074676513672,
9540
+ "logps/rejected": -184.03919982910156,
9541
+ "loss": 0.3564,
9542
+ "rewards/accuracies": 0.84375,
9543
+ "rewards/chosen": -0.8036874532699585,
9544
+ "rewards/margins": 1.4429806470870972,
9545
+ "rewards/rejected": -2.2466681003570557,
9546
+ "step": 1246
9547
+ },
9548
+ {
9549
+ "epoch": 1.4395501405810684,
9550
+ "grad_norm": 61.95216492377816,
9551
+ "learning_rate": 4.035278861141588e-08,
9552
+ "logits/chosen": -1.3702654838562012,
9553
+ "logits/rejected": -1.376900315284729,
9554
+ "logps/chosen": -208.74798583984375,
9555
+ "logps/rejected": -250.13096618652344,
9556
+ "loss": 0.4303,
9557
+ "rewards/accuracies": 0.8125,
9558
+ "rewards/chosen": -1.0059157609939575,
9559
+ "rewards/margins": 1.4688332080841064,
9560
+ "rewards/rejected": -2.4747488498687744,
9561
+ "step": 1248
9562
+ },
9563
+ {
9564
+ "epoch": 1.4418571119602048,
9565
+ "grad_norm": 41.2065328676138,
9566
+ "learning_rate": 4.004421814208381e-08,
9567
+ "logits/chosen": -1.4238879680633545,
9568
+ "logits/rejected": -1.3295542001724243,
9569
+ "logps/chosen": -211.55715942382812,
9570
+ "logps/rejected": -216.31297302246094,
9571
+ "loss": 0.3928,
9572
+ "rewards/accuracies": 0.875,
9573
+ "rewards/chosen": -1.0619137287139893,
9574
+ "rewards/margins": 0.779721200466156,
9575
+ "rewards/rejected": -1.8416348695755005,
9576
+ "step": 1250
9577
+ },
9578
+ {
9579
+ "epoch": 1.444164083339341,
9580
+ "grad_norm": 49.78351955053435,
9581
+ "learning_rate": 3.973653636207437e-08,
9582
+ "logits/chosen": -1.3463867902755737,
9583
+ "logits/rejected": -1.382056474685669,
9584
+ "logps/chosen": -150.3297882080078,
9585
+ "logps/rejected": -177.75344848632812,
9586
+ "loss": 0.4125,
9587
+ "rewards/accuracies": 0.8125,
9588
+ "rewards/chosen": -0.8665183186531067,
9589
+ "rewards/margins": 0.8241917490959167,
9590
+ "rewards/rejected": -1.690710186958313,
9591
+ "step": 1252
9592
+ },
9593
+ {
9594
+ "epoch": 1.4464710547184774,
9595
+ "grad_norm": 55.635458128144826,
9596
+ "learning_rate": 3.942974783197369e-08,
9597
+ "logits/chosen": -1.4604573249816895,
9598
+ "logits/rejected": -1.479122281074524,
9599
+ "logps/chosen": -202.9034423828125,
9600
+ "logps/rejected": -222.47259521484375,
9601
+ "loss": 0.4657,
9602
+ "rewards/accuracies": 0.75,
9603
+ "rewards/chosen": -0.9005415439605713,
9604
+ "rewards/margins": 0.963725209236145,
9605
+ "rewards/rejected": -1.8642667531967163,
9606
+ "step": 1254
9607
+ },
9608
+ {
9609
+ "epoch": 1.4487780260976137,
9610
+ "grad_norm": 59.59313665883058,
9611
+ "learning_rate": 3.912385709912793e-08,
9612
+ "logits/chosen": -1.3942638635635376,
9613
+ "logits/rejected": -1.3953242301940918,
9614
+ "logps/chosen": -201.85675048828125,
9615
+ "logps/rejected": -182.12155151367188,
9616
+ "loss": 0.4202,
9617
+ "rewards/accuracies": 0.8125,
9618
+ "rewards/chosen": -0.9314505457878113,
9619
+ "rewards/margins": 0.9069394469261169,
9620
+ "rewards/rejected": -1.8383899927139282,
9621
+ "step": 1256
9622
+ },
9623
+ {
9624
+ "epoch": 1.4510849974767501,
9625
+ "grad_norm": 48.47930918097777,
9626
+ "learning_rate": 3.881886869757565e-08,
9627
+ "logits/chosen": -1.3910021781921387,
9628
+ "logits/rejected": -1.3850898742675781,
9629
+ "logps/chosen": -198.0294189453125,
9630
+ "logps/rejected": -240.89625549316406,
9631
+ "loss": 0.3894,
9632
+ "rewards/accuracies": 0.78125,
9633
+ "rewards/chosen": -1.1357581615447998,
9634
+ "rewards/margins": 1.160947561264038,
9635
+ "rewards/rejected": -2.296705722808838,
9636
+ "step": 1258
9637
+ },
9638
+ {
9639
+ "epoch": 1.4533919688558865,
9640
+ "grad_norm": 59.379663832920095,
9641
+ "learning_rate": 3.851478714798075e-08,
9642
+ "logits/chosen": -1.4543538093566895,
9643
+ "logits/rejected": -1.3412946462631226,
9644
+ "logps/chosen": -175.93966674804688,
9645
+ "logps/rejected": -162.14071655273438,
9646
+ "loss": 0.4025,
9647
+ "rewards/accuracies": 0.9375,
9648
+ "rewards/chosen": -0.5226415991783142,
9649
+ "rewards/margins": 1.147411584854126,
9650
+ "rewards/rejected": -1.6700531244277954,
9651
+ "step": 1260
9652
+ },
9653
+ {
9654
+ "epoch": 1.4556989402350227,
9655
+ "grad_norm": 48.337458683899825,
9656
+ "learning_rate": 3.821161695756528e-08,
9657
+ "logits/chosen": -1.4448070526123047,
9658
+ "logits/rejected": -1.4677135944366455,
9659
+ "logps/chosen": -207.90625,
9660
+ "logps/rejected": -218.8758544921875,
9661
+ "loss": 0.3906,
9662
+ "rewards/accuracies": 0.84375,
9663
+ "rewards/chosen": -1.042921543121338,
9664
+ "rewards/margins": 0.8819532990455627,
9665
+ "rewards/rejected": -1.9248747825622559,
9666
+ "step": 1262
9667
+ },
9668
+ {
9669
+ "epoch": 1.458005911614159,
9670
+ "grad_norm": 37.810521769650734,
9671
+ "learning_rate": 3.790936262004286e-08,
9672
+ "logits/chosen": -1.4521461725234985,
9673
+ "logits/rejected": -1.4518852233886719,
9674
+ "logps/chosen": -164.5406494140625,
9675
+ "logps/rejected": -222.47146606445312,
9676
+ "loss": 0.3938,
9677
+ "rewards/accuracies": 0.78125,
9678
+ "rewards/chosen": -0.953307569026947,
9679
+ "rewards/margins": 1.0731128454208374,
9680
+ "rewards/rejected": -2.0264203548431396,
9681
+ "step": 1264
9682
+ },
9683
+ {
9684
+ "epoch": 1.4603128829932954,
9685
+ "grad_norm": 47.582874328113576,
9686
+ "learning_rate": 3.760802861555192e-08,
9687
+ "logits/chosen": -1.341475009918213,
9688
+ "logits/rejected": -1.4166816473007202,
9689
+ "logps/chosen": -169.36488342285156,
9690
+ "logps/rejected": -240.3351593017578,
9691
+ "loss": 0.3934,
9692
+ "rewards/accuracies": 0.8125,
9693
+ "rewards/chosen": -0.9554527997970581,
9694
+ "rewards/margins": 1.2030149698257446,
9695
+ "rewards/rejected": -2.1584675312042236,
9696
+ "step": 1266
9697
+ },
9698
+ {
9699
+ "epoch": 1.4626198543724316,
9700
+ "grad_norm": 45.64058383800231,
9701
+ "learning_rate": 3.7307619410589374e-08,
9702
+ "logits/chosen": -1.3934252262115479,
9703
+ "logits/rejected": -1.4444223642349243,
9704
+ "logps/chosen": -172.47576904296875,
9705
+ "logps/rejected": -233.11859130859375,
9706
+ "loss": 0.3604,
9707
+ "rewards/accuracies": 0.90625,
9708
+ "rewards/chosen": -0.9768462777137756,
9709
+ "rewards/margins": 1.4340298175811768,
9710
+ "rewards/rejected": -2.4108762741088867,
9711
+ "step": 1268
9712
+ },
9713
+ {
9714
+ "epoch": 1.464926825751568,
9715
+ "grad_norm": 53.5262973877338,
9716
+ "learning_rate": 3.7008139457944244e-08,
9717
+ "logits/chosen": -1.3181649446487427,
9718
+ "logits/rejected": -1.3409029245376587,
9719
+ "logps/chosen": -165.74822998046875,
9720
+ "logps/rejected": -195.7538604736328,
9721
+ "loss": 0.3998,
9722
+ "rewards/accuracies": 0.8125,
9723
+ "rewards/chosen": -0.8591657876968384,
9724
+ "rewards/margins": 1.1731674671173096,
9725
+ "rewards/rejected": -2.0323331356048584,
9726
+ "step": 1270
9727
+ },
9728
+ {
9729
+ "epoch": 1.4672337971307043,
9730
+ "grad_norm": 46.63543094780117,
9731
+ "learning_rate": 3.670959319663195e-08,
9732
+ "logits/chosen": -1.5043174028396606,
9733
+ "logits/rejected": -1.4369632005691528,
9734
+ "logps/chosen": -272.2401428222656,
9735
+ "logps/rejected": -299.15362548828125,
9736
+ "loss": 0.4018,
9737
+ "rewards/accuracies": 0.84375,
9738
+ "rewards/chosen": -1.246598243713379,
9739
+ "rewards/margins": 1.317877173423767,
9740
+ "rewards/rejected": -2.5644755363464355,
9741
+ "step": 1272
9742
+ },
9743
+ {
9744
+ "epoch": 1.4695407685098407,
9745
+ "grad_norm": 55.055744444477284,
9746
+ "learning_rate": 3.6411985051828266e-08,
9747
+ "logits/chosen": -1.3811333179473877,
9748
+ "logits/rejected": -1.388543963432312,
9749
+ "logps/chosen": -170.03436279296875,
9750
+ "logps/rejected": -235.34446716308594,
9751
+ "loss": 0.402,
9752
+ "rewards/accuracies": 0.9375,
9753
+ "rewards/chosen": -1.035598874092102,
9754
+ "rewards/margins": 1.4591599702835083,
9755
+ "rewards/rejected": -2.4947586059570312,
9756
+ "step": 1274
9757
+ },
9758
+ {
9759
+ "epoch": 1.471847739888977,
9760
+ "grad_norm": 50.20941285474848,
9761
+ "learning_rate": 3.611531943480389e-08,
9762
+ "logits/chosen": -1.3351385593414307,
9763
+ "logits/rejected": -1.3272579908370972,
9764
+ "logps/chosen": -175.16346740722656,
9765
+ "logps/rejected": -189.85296630859375,
9766
+ "loss": 0.4145,
9767
+ "rewards/accuracies": 0.875,
9768
+ "rewards/chosen": -0.9872362613677979,
9769
+ "rewards/margins": 1.0476864576339722,
9770
+ "rewards/rejected": -2.0349225997924805,
9771
+ "step": 1276
9772
+ },
9773
+ {
9774
+ "epoch": 1.4741547112681133,
9775
+ "grad_norm": 59.89422080082632,
9776
+ "learning_rate": 3.5819600742858844e-08,
9777
+ "logits/chosen": -1.3087190389633179,
9778
+ "logits/rejected": -1.3633005619049072,
9779
+ "logps/chosen": -158.3797607421875,
9780
+ "logps/rejected": -188.01727294921875,
9781
+ "loss": 0.4171,
9782
+ "rewards/accuracies": 0.84375,
9783
+ "rewards/chosen": -1.1256399154663086,
9784
+ "rewards/margins": 1.0341967344284058,
9785
+ "rewards/rejected": -2.159836530685425,
9786
+ "step": 1278
9787
+ },
9788
+ {
9789
+ "epoch": 1.4764616826472496,
9790
+ "grad_norm": 49.11561908258762,
9791
+ "learning_rate": 3.5524833359257575e-08,
9792
+ "logits/chosen": -1.5165667533874512,
9793
+ "logits/rejected": -1.462736964225769,
9794
+ "logps/chosen": -247.8217315673828,
9795
+ "logps/rejected": -262.7837219238281,
9796
+ "loss": 0.4012,
9797
+ "rewards/accuracies": 0.90625,
9798
+ "rewards/chosen": -0.883770763874054,
9799
+ "rewards/margins": 1.4304596185684204,
9800
+ "rewards/rejected": -2.314230442047119,
9801
+ "step": 1280
9802
+ },
9803
+ {
9804
+ "epoch": 1.478768654026386,
9805
+ "grad_norm": 45.113121140151996,
9806
+ "learning_rate": 3.5231021653163804e-08,
9807
+ "logits/chosen": -1.4450606107711792,
9808
+ "logits/rejected": -1.3937573432922363,
9809
+ "logps/chosen": -174.6051025390625,
9810
+ "logps/rejected": -222.32730102539062,
9811
+ "loss": 0.4,
9812
+ "rewards/accuracies": 0.8125,
9813
+ "rewards/chosen": -0.893982470035553,
9814
+ "rewards/margins": 1.3176380395889282,
9815
+ "rewards/rejected": -2.211620569229126,
9816
+ "step": 1282
9817
+ },
9818
+ {
9819
+ "epoch": 1.4810756254055222,
9820
+ "grad_norm": 52.74309051653843,
9821
+ "learning_rate": 3.493816997957582e-08,
9822
+ "logits/chosen": -1.1566662788391113,
9823
+ "logits/rejected": -1.0729938745498657,
9824
+ "logps/chosen": -164.8375701904297,
9825
+ "logps/rejected": -182.715087890625,
9826
+ "loss": 0.4019,
9827
+ "rewards/accuracies": 0.8125,
9828
+ "rewards/chosen": -0.991936206817627,
9829
+ "rewards/margins": 1.1979286670684814,
9830
+ "rewards/rejected": -2.1898648738861084,
9831
+ "step": 1284
9832
+ },
9833
+ {
9834
+ "epoch": 1.4833825967846586,
9835
+ "grad_norm": 59.365698205986185,
9836
+ "learning_rate": 3.464628267926181e-08,
9837
+ "logits/chosen": -1.5087039470672607,
9838
+ "logits/rejected": -1.4981944561004639,
9839
+ "logps/chosen": -142.24920654296875,
9840
+ "logps/rejected": -175.18621826171875,
9841
+ "loss": 0.4186,
9842
+ "rewards/accuracies": 0.875,
9843
+ "rewards/chosen": -0.8559743165969849,
9844
+ "rewards/margins": 0.9344435930252075,
9845
+ "rewards/rejected": -1.7904179096221924,
9846
+ "step": 1286
9847
+ },
9848
+ {
9849
+ "epoch": 1.485689568163795,
9850
+ "grad_norm": 57.44209478222106,
9851
+ "learning_rate": 3.435536407869575e-08,
9852
+ "logits/chosen": -1.3180654048919678,
9853
+ "logits/rejected": -1.3062459230422974,
9854
+ "logps/chosen": -161.09739685058594,
9855
+ "logps/rejected": -201.8702392578125,
9856
+ "loss": 0.3926,
9857
+ "rewards/accuracies": 0.875,
9858
+ "rewards/chosen": -0.852310061454773,
9859
+ "rewards/margins": 1.2749509811401367,
9860
+ "rewards/rejected": -2.127261161804199,
9861
+ "step": 1288
9862
+ },
9863
+ {
9864
+ "epoch": 1.4879965395429313,
9865
+ "grad_norm": 43.26619291788839,
9866
+ "learning_rate": 3.406541848999312e-08,
9867
+ "logits/chosen": -1.1940698623657227,
9868
+ "logits/rejected": -1.2222319841384888,
9869
+ "logps/chosen": -194.698974609375,
9870
+ "logps/rejected": -273.7522277832031,
9871
+ "loss": 0.3836,
9872
+ "rewards/accuracies": 0.875,
9873
+ "rewards/chosen": -1.1189277172088623,
9874
+ "rewards/margins": 1.7288103103637695,
9875
+ "rewards/rejected": -2.847738265991211,
9876
+ "step": 1290
9877
+ },
9878
+ {
9879
+ "epoch": 1.4903035109220677,
9880
+ "grad_norm": 53.62926570037078,
9881
+ "learning_rate": 3.377645021084701e-08,
9882
+ "logits/chosen": -1.2409629821777344,
9883
+ "logits/rejected": -1.3555923700332642,
9884
+ "logps/chosen": -138.0780029296875,
9885
+ "logps/rejected": -208.65414428710938,
9886
+ "loss": 0.3607,
9887
+ "rewards/accuracies": 0.96875,
9888
+ "rewards/chosen": -0.7014888525009155,
9889
+ "rewards/margins": 1.3151085376739502,
9890
+ "rewards/rejected": -2.016597270965576,
9891
+ "step": 1292
9892
+ },
9893
+ {
9894
+ "epoch": 1.4926104823012039,
9895
+ "grad_norm": 51.970100056876014,
9896
+ "learning_rate": 3.348846352446435e-08,
9897
+ "logits/chosen": -1.4000651836395264,
9898
+ "logits/rejected": -1.4792894124984741,
9899
+ "logps/chosen": -132.2428436279297,
9900
+ "logps/rejected": -182.43931579589844,
9901
+ "loss": 0.4103,
9902
+ "rewards/accuracies": 0.8125,
9903
+ "rewards/chosen": -0.824167013168335,
9904
+ "rewards/margins": 1.0639564990997314,
9905
+ "rewards/rejected": -1.888123631477356,
9906
+ "step": 1294
9907
+ },
9908
+ {
9909
+ "epoch": 1.4949174536803402,
9910
+ "grad_norm": 51.98384854834489,
9911
+ "learning_rate": 3.32014626995026e-08,
9912
+ "logits/chosen": -1.247258186340332,
9913
+ "logits/rejected": -1.2670161724090576,
9914
+ "logps/chosen": -173.65634155273438,
9915
+ "logps/rejected": -184.025390625,
9916
+ "loss": 0.3922,
9917
+ "rewards/accuracies": 0.78125,
9918
+ "rewards/chosen": -0.9547139406204224,
9919
+ "rewards/margins": 0.8975260257720947,
9920
+ "rewards/rejected": -1.852239966392517,
9921
+ "step": 1296
9922
+ },
9923
+ {
9924
+ "epoch": 1.4972244250594766,
9925
+ "grad_norm": 54.4822774885669,
9926
+ "learning_rate": 3.291545199000636e-08,
9927
+ "logits/chosen": -1.3456276655197144,
9928
+ "logits/rejected": -1.2962102890014648,
9929
+ "logps/chosen": -189.99917602539062,
9930
+ "logps/rejected": -226.09519958496094,
9931
+ "loss": 0.4015,
9932
+ "rewards/accuracies": 0.90625,
9933
+ "rewards/chosen": -1.1853322982788086,
9934
+ "rewards/margins": 1.2129353284835815,
9935
+ "rewards/rejected": -2.3982675075531006,
9936
+ "step": 1298
9937
+ },
9938
+ {
9939
+ "epoch": 1.499531396438613,
9940
+ "grad_norm": 49.557670987283394,
9941
+ "learning_rate": 3.263043563534428e-08,
9942
+ "logits/chosen": -1.4072293043136597,
9943
+ "logits/rejected": -1.403496265411377,
9944
+ "logps/chosen": -191.06785583496094,
9945
+ "logps/rejected": -222.01727294921875,
9946
+ "loss": 0.4256,
9947
+ "rewards/accuracies": 0.84375,
9948
+ "rewards/chosen": -1.1364355087280273,
9949
+ "rewards/margins": 0.8410711288452148,
9950
+ "rewards/rejected": -1.9775067567825317,
9951
+ "step": 1300
9952
+ },
9953
+ {
9954
+ "epoch": 1.499531396438613,
9955
+ "eval_logits/chosen": -1.349912405014038,
9956
+ "eval_logits/rejected": -1.2728021144866943,
9957
+ "eval_logps/chosen": -198.44601440429688,
9958
+ "eval_logps/rejected": -170.63462829589844,
9959
+ "eval_loss": 0.5290127396583557,
9960
+ "eval_rewards/accuracies": 0.7200000286102295,
9961
+ "eval_rewards/chosen": -1.3263821601867676,
9962
+ "eval_rewards/margins": 0.6855400204658508,
9963
+ "eval_rewards/rejected": -2.0119218826293945,
9964
+ "eval_runtime": 23.425,
9965
+ "eval_samples_per_second": 4.269,
9966
+ "eval_steps_per_second": 1.067,
9967
+ "step": 1300
9968
+ },
9969
+ {
9970
+ "epoch": 1.5018383678177494,
9971
+ "grad_norm": 45.842584597120144,
9972
+ "learning_rate": 3.23464178601463e-08,
9973
+ "logits/chosen": -1.3447407484054565,
9974
+ "logits/rejected": -1.3684443235397339,
9975
+ "logps/chosen": -190.647216796875,
9976
+ "logps/rejected": -204.70425415039062,
9977
+ "loss": 0.3852,
9978
+ "rewards/accuracies": 0.8125,
9979
+ "rewards/chosen": -1.0248242616653442,
9980
+ "rewards/margins": 1.146057367324829,
9981
+ "rewards/rejected": -2.1708812713623047,
9982
+ "step": 1302
9983
+ },
9984
+ {
9985
+ "epoch": 1.5041453391968855,
9986
+ "grad_norm": 47.37239527648157,
9987
+ "learning_rate": 3.206340287424102e-08,
9988
+ "logits/chosen": -1.278390884399414,
9989
+ "logits/rejected": -1.2682162523269653,
9990
+ "logps/chosen": -196.60675048828125,
9991
+ "logps/rejected": -235.6786346435547,
9992
+ "loss": 0.4076,
9993
+ "rewards/accuracies": 0.90625,
9994
+ "rewards/chosen": -1.1913988590240479,
9995
+ "rewards/margins": 1.3531779050827026,
9996
+ "rewards/rejected": -2.544576644897461,
9997
+ "step": 1304
9998
+ },
9999
+ {
10000
+ "epoch": 1.506452310576022,
10001
+ "grad_norm": 54.28789803813994,
10002
+ "learning_rate": 3.178139487259329e-08,
10003
+ "logits/chosen": -1.4512503147125244,
10004
+ "logits/rejected": -1.4933993816375732,
10005
+ "logps/chosen": -179.09815979003906,
10006
+ "logps/rejected": -240.13143920898438,
10007
+ "loss": 0.3969,
10008
+ "rewards/accuracies": 0.84375,
10009
+ "rewards/chosen": -0.9774467945098877,
10010
+ "rewards/margins": 1.2948458194732666,
10011
+ "rewards/rejected": -2.2722926139831543,
10012
+ "step": 1306
10013
+ },
10014
+ {
10015
+ "epoch": 1.5087592819551583,
10016
+ "grad_norm": 46.589365963047385,
10017
+ "learning_rate": 3.1500398035241936e-08,
10018
+ "logits/chosen": -1.4314634799957275,
10019
+ "logits/rejected": -1.4590768814086914,
10020
+ "logps/chosen": -126.36434936523438,
10021
+ "logps/rejected": -152.21775817871094,
10022
+ "loss": 0.4119,
10023
+ "rewards/accuracies": 0.78125,
10024
+ "rewards/chosen": -0.7625494003295898,
10025
+ "rewards/margins": 0.9721799492835999,
10026
+ "rewards/rejected": -1.7347294092178345,
10027
+ "step": 1308
10028
+ },
10029
+ {
10030
+ "epoch": 1.5110662533342945,
10031
+ "grad_norm": 52.15126576008971,
10032
+ "learning_rate": 3.1220416527238005e-08,
10033
+ "logits/chosen": -1.4616860151290894,
10034
+ "logits/rejected": -1.435218334197998,
10035
+ "logps/chosen": -159.05282592773438,
10036
+ "logps/rejected": -175.42967224121094,
10037
+ "loss": 0.4312,
10038
+ "rewards/accuracies": 0.8125,
10039
+ "rewards/chosen": -0.903962254524231,
10040
+ "rewards/margins": 0.8932026624679565,
10041
+ "rewards/rejected": -1.797164797782898,
10042
+ "step": 1310
10043
+ },
10044
+ {
10045
+ "epoch": 1.513373224713431,
10046
+ "grad_norm": 58.832075530463044,
10047
+ "learning_rate": 3.094145449858284e-08,
10048
+ "logits/chosen": -1.442615032196045,
10049
+ "logits/rejected": -1.3926329612731934,
10050
+ "logps/chosen": -185.3905029296875,
10051
+ "logps/rejected": -191.25869750976562,
10052
+ "loss": 0.4393,
10053
+ "rewards/accuracies": 0.78125,
10054
+ "rewards/chosen": -1.0720655918121338,
10055
+ "rewards/margins": 0.6762962341308594,
10056
+ "rewards/rejected": -1.7483618259429932,
10057
+ "step": 1312
10058
+ },
10059
+ {
10060
+ "epoch": 1.5156801960925672,
10061
+ "grad_norm": 44.7076995014112,
10062
+ "learning_rate": 3.0663516084166706e-08,
10063
+ "logits/chosen": -1.2298978567123413,
10064
+ "logits/rejected": -1.3115289211273193,
10065
+ "logps/chosen": -187.91552734375,
10066
+ "logps/rejected": -252.78433227539062,
10067
+ "loss": 0.3453,
10068
+ "rewards/accuracies": 0.90625,
10069
+ "rewards/chosen": -1.2967660427093506,
10070
+ "rewards/margins": 1.4439966678619385,
10071
+ "rewards/rejected": -2.740762710571289,
10072
+ "step": 1314
10073
+ },
10074
+ {
10075
+ "epoch": 1.5179871674717036,
10076
+ "grad_norm": 44.67859571904197,
10077
+ "learning_rate": 3.038660540370735e-08,
10078
+ "logits/chosen": -1.3734447956085205,
10079
+ "logits/rejected": -1.3580108880996704,
10080
+ "logps/chosen": -132.02508544921875,
10081
+ "logps/rejected": -184.58181762695312,
10082
+ "loss": 0.393,
10083
+ "rewards/accuracies": 0.8125,
10084
+ "rewards/chosen": -0.8986393809318542,
10085
+ "rewards/margins": 1.3148921728134155,
10086
+ "rewards/rejected": -2.213531255722046,
10087
+ "step": 1316
10088
+ },
10089
+ {
10090
+ "epoch": 1.52029413885084,
10091
+ "grad_norm": 47.98876193889907,
10092
+ "learning_rate": 3.011072656168906e-08,
10093
+ "logits/chosen": -1.3537302017211914,
10094
+ "logits/rejected": -1.359252691268921,
10095
+ "logps/chosen": -139.8520965576172,
10096
+ "logps/rejected": -180.31007385253906,
10097
+ "loss": 0.4064,
10098
+ "rewards/accuracies": 0.84375,
10099
+ "rewards/chosen": -0.850949764251709,
10100
+ "rewards/margins": 1.204286813735962,
10101
+ "rewards/rejected": -2.055236577987671,
10102
+ "step": 1318
10103
+ },
10104
+ {
10105
+ "epoch": 1.5226011102299761,
10106
+ "grad_norm": 46.0224640343323,
10107
+ "learning_rate": 2.9835883647301826e-08,
10108
+ "logits/chosen": -1.3717573881149292,
10109
+ "logits/rejected": -1.3601585626602173,
10110
+ "logps/chosen": -219.96463012695312,
10111
+ "logps/rejected": -260.22247314453125,
10112
+ "loss": 0.3923,
10113
+ "rewards/accuracies": 0.8125,
10114
+ "rewards/chosen": -1.1594874858856201,
10115
+ "rewards/margins": 1.7778315544128418,
10116
+ "rewards/rejected": -2.937319278717041,
10117
+ "step": 1320
10118
+ },
10119
+ {
10120
+ "epoch": 1.5249080816091125,
10121
+ "grad_norm": 42.94236856581764,
10122
+ "learning_rate": 2.9562080734380678e-08,
10123
+ "logits/chosen": -1.3365228176116943,
10124
+ "logits/rejected": -1.2907369136810303,
10125
+ "logps/chosen": -206.2550048828125,
10126
+ "logps/rejected": -295.9128723144531,
10127
+ "loss": 0.3811,
10128
+ "rewards/accuracies": 0.9375,
10129
+ "rewards/chosen": -0.8169040679931641,
10130
+ "rewards/margins": 1.9202640056610107,
10131
+ "rewards/rejected": -2.737168312072754,
10132
+ "step": 1322
10133
+ },
10134
+ {
10135
+ "epoch": 1.5272150529882489,
10136
+ "grad_norm": 52.34290409904556,
10137
+ "learning_rate": 2.928932188134525e-08,
10138
+ "logits/chosen": -1.3455016613006592,
10139
+ "logits/rejected": -1.2972326278686523,
10140
+ "logps/chosen": -155.3500518798828,
10141
+ "logps/rejected": -162.52243041992188,
10142
+ "loss": 0.4153,
10143
+ "rewards/accuracies": 0.875,
10144
+ "rewards/chosen": -0.7506276965141296,
10145
+ "rewards/margins": 1.141640305519104,
10146
+ "rewards/rejected": -1.8922679424285889,
10147
+ "step": 1324
10148
+ },
10149
+ {
10150
+ "epoch": 1.529522024367385,
10151
+ "grad_norm": 49.03147359925327,
10152
+ "learning_rate": 2.9017611131139762e-08,
10153
+ "logits/chosen": -1.4706536531448364,
10154
+ "logits/rejected": -1.4308120012283325,
10155
+ "logps/chosen": -184.1258544921875,
10156
+ "logps/rejected": -195.16162109375,
10157
+ "loss": 0.3929,
10158
+ "rewards/accuracies": 0.84375,
10159
+ "rewards/chosen": -0.9032193422317505,
10160
+ "rewards/margins": 1.1196848154067993,
10161
+ "rewards/rejected": -2.02290415763855,
10162
+ "step": 1326
10163
+ },
10164
+ {
10165
+ "epoch": 1.5318289957465216,
10166
+ "grad_norm": 49.97091160023798,
10167
+ "learning_rate": 2.874695251117303e-08,
10168
+ "logits/chosen": -1.4324705600738525,
10169
+ "logits/rejected": -1.3871915340423584,
10170
+ "logps/chosen": -147.20001220703125,
10171
+ "logps/rejected": -225.0901336669922,
10172
+ "loss": 0.4162,
10173
+ "rewards/accuracies": 0.84375,
10174
+ "rewards/chosen": -0.7438746690750122,
10175
+ "rewards/margins": 1.2811241149902344,
10176
+ "rewards/rejected": -2.024998664855957,
10177
+ "step": 1328
10178
+ },
10179
+ {
10180
+ "epoch": 1.5341359671256578,
10181
+ "grad_norm": 43.41525443911614,
10182
+ "learning_rate": 2.8477350033258672e-08,
10183
+ "logits/chosen": -1.522569179534912,
10184
+ "logits/rejected": -1.4508986473083496,
10185
+ "logps/chosen": -162.15855407714844,
10186
+ "logps/rejected": -165.8858642578125,
10187
+ "loss": 0.3882,
10188
+ "rewards/accuracies": 0.8125,
10189
+ "rewards/chosen": -0.6052396893501282,
10190
+ "rewards/margins": 0.978552520275116,
10191
+ "rewards/rejected": -1.5837922096252441,
10192
+ "step": 1330
10193
+ },
10194
+ {
10195
+ "epoch": 1.5364429385047942,
10196
+ "grad_norm": 49.48217123812548,
10197
+ "learning_rate": 2.8208807693555814e-08,
10198
+ "logits/chosen": -1.376870036125183,
10199
+ "logits/rejected": -1.3188726902008057,
10200
+ "logps/chosen": -202.34939575195312,
10201
+ "logps/rejected": -221.72894287109375,
10202
+ "loss": 0.4297,
10203
+ "rewards/accuracies": 0.78125,
10204
+ "rewards/chosen": -1.0866365432739258,
10205
+ "rewards/margins": 1.028623104095459,
10206
+ "rewards/rejected": -2.115259885787964,
10207
+ "step": 1332
10208
+ },
10209
+ {
10210
+ "epoch": 1.5387499098839306,
10211
+ "grad_norm": 46.51618814969754,
10212
+ "learning_rate": 2.7941329472509767e-08,
10213
+ "logits/chosen": -1.4210199117660522,
10214
+ "logits/rejected": -1.4886133670806885,
10215
+ "logps/chosen": -191.4695281982422,
10216
+ "logps/rejected": -216.92849731445312,
10217
+ "loss": 0.3674,
10218
+ "rewards/accuracies": 0.875,
10219
+ "rewards/chosen": -1.083839774131775,
10220
+ "rewards/margins": 1.2328987121582031,
10221
+ "rewards/rejected": -2.3167383670806885,
10222
+ "step": 1334
10223
+ },
10224
+ {
10225
+ "epoch": 1.5410568812630667,
10226
+ "grad_norm": 50.002556121820184,
10227
+ "learning_rate": 2.7674919334793033e-08,
10228
+ "logits/chosen": -1.3935497999191284,
10229
+ "logits/rejected": -1.4638290405273438,
10230
+ "logps/chosen": -188.38775634765625,
10231
+ "logps/rejected": -218.97442626953125,
10232
+ "loss": 0.3643,
10233
+ "rewards/accuracies": 0.90625,
10234
+ "rewards/chosen": -0.9267150163650513,
10235
+ "rewards/margins": 1.4599485397338867,
10236
+ "rewards/rejected": -2.3866634368896484,
10237
+ "step": 1336
10238
+ },
10239
+ {
10240
+ "epoch": 1.543363852642203,
10241
+ "grad_norm": 43.96736215737179,
10242
+ "learning_rate": 2.7409581229246493e-08,
10243
+ "logits/chosen": -1.4587171077728271,
10244
+ "logits/rejected": -1.3520967960357666,
10245
+ "logps/chosen": -186.90631103515625,
10246
+ "logps/rejected": -198.11570739746094,
10247
+ "loss": 0.4022,
10248
+ "rewards/accuracies": 0.84375,
10249
+ "rewards/chosen": -0.6897175908088684,
10250
+ "rewards/margins": 1.123156189918518,
10251
+ "rewards/rejected": -1.8128737211227417,
10252
+ "step": 1338
10253
+ },
10254
+ {
10255
+ "epoch": 1.5456708240213395,
10256
+ "grad_norm": 48.95299994888663,
10257
+ "learning_rate": 2.7145319088820985e-08,
10258
+ "logits/chosen": -1.4856892824172974,
10259
+ "logits/rejected": -1.4046401977539062,
10260
+ "logps/chosen": -157.6865234375,
10261
+ "logps/rejected": -185.38278198242188,
10262
+ "loss": 0.3834,
10263
+ "rewards/accuracies": 0.8125,
10264
+ "rewards/chosen": -0.9328738451004028,
10265
+ "rewards/margins": 1.4649465084075928,
10266
+ "rewards/rejected": -2.397820234298706,
10267
+ "step": 1340
10268
+ },
10269
+ {
10270
+ "epoch": 1.5479777954004759,
10271
+ "grad_norm": 47.48346304531486,
10272
+ "learning_rate": 2.688213683051892e-08,
10273
+ "logits/chosen": -1.3646446466445923,
10274
+ "logits/rejected": -1.3468654155731201,
10275
+ "logps/chosen": -199.64413452148438,
10276
+ "logps/rejected": -227.54318237304688,
10277
+ "loss": 0.3599,
10278
+ "rewards/accuracies": 0.875,
10279
+ "rewards/chosen": -1.035663366317749,
10280
+ "rewards/margins": 1.2792800664901733,
10281
+ "rewards/rejected": -2.314943552017212,
10282
+ "step": 1342
10283
+ },
10284
+ {
10285
+ "epoch": 1.5502847667796122,
10286
+ "grad_norm": 47.27417390076413,
10287
+ "learning_rate": 2.6620038355336305e-08,
10288
+ "logits/chosen": -1.4909706115722656,
10289
+ "logits/rejected": -1.4687060117721558,
10290
+ "logps/chosen": -160.08041381835938,
10291
+ "logps/rejected": -195.14886474609375,
10292
+ "loss": 0.3712,
10293
+ "rewards/accuracies": 0.875,
10294
+ "rewards/chosen": -0.8120248317718506,
10295
+ "rewards/margins": 1.2791612148284912,
10296
+ "rewards/rejected": -2.091186285018921,
10297
+ "step": 1344
10298
+ },
10299
+ {
10300
+ "epoch": 1.5525917381587484,
10301
+ "grad_norm": 39.1002186836634,
10302
+ "learning_rate": 2.635902754820475e-08,
10303
+ "logits/chosen": -1.4342174530029297,
10304
+ "logits/rejected": -1.4962186813354492,
10305
+ "logps/chosen": -203.3734588623047,
10306
+ "logps/rejected": -263.7275695800781,
10307
+ "loss": 0.3782,
10308
+ "rewards/accuracies": 0.84375,
10309
+ "rewards/chosen": -0.8007220029830933,
10310
+ "rewards/margins": 1.4874347448349,
10311
+ "rewards/rejected": -2.288156747817993,
10312
+ "step": 1346
10313
+ },
10314
+ {
10315
+ "epoch": 1.5548987095378848,
10316
+ "grad_norm": 44.5283434969193,
10317
+ "learning_rate": 2.60991082779341e-08,
10318
+ "logits/chosen": -1.2880148887634277,
10319
+ "logits/rejected": -1.3925414085388184,
10320
+ "logps/chosen": -148.82077026367188,
10321
+ "logps/rejected": -200.53903198242188,
10322
+ "loss": 0.3493,
10323
+ "rewards/accuracies": 0.90625,
10324
+ "rewards/chosen": -0.9107972383499146,
10325
+ "rewards/margins": 1.1858881711959839,
10326
+ "rewards/rejected": -2.0966851711273193,
10327
+ "step": 1348
10328
+ },
10329
+ {
10330
+ "epoch": 1.5572056809170212,
10331
+ "grad_norm": 45.32965113835206,
10332
+ "learning_rate": 2.5840284397154965e-08,
10333
+ "logits/chosen": -1.4253007173538208,
10334
+ "logits/rejected": -1.3697370290756226,
10335
+ "logps/chosen": -140.25965881347656,
10336
+ "logps/rejected": -157.40565490722656,
10337
+ "loss": 0.3721,
10338
+ "rewards/accuracies": 0.96875,
10339
+ "rewards/chosen": -0.6943234205245972,
10340
+ "rewards/margins": 1.344458818435669,
10341
+ "rewards/rejected": -2.0387821197509766,
10342
+ "step": 1350
10343
+ },
10344
+ {
10345
+ "epoch": 1.5595126522961573,
10346
+ "grad_norm": 43.36149821146732,
10347
+ "learning_rate": 2.5582559742261645e-08,
10348
+ "logits/chosen": -1.2941721677780151,
10349
+ "logits/rejected": -1.2105193138122559,
10350
+ "logps/chosen": -151.37664794921875,
10351
+ "logps/rejected": -192.40484619140625,
10352
+ "loss": 0.4241,
10353
+ "rewards/accuracies": 0.8125,
10354
+ "rewards/chosen": -0.9218543171882629,
10355
+ "rewards/margins": 1.150537133216858,
10356
+ "rewards/rejected": -2.0723917484283447,
10357
+ "step": 1352
10358
+ },
10359
+ {
10360
+ "epoch": 1.561819623675294,
10361
+ "grad_norm": 48.73204722090299,
10362
+ "learning_rate": 2.532593813335524e-08,
10363
+ "logits/chosen": -1.4378621578216553,
10364
+ "logits/rejected": -1.4481985569000244,
10365
+ "logps/chosen": -165.80274963378906,
10366
+ "logps/rejected": -181.1370391845703,
10367
+ "loss": 0.4308,
10368
+ "rewards/accuracies": 0.71875,
10369
+ "rewards/chosen": -0.9549030661582947,
10370
+ "rewards/margins": 0.6894029378890991,
10371
+ "rewards/rejected": -1.6443060636520386,
10372
+ "step": 1354
10373
+ },
10374
+ {
10375
+ "epoch": 1.56412659505443,
10376
+ "grad_norm": 48.96214324123758,
10377
+ "learning_rate": 2.5070423374187066e-08,
10378
+ "logits/chosen": -1.3675079345703125,
10379
+ "logits/rejected": -1.3449468612670898,
10380
+ "logps/chosen": -179.66668701171875,
10381
+ "logps/rejected": -205.29490661621094,
10382
+ "loss": 0.4202,
10383
+ "rewards/accuracies": 0.9375,
10384
+ "rewards/chosen": -0.7179181575775146,
10385
+ "rewards/margins": 1.2601079940795898,
10386
+ "rewards/rejected": -1.9780261516571045,
10387
+ "step": 1356
10388
+ },
10389
+ {
10390
+ "epoch": 1.5664335664335665,
10391
+ "grad_norm": 55.81959533812462,
10392
+ "learning_rate": 2.4816019252102272e-08,
10393
+ "logits/chosen": -1.3808372020721436,
10394
+ "logits/rejected": -1.2713388204574585,
10395
+ "logps/chosen": -192.23965454101562,
10396
+ "logps/rejected": -183.83566284179688,
10397
+ "loss": 0.4231,
10398
+ "rewards/accuracies": 0.8125,
10399
+ "rewards/chosen": -1.2753582000732422,
10400
+ "rewards/margins": 0.6576382517814636,
10401
+ "rewards/rejected": -1.932996392250061,
10402
+ "step": 1358
10403
+ },
10404
+ {
10405
+ "epoch": 1.5687405378127028,
10406
+ "grad_norm": 56.59847700001908,
10407
+ "learning_rate": 2.4562729537983605e-08,
10408
+ "logits/chosen": -1.2573238611221313,
10409
+ "logits/rejected": -1.2922176122665405,
10410
+ "logps/chosen": -149.29942321777344,
10411
+ "logps/rejected": -166.0135955810547,
10412
+ "loss": 0.4638,
10413
+ "rewards/accuracies": 0.71875,
10414
+ "rewards/chosen": -0.8205444812774658,
10415
+ "rewards/margins": 0.7928743958473206,
10416
+ "rewards/rejected": -1.6134188175201416,
10417
+ "step": 1360
10418
+ },
10419
+ {
10420
+ "epoch": 1.571047509191839,
10421
+ "grad_norm": 37.26011990037049,
10422
+ "learning_rate": 2.4310557986195702e-08,
10423
+ "logits/chosen": -1.3010238409042358,
10424
+ "logits/rejected": -1.2920893430709839,
10425
+ "logps/chosen": -299.04022216796875,
10426
+ "logps/rejected": -379.41375732421875,
10427
+ "loss": 0.2954,
10428
+ "rewards/accuracies": 0.9375,
10429
+ "rewards/chosen": -1.4708349704742432,
10430
+ "rewards/margins": 2.2122201919555664,
10431
+ "rewards/rejected": -3.6830554008483887,
10432
+ "step": 1362
10433
+ },
10434
+ {
10435
+ "epoch": 1.5733544805709754,
10436
+ "grad_norm": 49.54139763435727,
10437
+ "learning_rate": 2.4059508334529277e-08,
10438
+ "logits/chosen": -1.3956283330917358,
10439
+ "logits/rejected": -1.457297921180725,
10440
+ "logps/chosen": -191.56198120117188,
10441
+ "logps/rejected": -222.62847900390625,
10442
+ "loss": 0.4168,
10443
+ "rewards/accuracies": 0.875,
10444
+ "rewards/chosen": -0.9353391528129578,
10445
+ "rewards/margins": 1.0330374240875244,
10446
+ "rewards/rejected": -1.968376636505127,
10447
+ "step": 1364
10448
+ },
10449
+ {
10450
+ "epoch": 1.5756614519501118,
10451
+ "grad_norm": 41.5799173152666,
10452
+ "learning_rate": 2.3809584304145824e-08,
10453
+ "logits/chosen": -1.3520365953445435,
10454
+ "logits/rejected": -1.3251811265945435,
10455
+ "logps/chosen": -155.65635681152344,
10456
+ "logps/rejected": -256.5829162597656,
10457
+ "loss": 0.3825,
10458
+ "rewards/accuracies": 0.78125,
10459
+ "rewards/chosen": -0.817021369934082,
10460
+ "rewards/margins": 1.9787249565124512,
10461
+ "rewards/rejected": -2.795746326446533,
10462
+ "step": 1366
10463
+ },
10464
+ {
10465
+ "epoch": 1.577968423329248,
10466
+ "grad_norm": 42.84011718919187,
10467
+ "learning_rate": 2.3560789599522324e-08,
10468
+ "logits/chosen": -1.544500708580017,
10469
+ "logits/rejected": -1.4618444442749023,
10470
+ "logps/chosen": -161.96481323242188,
10471
+ "logps/rejected": -154.49684143066406,
10472
+ "loss": 0.376,
10473
+ "rewards/accuracies": 0.8125,
10474
+ "rewards/chosen": -0.8055549263954163,
10475
+ "rewards/margins": 0.7844254374504089,
10476
+ "rewards/rejected": -1.5899803638458252,
10477
+ "step": 1368
10478
+ },
10479
+ {
10480
+ "epoch": 1.5802753947083845,
10481
+ "grad_norm": 41.774939807969034,
10482
+ "learning_rate": 2.3313127908396513e-08,
10483
+ "logits/chosen": -1.3718278408050537,
10484
+ "logits/rejected": -1.4343537092208862,
10485
+ "logps/chosen": -206.5829315185547,
10486
+ "logps/rejected": -266.7309265136719,
10487
+ "loss": 0.3721,
10488
+ "rewards/accuracies": 0.9375,
10489
+ "rewards/chosen": -0.9916315674781799,
10490
+ "rewards/margins": 1.702213168144226,
10491
+ "rewards/rejected": -2.69384503364563,
10492
+ "step": 1370
10493
+ },
10494
+ {
10495
+ "epoch": 1.5825823660875207,
10496
+ "grad_norm": 44.23281683503162,
10497
+ "learning_rate": 2.3066602901712107e-08,
10498
+ "logits/chosen": -1.4820692539215088,
10499
+ "logits/rejected": -1.411734700202942,
10500
+ "logps/chosen": -131.54576110839844,
10501
+ "logps/rejected": -182.11221313476562,
10502
+ "loss": 0.4274,
10503
+ "rewards/accuracies": 0.875,
10504
+ "rewards/chosen": -0.6956377029418945,
10505
+ "rewards/margins": 1.3419928550720215,
10506
+ "rewards/rejected": -2.037630558013916,
10507
+ "step": 1372
10508
+ },
10509
+ {
10510
+ "epoch": 1.584889337466657,
10511
+ "grad_norm": 45.40441226988886,
10512
+ "learning_rate": 2.282121823356443e-08,
10513
+ "logits/chosen": -1.382628321647644,
10514
+ "logits/rejected": -1.3758268356323242,
10515
+ "logps/chosen": -180.58364868164062,
10516
+ "logps/rejected": -184.94630432128906,
10517
+ "loss": 0.3771,
10518
+ "rewards/accuracies": 0.8125,
10519
+ "rewards/chosen": -0.8638473749160767,
10520
+ "rewards/margins": 1.0921387672424316,
10521
+ "rewards/rejected": -1.9559861421585083,
10522
+ "step": 1374
10523
+ },
10524
+ {
10525
+ "epoch": 1.5871963088457934,
10526
+ "grad_norm": 53.33501581540229,
10527
+ "learning_rate": 2.2576977541146192e-08,
10528
+ "logits/chosen": -1.4102963209152222,
10529
+ "logits/rejected": -1.3674509525299072,
10530
+ "logps/chosen": -155.62570190429688,
10531
+ "logps/rejected": -192.2836151123047,
10532
+ "loss": 0.4065,
10533
+ "rewards/accuracies": 0.84375,
10534
+ "rewards/chosen": -0.7117963433265686,
10535
+ "rewards/margins": 1.3161264657974243,
10536
+ "rewards/rejected": -2.0279228687286377,
10537
+ "step": 1376
10538
+ },
10539
+ {
10540
+ "epoch": 1.5895032802249296,
10541
+ "grad_norm": 51.75166271383647,
10542
+ "learning_rate": 2.233388444469365e-08,
10543
+ "logits/chosen": -1.4923657178878784,
10544
+ "logits/rejected": -1.4987457990646362,
10545
+ "logps/chosen": -155.1583709716797,
10546
+ "logps/rejected": -197.0376434326172,
10547
+ "loss": 0.3875,
10548
+ "rewards/accuracies": 0.875,
10549
+ "rewards/chosen": -0.562934398651123,
10550
+ "rewards/margins": 1.135040044784546,
10551
+ "rewards/rejected": -1.6979745626449585,
10552
+ "step": 1378
10553
+ },
10554
+ {
10555
+ "epoch": 1.5918102516040662,
10556
+ "grad_norm": 42.14115881844709,
10557
+ "learning_rate": 2.2091942547432952e-08,
10558
+ "logits/chosen": -1.3474974632263184,
10559
+ "logits/rejected": -1.4093090295791626,
10560
+ "logps/chosen": -168.27703857421875,
10561
+ "logps/rejected": -235.56069946289062,
10562
+ "loss": 0.3403,
10563
+ "rewards/accuracies": 0.875,
10564
+ "rewards/chosen": -0.7459644675254822,
10565
+ "rewards/margins": 1.911803960800171,
10566
+ "rewards/rejected": -2.6577682495117188,
10567
+ "step": 1380
10568
+ },
10569
+ {
10570
+ "epoch": 1.5941172229832024,
10571
+ "grad_norm": 45.28202449928773,
10572
+ "learning_rate": 2.185115543552668e-08,
10573
+ "logits/chosen": -1.3790557384490967,
10574
+ "logits/rejected": -1.3599779605865479,
10575
+ "logps/chosen": -207.94744873046875,
10576
+ "logps/rejected": -290.3175048828125,
10577
+ "loss": 0.349,
10578
+ "rewards/accuracies": 0.8125,
10579
+ "rewards/chosen": -1.0387717485427856,
10580
+ "rewards/margins": 1.9479621648788452,
10581
+ "rewards/rejected": -2.986733913421631,
10582
+ "step": 1382
10583
+ },
10584
+ {
10585
+ "epoch": 1.5964241943623387,
10586
+ "grad_norm": 46.671094507627366,
10587
+ "learning_rate": 2.161152667802065e-08,
10588
+ "logits/chosen": -1.4180598258972168,
10589
+ "logits/rejected": -1.4577977657318115,
10590
+ "logps/chosen": -204.82492065429688,
10591
+ "logps/rejected": -241.60826110839844,
10592
+ "loss": 0.3675,
10593
+ "rewards/accuracies": 0.875,
10594
+ "rewards/chosen": -1.1089719533920288,
10595
+ "rewards/margins": 1.2917826175689697,
10596
+ "rewards/rejected": -2.400754690170288,
10597
+ "step": 1384
10598
+ },
10599
+ {
10600
+ "epoch": 1.5987311657414751,
10601
+ "grad_norm": 56.85629358101348,
10602
+ "learning_rate": 2.137305982679114e-08,
10603
+ "logits/chosen": -1.3005365133285522,
10604
+ "logits/rejected": -1.319321632385254,
10605
+ "logps/chosen": -199.8241729736328,
10606
+ "logps/rejected": -261.0871887207031,
10607
+ "loss": 0.4032,
10608
+ "rewards/accuracies": 0.84375,
10609
+ "rewards/chosen": -1.0821079015731812,
10610
+ "rewards/margins": 1.5878570079803467,
10611
+ "rewards/rejected": -2.6699647903442383,
10612
+ "step": 1386
10613
+ },
10614
+ {
10615
+ "epoch": 1.6010381371206113,
10616
+ "grad_norm": 46.07174902767819,
10617
+ "learning_rate": 2.1135758416492165e-08,
10618
+ "logits/chosen": -1.5705550909042358,
10619
+ "logits/rejected": -1.4742302894592285,
10620
+ "logps/chosen": -198.67947387695312,
10621
+ "logps/rejected": -229.13302612304688,
10622
+ "loss": 0.3705,
10623
+ "rewards/accuracies": 0.9375,
10624
+ "rewards/chosen": -0.9288389682769775,
10625
+ "rewards/margins": 1.4559340476989746,
10626
+ "rewards/rejected": -2.384772777557373,
10627
+ "step": 1388
10628
+ },
10629
+ {
10630
+ "epoch": 1.6033451084997477,
10631
+ "grad_norm": 42.39566003219877,
10632
+ "learning_rate": 2.089962596450311e-08,
10633
+ "logits/chosen": -1.4445676803588867,
10634
+ "logits/rejected": -1.5513989925384521,
10635
+ "logps/chosen": -166.5742950439453,
10636
+ "logps/rejected": -210.93817138671875,
10637
+ "loss": 0.3723,
10638
+ "rewards/accuracies": 0.75,
10639
+ "rewards/chosen": -1.0038912296295166,
10640
+ "rewards/margins": 1.1079350709915161,
10641
+ "rewards/rejected": -2.1118264198303223,
10642
+ "step": 1390
10643
+ },
10644
+ {
10645
+ "epoch": 1.605652079878884,
10646
+ "grad_norm": 45.674613502568626,
10647
+ "learning_rate": 2.0664665970876492e-08,
10648
+ "logits/chosen": -1.4405865669250488,
10649
+ "logits/rejected": -1.3463035821914673,
10650
+ "logps/chosen": -192.42994689941406,
10651
+ "logps/rejected": -198.7202606201172,
10652
+ "loss": 0.3608,
10653
+ "rewards/accuracies": 0.875,
10654
+ "rewards/chosen": -0.9397283792495728,
10655
+ "rewards/margins": 1.1064425706863403,
10656
+ "rewards/rejected": -2.046170949935913,
10657
+ "step": 1392
10658
+ },
10659
+ {
10660
+ "epoch": 1.6079590512580202,
10661
+ "grad_norm": 51.58508472735133,
10662
+ "learning_rate": 2.043088191828627e-08,
10663
+ "logits/chosen": -1.4186500310897827,
10664
+ "logits/rejected": -1.4255372285842896,
10665
+ "logps/chosen": -118.36923217773438,
10666
+ "logps/rejected": -183.34219360351562,
10667
+ "loss": 0.4034,
10668
+ "rewards/accuracies": 0.875,
10669
+ "rewards/chosen": -0.6923995018005371,
10670
+ "rewards/margins": 1.1543622016906738,
10671
+ "rewards/rejected": -1.84676194190979,
10672
+ "step": 1394
10673
+ },
10674
+ {
10675
+ "epoch": 1.6102660226371568,
10676
+ "grad_norm": 44.70959574418251,
10677
+ "learning_rate": 2.019827727197605e-08,
10678
+ "logits/chosen": -1.4978314638137817,
10679
+ "logits/rejected": -1.4510836601257324,
10680
+ "logps/chosen": -230.2447967529297,
10681
+ "logps/rejected": -276.96380615234375,
10682
+ "loss": 0.3901,
10683
+ "rewards/accuracies": 0.875,
10684
+ "rewards/chosen": -0.9087953567504883,
10685
+ "rewards/margins": 1.4139595031738281,
10686
+ "rewards/rejected": -2.3227550983428955,
10687
+ "step": 1396
10688
+ },
10689
+ {
10690
+ "epoch": 1.612572994016293,
10691
+ "grad_norm": 44.12838016221303,
10692
+ "learning_rate": 1.9966855479707868e-08,
10693
+ "logits/chosen": -1.327124834060669,
10694
+ "logits/rejected": -1.3261754512786865,
10695
+ "logps/chosen": -154.4375,
10696
+ "logps/rejected": -215.845458984375,
10697
+ "loss": 0.4076,
10698
+ "rewards/accuracies": 0.8125,
10699
+ "rewards/chosen": -0.8609018921852112,
10700
+ "rewards/margins": 1.4171010255813599,
10701
+ "rewards/rejected": -2.278002977371216,
10702
+ "step": 1398
10703
+ },
10704
+ {
10705
+ "epoch": 1.6148799653954293,
10706
+ "grad_norm": 44.23536998519936,
10707
+ "learning_rate": 1.9736619971710888e-08,
10708
+ "logits/chosen": -1.3296552896499634,
10709
+ "logits/rejected": -1.3575528860092163,
10710
+ "logps/chosen": -153.06039428710938,
10711
+ "logps/rejected": -195.69317626953125,
10712
+ "loss": 0.3428,
10713
+ "rewards/accuracies": 0.9375,
10714
+ "rewards/chosen": -0.6315370202064514,
10715
+ "rewards/margins": 1.1925398111343384,
10716
+ "rewards/rejected": -1.8240768909454346,
10717
+ "step": 1400
10718
+ },
10719
+ {
10720
+ "epoch": 1.6148799653954293,
10721
+ "eval_logits/chosen": -1.3480746746063232,
10722
+ "eval_logits/rejected": -1.2724710702896118,
10723
+ "eval_logps/chosen": -198.91085815429688,
10724
+ "eval_logps/rejected": -171.2626495361328,
10725
+ "eval_loss": 0.5261049866676331,
10726
+ "eval_rewards/accuracies": 0.6800000071525574,
10727
+ "eval_rewards/chosen": -1.3728693723678589,
10728
+ "eval_rewards/margins": 0.7018558382987976,
10729
+ "eval_rewards/rejected": -2.0747251510620117,
10730
+ "eval_runtime": 22.7346,
10731
+ "eval_samples_per_second": 4.399,
10732
+ "eval_steps_per_second": 1.1,
10733
+ "step": 1400
10734
+ },
10735
+ {
10736
+ "epoch": 1.6171869367745657,
10737
+ "grad_norm": 52.08080969982687,
10738
+ "learning_rate": 1.9507574160630767e-08,
10739
+ "logits/chosen": -1.2814185619354248,
10740
+ "logits/rejected": -1.2460808753967285,
10741
+ "logps/chosen": -144.47117614746094,
10742
+ "logps/rejected": -188.13999938964844,
10743
+ "loss": 0.384,
10744
+ "rewards/accuracies": 0.875,
10745
+ "rewards/chosen": -0.7697983980178833,
10746
+ "rewards/margins": 1.3541502952575684,
10747
+ "rewards/rejected": -2.123948574066162,
10748
+ "step": 1402
10749
+ },
10750
+ {
10751
+ "epoch": 1.6194939081537019,
10752
+ "grad_norm": 45.36665871108164,
10753
+ "learning_rate": 1.9279721441479046e-08,
10754
+ "logits/chosen": -1.4106312990188599,
10755
+ "logits/rejected": -1.458733320236206,
10756
+ "logps/chosen": -192.62257385253906,
10757
+ "logps/rejected": -218.3051300048828,
10758
+ "loss": 0.4132,
10759
+ "rewards/accuracies": 0.84375,
10760
+ "rewards/chosen": -1.014463186264038,
10761
+ "rewards/margins": 1.271317958831787,
10762
+ "rewards/rejected": -2.2857813835144043,
10763
+ "step": 1404
10764
+ },
10765
+ {
10766
+ "epoch": 1.6218008795328382,
10767
+ "grad_norm": 58.50577692485291,
10768
+ "learning_rate": 1.9053065191582606e-08,
10769
+ "logits/chosen": -1.5028626918792725,
10770
+ "logits/rejected": -1.3959300518035889,
10771
+ "logps/chosen": -174.7528076171875,
10772
+ "logps/rejected": -221.61056518554688,
10773
+ "loss": 0.3783,
10774
+ "rewards/accuracies": 0.875,
10775
+ "rewards/chosen": -0.924181342124939,
10776
+ "rewards/margins": 1.244417428970337,
10777
+ "rewards/rejected": -2.1685988903045654,
10778
+ "step": 1406
10779
+ },
10780
+ {
10781
+ "epoch": 1.6241078509119746,
10782
+ "grad_norm": 55.69642576738633,
10783
+ "learning_rate": 1.8827608770533877e-08,
10784
+ "logits/chosen": -1.301309585571289,
10785
+ "logits/rejected": -1.219205617904663,
10786
+ "logps/chosen": -178.25352478027344,
10787
+ "logps/rejected": -169.32664489746094,
10788
+ "loss": 0.4345,
10789
+ "rewards/accuracies": 0.71875,
10790
+ "rewards/chosen": -0.880225419998169,
10791
+ "rewards/margins": 0.7701900005340576,
10792
+ "rewards/rejected": -1.6504155397415161,
10793
+ "step": 1408
10794
+ },
10795
+ {
10796
+ "epoch": 1.6264148222911108,
10797
+ "grad_norm": 50.05529030380214,
10798
+ "learning_rate": 1.8603355520140895e-08,
10799
+ "logits/chosen": -1.368369221687317,
10800
+ "logits/rejected": -1.25881826877594,
10801
+ "logps/chosen": -166.48806762695312,
10802
+ "logps/rejected": -160.21493530273438,
10803
+ "loss": 0.4011,
10804
+ "rewards/accuracies": 0.90625,
10805
+ "rewards/chosen": -1.0832921266555786,
10806
+ "rewards/margins": 1.3331881761550903,
10807
+ "rewards/rejected": -2.416480302810669,
10808
+ "step": 1410
10809
+ },
10810
+ {
10811
+ "epoch": 1.6287217936702474,
10812
+ "grad_norm": 35.31423347756389,
10813
+ "learning_rate": 1.838030876437784e-08,
10814
+ "logits/chosen": -1.4292563199996948,
10815
+ "logits/rejected": -1.424263596534729,
10816
+ "logps/chosen": -182.7539825439453,
10817
+ "logps/rejected": -216.91290283203125,
10818
+ "loss": 0.3719,
10819
+ "rewards/accuracies": 0.84375,
10820
+ "rewards/chosen": -0.6027979254722595,
10821
+ "rewards/margins": 1.2666302919387817,
10822
+ "rewards/rejected": -1.8694281578063965,
10823
+ "step": 1412
10824
+ },
10825
+ {
10826
+ "epoch": 1.6310287650493835,
10827
+ "grad_norm": 59.70248933056549,
10828
+ "learning_rate": 1.815847180933565e-08,
10829
+ "logits/chosen": -1.3337175846099854,
10830
+ "logits/rejected": -1.327998161315918,
10831
+ "logps/chosen": -158.14678955078125,
10832
+ "logps/rejected": -170.66796875,
10833
+ "loss": 0.4364,
10834
+ "rewards/accuracies": 0.78125,
10835
+ "rewards/chosen": -0.9497777819633484,
10836
+ "rewards/margins": 1.0439199209213257,
10837
+ "rewards/rejected": -1.9936977624893188,
10838
+ "step": 1414
10839
+ },
10840
+ {
10841
+ "epoch": 1.63333573642852,
10842
+ "grad_norm": 57.90179651786705,
10843
+ "learning_rate": 1.793784794317319e-08,
10844
+ "logits/chosen": -1.3915679454803467,
10845
+ "logits/rejected": -1.3838558197021484,
10846
+ "logps/chosen": -183.7068634033203,
10847
+ "logps/rejected": -220.16253662109375,
10848
+ "loss": 0.4023,
10849
+ "rewards/accuracies": 0.84375,
10850
+ "rewards/chosen": -0.9595727920532227,
10851
+ "rewards/margins": 1.3129405975341797,
10852
+ "rewards/rejected": -2.2725133895874023,
10853
+ "step": 1416
10854
+ },
10855
+ {
10856
+ "epoch": 1.6356427078076563,
10857
+ "grad_norm": 51.167487196457614,
10858
+ "learning_rate": 1.7718440436068382e-08,
10859
+ "logits/chosen": -1.3140525817871094,
10860
+ "logits/rejected": -1.340272068977356,
10861
+ "logps/chosen": -176.0500030517578,
10862
+ "logps/rejected": -204.65216064453125,
10863
+ "loss": 0.474,
10864
+ "rewards/accuracies": 0.8125,
10865
+ "rewards/chosen": -0.8645345568656921,
10866
+ "rewards/margins": 1.0821396112442017,
10867
+ "rewards/rejected": -1.9466743469238281,
10868
+ "step": 1418
10869
+ },
10870
+ {
10871
+ "epoch": 1.6379496791867925,
10872
+ "grad_norm": 56.94252600307747,
10873
+ "learning_rate": 1.750025254016978e-08,
10874
+ "logits/chosen": -1.2692408561706543,
10875
+ "logits/rejected": -1.3052905797958374,
10876
+ "logps/chosen": -175.3609161376953,
10877
+ "logps/rejected": -211.70640563964844,
10878
+ "loss": 0.3708,
10879
+ "rewards/accuracies": 0.84375,
10880
+ "rewards/chosen": -0.8849305510520935,
10881
+ "rewards/margins": 1.1714421510696411,
10882
+ "rewards/rejected": -2.05637264251709,
10883
+ "step": 1420
10884
+ },
10885
+ {
10886
+ "epoch": 1.640256650565929,
10887
+ "grad_norm": 44.444243917868796,
10888
+ "learning_rate": 1.7283287489548316e-08,
10889
+ "logits/chosen": -1.4492988586425781,
10890
+ "logits/rejected": -1.4249682426452637,
10891
+ "logps/chosen": -196.2264404296875,
10892
+ "logps/rejected": -216.29348754882812,
10893
+ "loss": 0.3663,
10894
+ "rewards/accuracies": 0.9375,
10895
+ "rewards/chosen": -0.5758548974990845,
10896
+ "rewards/margins": 1.248561143875122,
10897
+ "rewards/rejected": -1.824416160583496,
10898
+ "step": 1422
10899
+ },
10900
+ {
10901
+ "epoch": 1.6425636219450652,
10902
+ "grad_norm": 52.679148376432316,
10903
+ "learning_rate": 1.7067548500149453e-08,
10904
+ "logits/chosen": -1.359799861907959,
10905
+ "logits/rejected": -1.344727635383606,
10906
+ "logps/chosen": -164.31396484375,
10907
+ "logps/rejected": -226.0536651611328,
10908
+ "loss": 0.378,
10909
+ "rewards/accuracies": 0.9375,
10910
+ "rewards/chosen": -0.9102058410644531,
10911
+ "rewards/margins": 1.4322762489318848,
10912
+ "rewards/rejected": -2.342482328414917,
10913
+ "step": 1424
10914
+ },
10915
+ {
10916
+ "epoch": 1.6448705933242016,
10917
+ "grad_norm": 49.61927724673604,
10918
+ "learning_rate": 1.6853038769745463e-08,
10919
+ "logits/chosen": -1.2480251789093018,
10920
+ "logits/rejected": -1.3352330923080444,
10921
+ "logps/chosen": -154.8423614501953,
10922
+ "logps/rejected": -193.94419860839844,
10923
+ "loss": 0.4382,
10924
+ "rewards/accuracies": 0.84375,
10925
+ "rewards/chosen": -0.9381543397903442,
10926
+ "rewards/margins": 1.1628456115722656,
10927
+ "rewards/rejected": -2.1010000705718994,
10928
+ "step": 1426
10929
+ },
10930
+ {
10931
+ "epoch": 1.647177564703338,
10932
+ "grad_norm": 49.640113889302896,
10933
+ "learning_rate": 1.663976147788806e-08,
10934
+ "logits/chosen": -1.3284053802490234,
10935
+ "logits/rejected": -1.3385878801345825,
10936
+ "logps/chosen": -165.2407989501953,
10937
+ "logps/rejected": -197.4837188720703,
10938
+ "loss": 0.3626,
10939
+ "rewards/accuracies": 0.84375,
10940
+ "rewards/chosen": -0.84311842918396,
10941
+ "rewards/margins": 1.2521381378173828,
10942
+ "rewards/rejected": -2.0952565670013428,
10943
+ "step": 1428
10944
+ },
10945
+ {
10946
+ "epoch": 1.6494845360824741,
10947
+ "grad_norm": 43.114553829090994,
10948
+ "learning_rate": 1.642771978586116e-08,
10949
+ "logits/chosen": -1.2242742776870728,
10950
+ "logits/rejected": -1.2630822658538818,
10951
+ "logps/chosen": -154.83834838867188,
10952
+ "logps/rejected": -206.02879333496094,
10953
+ "loss": 0.427,
10954
+ "rewards/accuracies": 0.84375,
10955
+ "rewards/chosen": -0.9801341891288757,
10956
+ "rewards/margins": 1.2844398021697998,
10957
+ "rewards/rejected": -2.264573812484741,
10958
+ "step": 1430
10959
+ },
10960
+ {
10961
+ "epoch": 1.6517915074616105,
10962
+ "grad_norm": 51.33726232803117,
10963
+ "learning_rate": 1.6216916836634177e-08,
10964
+ "logits/chosen": -1.284468650817871,
10965
+ "logits/rejected": -1.2946577072143555,
10966
+ "logps/chosen": -209.00216674804688,
10967
+ "logps/rejected": -308.66400146484375,
10968
+ "loss": 0.36,
10969
+ "rewards/accuracies": 0.96875,
10970
+ "rewards/chosen": -1.1411385536193848,
10971
+ "rewards/margins": 1.973024606704712,
10972
+ "rewards/rejected": -3.114163637161255,
10973
+ "step": 1432
10974
+ },
10975
+ {
10976
+ "epoch": 1.654098478840747,
10977
+ "grad_norm": 48.222540985367566,
10978
+ "learning_rate": 1.6007355754815378e-08,
10979
+ "logits/chosen": -1.3908207416534424,
10980
+ "logits/rejected": -1.3934330940246582,
10981
+ "logps/chosen": -152.45028686523438,
10982
+ "logps/rejected": -180.3572235107422,
10983
+ "loss": 0.3947,
10984
+ "rewards/accuracies": 0.78125,
10985
+ "rewards/chosen": -0.8398849368095398,
10986
+ "rewards/margins": 0.8340297341346741,
10987
+ "rewards/rejected": -1.6739145517349243,
10988
+ "step": 1434
10989
+ },
10990
+ {
10991
+ "epoch": 1.656405450219883,
10992
+ "grad_norm": 46.78220848929133,
10993
+ "learning_rate": 1.5799039646605484e-08,
10994
+ "logits/chosen": -1.4227409362792969,
10995
+ "logits/rejected": -1.341314673423767,
10996
+ "logps/chosen": -119.38903045654297,
10997
+ "logps/rejected": -148.2276153564453,
10998
+ "loss": 0.4058,
10999
+ "rewards/accuracies": 0.84375,
11000
+ "rewards/chosen": -0.9205418825149536,
11001
+ "rewards/margins": 1.0745038986206055,
11002
+ "rewards/rejected": -1.995045781135559,
11003
+ "step": 1436
11004
+ },
11005
+ {
11006
+ "epoch": 1.6587124215990197,
11007
+ "grad_norm": 42.27815969127513,
11008
+ "learning_rate": 1.5591971599751795e-08,
11009
+ "logits/chosen": -1.243879795074463,
11010
+ "logits/rejected": -1.291917324066162,
11011
+ "logps/chosen": -138.65306091308594,
11012
+ "logps/rejected": -196.24070739746094,
11013
+ "loss": 0.4255,
11014
+ "rewards/accuracies": 0.875,
11015
+ "rewards/chosen": -0.8021218776702881,
11016
+ "rewards/margins": 1.5537731647491455,
11017
+ "rewards/rejected": -2.3558952808380127,
11018
+ "step": 1438
11019
+ },
11020
+ {
11021
+ "epoch": 1.6610193929781558,
11022
+ "grad_norm": 48.207824810254635,
11023
+ "learning_rate": 1.5386154683502274e-08,
11024
+ "logits/chosen": -1.4134782552719116,
11025
+ "logits/rejected": -1.3608386516571045,
11026
+ "logps/chosen": -188.5570068359375,
11027
+ "logps/rejected": -204.60919189453125,
11028
+ "loss": 0.4035,
11029
+ "rewards/accuracies": 0.8125,
11030
+ "rewards/chosen": -1.1449999809265137,
11031
+ "rewards/margins": 1.1091419458389282,
11032
+ "rewards/rejected": -2.2541420459747314,
11033
+ "step": 1440
11034
+ },
11035
+ {
11036
+ "epoch": 1.6633263643572922,
11037
+ "grad_norm": 57.39443136319576,
11038
+ "learning_rate": 1.5181591948560158e-08,
11039
+ "logits/chosen": -1.3800638914108276,
11040
+ "logits/rejected": -1.3380552530288696,
11041
+ "logps/chosen": -193.9412841796875,
11042
+ "logps/rejected": -215.79400634765625,
11043
+ "loss": 0.3816,
11044
+ "rewards/accuracies": 0.875,
11045
+ "rewards/chosen": -0.9195079803466797,
11046
+ "rewards/margins": 1.2903274297714233,
11047
+ "rewards/rejected": -2.2098352909088135,
11048
+ "step": 1442
11049
+ },
11050
+ {
11051
+ "epoch": 1.6656333357364286,
11052
+ "grad_norm": 43.25151944767239,
11053
+ "learning_rate": 1.49782864270386e-08,
11054
+ "logits/chosen": -1.4888612031936646,
11055
+ "logits/rejected": -1.4488850831985474,
11056
+ "logps/chosen": -151.032470703125,
11057
+ "logps/rejected": -182.21556091308594,
11058
+ "loss": 0.3453,
11059
+ "rewards/accuracies": 0.8125,
11060
+ "rewards/chosen": -1.026658535003662,
11061
+ "rewards/margins": 1.0582255125045776,
11062
+ "rewards/rejected": -2.0848841667175293,
11063
+ "step": 1444
11064
+ },
11065
+ {
11066
+ "epoch": 1.6679403071155647,
11067
+ "grad_norm": 38.80329184083557,
11068
+ "learning_rate": 1.4776241132415911e-08,
11069
+ "logits/chosen": -1.3972203731536865,
11070
+ "logits/rejected": -1.3395717144012451,
11071
+ "logps/chosen": -219.14697265625,
11072
+ "logps/rejected": -254.46820068359375,
11073
+ "loss": 0.3645,
11074
+ "rewards/accuracies": 0.9375,
11075
+ "rewards/chosen": -1.0151716470718384,
11076
+ "rewards/margins": 1.6079694032669067,
11077
+ "rewards/rejected": -2.623141050338745,
11078
+ "step": 1446
11079
+ },
11080
+ {
11081
+ "epoch": 1.6702472784947013,
11082
+ "grad_norm": 54.87680843012438,
11083
+ "learning_rate": 1.4575459059490769e-08,
11084
+ "logits/chosen": -1.3980488777160645,
11085
+ "logits/rejected": -1.5601296424865723,
11086
+ "logps/chosen": -173.1259765625,
11087
+ "logps/rejected": -261.9532470703125,
11088
+ "loss": 0.4345,
11089
+ "rewards/accuracies": 0.90625,
11090
+ "rewards/chosen": -0.8360046744346619,
11091
+ "rewards/margins": 1.6964924335479736,
11092
+ "rewards/rejected": -2.5324971675872803,
11093
+ "step": 1448
11094
+ },
11095
+ {
11096
+ "epoch": 1.6725542498738375,
11097
+ "grad_norm": 56.403772234535865,
11098
+ "learning_rate": 1.4375943184337869e-08,
11099
+ "logits/chosen": -1.2514413595199585,
11100
+ "logits/rejected": -1.2857670783996582,
11101
+ "logps/chosen": -148.14613342285156,
11102
+ "logps/rejected": -192.71554565429688,
11103
+ "loss": 0.3637,
11104
+ "rewards/accuracies": 0.84375,
11105
+ "rewards/chosen": -0.924572229385376,
11106
+ "rewards/margins": 1.3504137992858887,
11107
+ "rewards/rejected": -2.2749857902526855,
11108
+ "step": 1450
11109
+ },
11110
+ {
11111
+ "epoch": 1.6748612212529739,
11112
+ "grad_norm": 41.663170151108154,
11113
+ "learning_rate": 1.4177696464263722e-08,
11114
+ "logits/chosen": -1.554652452468872,
11115
+ "logits/rejected": -1.556633710861206,
11116
+ "logps/chosen": -173.3590545654297,
11117
+ "logps/rejected": -214.38238525390625,
11118
+ "loss": 0.437,
11119
+ "rewards/accuracies": 0.6875,
11120
+ "rewards/chosen": -0.890671968460083,
11121
+ "rewards/margins": 1.1289126873016357,
11122
+ "rewards/rejected": -2.0195844173431396,
11123
+ "step": 1452
11124
+ },
11125
+ {
11126
+ "epoch": 1.6771681926321103,
11127
+ "grad_norm": 55.4468403681626,
11128
+ "learning_rate": 1.3980721837763032e-08,
11129
+ "logits/chosen": -1.522512674331665,
11130
+ "logits/rejected": -1.4707545042037964,
11131
+ "logps/chosen": -187.02899169921875,
11132
+ "logps/rejected": -202.90972900390625,
11133
+ "loss": 0.385,
11134
+ "rewards/accuracies": 0.96875,
11135
+ "rewards/chosen": -1.1045527458190918,
11136
+ "rewards/margins": 1.0547027587890625,
11137
+ "rewards/rejected": -2.159255266189575,
11138
+ "step": 1454
11139
+ },
11140
+ {
11141
+ "epoch": 1.6794751640112464,
11142
+ "grad_norm": 45.11818002402701,
11143
+ "learning_rate": 1.378502222447494e-08,
11144
+ "logits/chosen": -1.2670139074325562,
11145
+ "logits/rejected": -1.2748316526412964,
11146
+ "logps/chosen": -163.23745727539062,
11147
+ "logps/rejected": -186.1403350830078,
11148
+ "loss": 0.3824,
11149
+ "rewards/accuracies": 0.90625,
11150
+ "rewards/chosen": -0.9486604928970337,
11151
+ "rewards/margins": 1.1920839548110962,
11152
+ "rewards/rejected": -2.14074444770813,
11153
+ "step": 1456
11154
+ },
11155
+ {
11156
+ "epoch": 1.6817821353903828,
11157
+ "grad_norm": 45.36839329689688,
11158
+ "learning_rate": 1.3590600525139762e-08,
11159
+ "logits/chosen": -1.427920937538147,
11160
+ "logits/rejected": -1.4643090963363647,
11161
+ "logps/chosen": -152.86471557617188,
11162
+ "logps/rejected": -143.46755981445312,
11163
+ "loss": 0.4027,
11164
+ "rewards/accuracies": 0.78125,
11165
+ "rewards/chosen": -0.7307279109954834,
11166
+ "rewards/margins": 1.0662072896957397,
11167
+ "rewards/rejected": -1.7969350814819336,
11168
+ "step": 1458
11169
+ },
11170
+ {
11171
+ "epoch": 1.6840891067695192,
11172
+ "grad_norm": 42.1130276807721,
11173
+ "learning_rate": 1.3397459621556128e-08,
11174
+ "logits/chosen": -1.4458112716674805,
11175
+ "logits/rejected": -1.405861496925354,
11176
+ "logps/chosen": -204.7304229736328,
11177
+ "logps/rejected": -236.4501953125,
11178
+ "loss": 0.37,
11179
+ "rewards/accuracies": 0.90625,
11180
+ "rewards/chosen": -1.182440996170044,
11181
+ "rewards/margins": 1.2389183044433594,
11182
+ "rewards/rejected": -2.421359062194824,
11183
+ "step": 1460
11184
+ },
11185
+ {
11186
+ "epoch": 1.6863960781486553,
11187
+ "grad_norm": 47.391757305539294,
11188
+ "learning_rate": 1.320560237653816e-08,
11189
+ "logits/chosen": -1.3273866176605225,
11190
+ "logits/rejected": -1.343310832977295,
11191
+ "logps/chosen": -167.19651794433594,
11192
+ "logps/rejected": -222.67767333984375,
11193
+ "loss": 0.4054,
11194
+ "rewards/accuracies": 0.84375,
11195
+ "rewards/chosen": -0.9558027982711792,
11196
+ "rewards/margins": 1.4386895895004272,
11197
+ "rewards/rejected": -2.3944923877716064,
11198
+ "step": 1462
11199
+ },
11200
+ {
11201
+ "epoch": 1.688703049527792,
11202
+ "grad_norm": 45.31930327974149,
11203
+ "learning_rate": 1.3015031633873075e-08,
11204
+ "logits/chosen": -1.3923242092132568,
11205
+ "logits/rejected": -1.3101178407669067,
11206
+ "logps/chosen": -155.1905517578125,
11207
+ "logps/rejected": -161.7040557861328,
11208
+ "loss": 0.4541,
11209
+ "rewards/accuracies": 0.84375,
11210
+ "rewards/chosen": -0.7979952096939087,
11211
+ "rewards/margins": 0.887161135673523,
11212
+ "rewards/rejected": -1.6851563453674316,
11213
+ "step": 1464
11214
+ },
11215
+ {
11216
+ "epoch": 1.691010020906928,
11217
+ "grad_norm": 47.965640105650365,
11218
+ "learning_rate": 1.2825750218278963e-08,
11219
+ "logits/chosen": -1.376510500907898,
11220
+ "logits/rejected": -1.351030945777893,
11221
+ "logps/chosen": -184.95018005371094,
11222
+ "logps/rejected": -222.78271484375,
11223
+ "loss": 0.3602,
11224
+ "rewards/accuracies": 0.8125,
11225
+ "rewards/chosen": -1.0790867805480957,
11226
+ "rewards/margins": 1.4311017990112305,
11227
+ "rewards/rejected": -2.510188579559326,
11228
+ "step": 1466
11229
+ },
11230
+ {
11231
+ "epoch": 1.6933169922860645,
11232
+ "grad_norm": 44.34985796124818,
11233
+ "learning_rate": 1.2637760935363052e-08,
11234
+ "logits/chosen": -1.5339727401733398,
11235
+ "logits/rejected": -1.4939508438110352,
11236
+ "logps/chosen": -172.8440399169922,
11237
+ "logps/rejected": -214.15769958496094,
11238
+ "loss": 0.4033,
11239
+ "rewards/accuracies": 0.78125,
11240
+ "rewards/chosen": -0.8286362290382385,
11241
+ "rewards/margins": 1.027283787727356,
11242
+ "rewards/rejected": -1.8559203147888184,
11243
+ "step": 1468
11244
+ },
11245
+ {
11246
+ "epoch": 1.6956239636652009,
11247
+ "grad_norm": 42.43166448841978,
11248
+ "learning_rate": 1.2451066571579993e-08,
11249
+ "logits/chosen": -1.4077023267745972,
11250
+ "logits/rejected": -1.3327652215957642,
11251
+ "logps/chosen": -180.84344482421875,
11252
+ "logps/rejected": -198.8247528076172,
11253
+ "loss": 0.3264,
11254
+ "rewards/accuracies": 0.90625,
11255
+ "rewards/chosen": -0.9496182203292847,
11256
+ "rewards/margins": 1.4146476984024048,
11257
+ "rewards/rejected": -2.3642659187316895,
11258
+ "step": 1470
11259
+ },
11260
+ {
11261
+ "epoch": 1.697930935044337,
11262
+ "grad_norm": 49.74620204940183,
11263
+ "learning_rate": 1.2265669894190667e-08,
11264
+ "logits/chosen": -1.3190773725509644,
11265
+ "logits/rejected": -1.2969920635223389,
11266
+ "logps/chosen": -210.91648864746094,
11267
+ "logps/rejected": -215.4746551513672,
11268
+ "loss": 0.4142,
11269
+ "rewards/accuracies": 0.875,
11270
+ "rewards/chosen": -1.1324176788330078,
11271
+ "rewards/margins": 1.0822185277938843,
11272
+ "rewards/rejected": -2.2146360874176025,
11273
+ "step": 1472
11274
+ },
11275
+ {
11276
+ "epoch": 1.7002379064234734,
11277
+ "grad_norm": 47.636203771282304,
11278
+ "learning_rate": 1.2081573651221034e-08,
11279
+ "logits/chosen": -1.4100513458251953,
11280
+ "logits/rejected": -1.3401373624801636,
11281
+ "logps/chosen": -198.85769653320312,
11282
+ "logps/rejected": -232.75079345703125,
11283
+ "loss": 0.4262,
11284
+ "rewards/accuracies": 0.90625,
11285
+ "rewards/chosen": -0.6716212630271912,
11286
+ "rewards/margins": 1.5022387504577637,
11287
+ "rewards/rejected": -2.1738598346710205,
11288
+ "step": 1474
11289
+ },
11290
+ {
11291
+ "epoch": 1.7025448778026098,
11292
+ "grad_norm": 41.23379055239197,
11293
+ "learning_rate": 1.1898780571421552e-08,
11294
+ "logits/chosen": -1.3071932792663574,
11295
+ "logits/rejected": -1.3262195587158203,
11296
+ "logps/chosen": -245.61962890625,
11297
+ "logps/rejected": -291.36395263671875,
11298
+ "loss": 0.366,
11299
+ "rewards/accuracies": 0.90625,
11300
+ "rewards/chosen": -1.0852869749069214,
11301
+ "rewards/margins": 2.006357431411743,
11302
+ "rewards/rejected": -3.091644287109375,
11303
+ "step": 1476
11304
+ },
11305
+ {
11306
+ "epoch": 1.704851849181746,
11307
+ "grad_norm": 38.8308365926166,
11308
+ "learning_rate": 1.171729336422661e-08,
11309
+ "logits/chosen": -1.3179391622543335,
11310
+ "logits/rejected": -1.2845818996429443,
11311
+ "logps/chosen": -148.53089904785156,
11312
+ "logps/rejected": -196.87857055664062,
11313
+ "loss": 0.3507,
11314
+ "rewards/accuracies": 0.9375,
11315
+ "rewards/chosen": -0.8545979261398315,
11316
+ "rewards/margins": 1.5212167501449585,
11317
+ "rewards/rejected": -2.375814437866211,
11318
+ "step": 1478
11319
+ },
11320
+ {
11321
+ "epoch": 1.7071588205608825,
11322
+ "grad_norm": 46.3266972725699,
11323
+ "learning_rate": 1.153711471971448e-08,
11324
+ "logits/chosen": -1.3267916440963745,
11325
+ "logits/rejected": -1.3675000667572021,
11326
+ "logps/chosen": -223.65921020507812,
11327
+ "logps/rejected": -267.6008605957031,
11328
+ "loss": 0.332,
11329
+ "rewards/accuracies": 0.875,
11330
+ "rewards/chosen": -1.1026298999786377,
11331
+ "rewards/margins": 1.4217543601989746,
11332
+ "rewards/rejected": -2.5243842601776123,
11333
+ "step": 1480
11334
+ },
11335
+ {
11336
+ "epoch": 1.7094657919400187,
11337
+ "grad_norm": 42.30797369115846,
11338
+ "learning_rate": 1.135824730856726e-08,
11339
+ "logits/chosen": -1.4186185598373413,
11340
+ "logits/rejected": -1.37624990940094,
11341
+ "logps/chosen": -180.34097290039062,
11342
+ "logps/rejected": -212.8303985595703,
11343
+ "loss": 0.3619,
11344
+ "rewards/accuracies": 1.0,
11345
+ "rewards/chosen": -0.7309121489524841,
11346
+ "rewards/margins": 1.391485333442688,
11347
+ "rewards/rejected": -2.1223974227905273,
11348
+ "step": 1482
11349
+ },
11350
+ {
11351
+ "epoch": 1.711772763319155,
11352
+ "grad_norm": 42.86481474468262,
11353
+ "learning_rate": 1.1180693782031514e-08,
11354
+ "logits/chosen": -1.4671962261199951,
11355
+ "logits/rejected": -1.362795352935791,
11356
+ "logps/chosen": -227.28549194335938,
11357
+ "logps/rejected": -223.4291229248047,
11358
+ "loss": 0.3623,
11359
+ "rewards/accuracies": 0.8125,
11360
+ "rewards/chosen": -1.074944257736206,
11361
+ "rewards/margins": 1.3030946254730225,
11362
+ "rewards/rejected": -2.3780391216278076,
11363
+ "step": 1484
11364
+ },
11365
+ {
11366
+ "epoch": 1.7140797346982914,
11367
+ "grad_norm": 50.32433653433989,
11368
+ "learning_rate": 1.1004456771878834e-08,
11369
+ "logits/chosen": -1.2398756742477417,
11370
+ "logits/rejected": -1.259413242340088,
11371
+ "logps/chosen": -166.67294311523438,
11372
+ "logps/rejected": -197.03199768066406,
11373
+ "loss": 0.3835,
11374
+ "rewards/accuracies": 0.8125,
11375
+ "rewards/chosen": -1.0385042428970337,
11376
+ "rewards/margins": 1.2296409606933594,
11377
+ "rewards/rejected": -2.2681450843811035,
11378
+ "step": 1486
11379
+ },
11380
+ {
11381
+ "epoch": 1.7163867060774276,
11382
+ "grad_norm": 44.51587268033867,
11383
+ "learning_rate": 1.0829538890366863e-08,
11384
+ "logits/chosen": -1.339663028717041,
11385
+ "logits/rejected": -1.4015851020812988,
11386
+ "logps/chosen": -171.54469299316406,
11387
+ "logps/rejected": -203.06884765625,
11388
+ "loss": 0.3534,
11389
+ "rewards/accuracies": 0.90625,
11390
+ "rewards/chosen": -0.7753598093986511,
11391
+ "rewards/margins": 1.3360155820846558,
11392
+ "rewards/rejected": -2.111375331878662,
11393
+ "step": 1488
11394
+ },
11395
+ {
11396
+ "epoch": 1.7186936774565642,
11397
+ "grad_norm": 49.445893229920515,
11398
+ "learning_rate": 1.065594273020055e-08,
11399
+ "logits/chosen": -1.3083471059799194,
11400
+ "logits/rejected": -1.340012550354004,
11401
+ "logps/chosen": -202.39198303222656,
11402
+ "logps/rejected": -247.21881103515625,
11403
+ "loss": 0.3148,
11404
+ "rewards/accuracies": 0.96875,
11405
+ "rewards/chosen": -0.9150687456130981,
11406
+ "rewards/margins": 1.5993800163269043,
11407
+ "rewards/rejected": -2.514448881149292,
11408
+ "step": 1490
11409
+ },
11410
+ {
11411
+ "epoch": 1.7210006488357004,
11412
+ "grad_norm": 55.13220155509763,
11413
+ "learning_rate": 1.0483670864493776e-08,
11414
+ "logits/chosen": -1.4246532917022705,
11415
+ "logits/rejected": -1.3996690511703491,
11416
+ "logps/chosen": -177.84730529785156,
11417
+ "logps/rejected": -263.4200744628906,
11418
+ "loss": 0.3937,
11419
+ "rewards/accuracies": 0.75,
11420
+ "rewards/chosen": -1.3569920063018799,
11421
+ "rewards/margins": 1.5898300409317017,
11422
+ "rewards/rejected": -2.946821928024292,
11423
+ "step": 1492
11424
+ },
11425
+ {
11426
+ "epoch": 1.7233076202148367,
11427
+ "grad_norm": 52.87877048383229,
11428
+ "learning_rate": 1.0312725846731175e-08,
11429
+ "logits/chosen": -1.535531997680664,
11430
+ "logits/rejected": -1.5023539066314697,
11431
+ "logps/chosen": -204.4673614501953,
11432
+ "logps/rejected": -205.1877899169922,
11433
+ "loss": 0.4313,
11434
+ "rewards/accuracies": 0.90625,
11435
+ "rewards/chosen": -0.8635197877883911,
11436
+ "rewards/margins": 1.0750128030776978,
11437
+ "rewards/rejected": -1.9385325908660889,
11438
+ "step": 1494
11439
+ },
11440
+ {
11441
+ "epoch": 1.7256145915939731,
11442
+ "grad_norm": 58.012698038087876,
11443
+ "learning_rate": 1.014311021073031e-08,
11444
+ "logits/chosen": -1.3397972583770752,
11445
+ "logits/rejected": -1.3506940603256226,
11446
+ "logps/chosen": -167.1254425048828,
11447
+ "logps/rejected": -176.68478393554688,
11448
+ "loss": 0.4236,
11449
+ "rewards/accuracies": 0.6875,
11450
+ "rewards/chosen": -1.0245790481567383,
11451
+ "rewards/margins": 0.8268385529518127,
11452
+ "rewards/rejected": -1.8514174222946167,
11453
+ "step": 1496
11454
+ },
11455
+ {
11456
+ "epoch": 1.7279215629731093,
11457
+ "grad_norm": 45.63432516497851,
11458
+ "learning_rate": 9.974826470604047e-09,
11459
+ "logits/chosen": -1.4238135814666748,
11460
+ "logits/rejected": -1.4215826988220215,
11461
+ "logps/chosen": -200.67486572265625,
11462
+ "logps/rejected": -243.21206665039062,
11463
+ "loss": 0.312,
11464
+ "rewards/accuracies": 0.875,
11465
+ "rewards/chosen": -0.8548165559768677,
11466
+ "rewards/margins": 1.6108603477478027,
11467
+ "rewards/rejected": -2.465676784515381,
11468
+ "step": 1498
11469
+ },
11470
+ {
11471
+ "epoch": 1.7302285343522457,
11472
+ "grad_norm": 45.67289683342217,
11473
+ "learning_rate": 9.807877120723395e-09,
11474
+ "logits/chosen": -1.3849635124206543,
11475
+ "logits/rejected": -1.4313040971755981,
11476
+ "logps/chosen": -172.61903381347656,
11477
+ "logps/rejected": -212.602783203125,
11478
+ "loss": 0.3868,
11479
+ "rewards/accuracies": 0.9375,
11480
+ "rewards/chosen": -0.7782556414604187,
11481
+ "rewards/margins": 1.354672908782959,
11482
+ "rewards/rejected": -2.1329286098480225,
11483
+ "step": 1500
11484
+ },
11485
+ {
11486
+ "epoch": 1.7302285343522457,
11487
+ "eval_logits/chosen": -1.342780351638794,
11488
+ "eval_logits/rejected": -1.2655624151229858,
11489
+ "eval_logps/chosen": -198.90330505371094,
11490
+ "eval_logps/rejected": -171.5903778076172,
11491
+ "eval_loss": 0.5269267559051514,
11492
+ "eval_rewards/accuracies": 0.7200000286102295,
11493
+ "eval_rewards/chosen": -1.3721123933792114,
11494
+ "eval_rewards/margins": 0.7353845834732056,
11495
+ "eval_rewards/rejected": -2.107496976852417,
11496
+ "eval_runtime": 23.0274,
11497
+ "eval_samples_per_second": 4.343,
11498
+ "eval_steps_per_second": 1.086,
11499
+ "step": 1500
11500
  }
11501
  ],
11502
  "logging_steps": 2,