RyanYr commited on
Commit
de4bd6a
1 Parent(s): f3e2680

Training in progress, step 1752, checkpoint

Browse files
last-checkpoint/global_step1752/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38c93c1a87f13462b5e0a5655712f0b5c4c012fffd291dcc75382e0879b36d9e
3
+ size 24090788996
last-checkpoint/global_step1752/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab31a681deaa44cca1b5ed9bc3093e52b27f87ad098476a6f765ed1702235565
3
+ size 24090788996
last-checkpoint/global_step1752/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db1c7f5fed1fb99aea9d6c75635912272ce175d4bebe42e8ad47b5e85730f8f5
3
+ size 24090788996
last-checkpoint/global_step1752/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f69a4fa888baac249d427aa2ac2dc127a91fd0f7bee11e1b0be999f15e4438b7
3
+ size 24090788996
last-checkpoint/global_step1752/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c7d2aa6a6d4c0a30e10c39ed299af9b8aef9d66b6ff8d7a84c0a25a45701cd9
3
+ size 150693
last-checkpoint/global_step1752/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bc324cb078fc2735eda0f2ec2e2afdefed8e49fbf43a1c59c83521b42cc9657
3
+ size 150693
last-checkpoint/global_step1752/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37f1bc8bdd8263b75afd7b99b3c7ae1cf493a40191c5c0ed770efb3243c0528c
3
+ size 150693
last-checkpoint/global_step1752/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0aef5cf0e0dc06562d25766e6f1e03bd601dbe432d4bbd0b9eb3fbc4b6fdac0
3
+ size 150693
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1500
 
1
+ global_step1752
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec16c0b98fcebf6052a9aa927090800759b4fb6700367ad2c29354ecbf45f9f7
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09a78ea9ea6e0e17f21e9463d421205fee33f06a038f692cdbc15cc5da5406e6
3
  size 4976698672
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd85acac0b1cf3d6b603028d0abef6bbae49730ebe45add6807617156b350d1c
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67ebff0a52efc66dec3162b74b95953b162147486a84d2998d089213d5696860
3
  size 4999802720
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2df1f85a263fda844c3a5170fff9df97853e8127b0a5eddf0ad7744a2325916c
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9fbac7a8745fc91c66963c503ba1c883ddde4758539b7301fb550afd9e30274
3
  size 4915916176
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e480518cebd078a58ebf6e0cea1f57aa4919ad9372aba8cc8a04682ef0e504f
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:254f93f00f3d0d8c2a810b3561ea09850b051a412761bbe113d75573d3f92b1b
3
  size 1168138808
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4f003069486a57c6ac033f30cf4c4213eb6b7d659bab68a5a50fdb8da7c4118
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8f22ced19e790cc864cefe3b7c711d9ae631c44f95d42fb4829688cc3de0153
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a016ef89b4392d083b2c15a7cf06a39bc61a759f648cf6dc03f1c32b89a526aa
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e0407513eba77d34cbf3adf0e59a58bd80716f4f00f414854253637e82be43d
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b56fe0893036dc052d18d90feba4328b90ea71561942150b07406ac3d7a700e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6060636c023258ce9b965e244b8a58b4c99d5784dde4405b39737550ef50cd4f
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0c203d12c2c308dab785ed672c9ca27fb6a2f72acd1e1552d1516c7b0006013
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c24ccdfdcde39cb2265c82c50c36ffdfcc670f757aba4bcf4bb0fdc6d1373c4c
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ace6290c890a8d1e173a6da04a3c0a74aa055e1dc2c0b019def7feb7e061c29
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2800f1191a8da3fdb8a578f3e45335b90e0bd680c7897d41e35ad73896db01cd
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.710498182595681,
5
  "eval_steps": 100,
6
- "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -11497,6 +11497,1928 @@
11497
  "eval_samples_per_second": 4.648,
11498
  "eval_steps_per_second": 1.162,
11499
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11500
  }
11501
  ],
11502
  "logging_steps": 2,
@@ -11511,7 +13433,7 @@
11511
  "should_evaluate": false,
11512
  "should_log": false,
11513
  "should_save": true,
11514
- "should_training_stop": false
11515
  },
11516
  "attributes": {}
11517
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.9978618772717553,
5
  "eval_steps": 100,
6
+ "global_step": 1752,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
11497
  "eval_samples_per_second": 4.648,
11498
  "eval_steps_per_second": 1.162,
11499
  "step": 1500
11500
+ },
11501
+ {
11502
+ "epoch": 1.712778846839142,
11503
+ "grad_norm": 59.337175090307106,
11504
+ "learning_rate": 1.1090063682116202e-08,
11505
+ "logits/chosen": -1.2650439739227295,
11506
+ "logits/rejected": -1.3502650260925293,
11507
+ "logps/chosen": -183.25546264648438,
11508
+ "logps/rejected": -224.42698669433594,
11509
+ "loss": 0.4722,
11510
+ "rewards/accuracies": 0.6875,
11511
+ "rewards/chosen": -0.49561411142349243,
11512
+ "rewards/margins": 1.1255251169204712,
11513
+ "rewards/rejected": -1.6211390495300293,
11514
+ "step": 1502
11515
+ },
11516
+ {
11517
+ "epoch": 1.7150595110826028,
11518
+ "grad_norm": 65.4693723853712,
11519
+ "learning_rate": 1.091662101338714e-08,
11520
+ "logits/chosen": -1.198454737663269,
11521
+ "logits/rejected": -1.1747905015945435,
11522
+ "logps/chosen": -176.22344970703125,
11523
+ "logps/rejected": -209.0,
11524
+ "loss": 0.428,
11525
+ "rewards/accuracies": 0.875,
11526
+ "rewards/chosen": -0.4550539553165436,
11527
+ "rewards/margins": 1.0402750968933105,
11528
+ "rewards/rejected": -1.4953290224075317,
11529
+ "step": 1504
11530
+ },
11531
+ {
11532
+ "epoch": 1.7173401753260638,
11533
+ "grad_norm": 56.55232069175886,
11534
+ "learning_rate": 1.0744466997123425e-08,
11535
+ "logits/chosen": -1.131535291671753,
11536
+ "logits/rejected": -1.2650810480117798,
11537
+ "logps/chosen": -149.54214477539062,
11538
+ "logps/rejected": -207.89288330078125,
11539
+ "loss": 0.4043,
11540
+ "rewards/accuracies": 0.78125,
11541
+ "rewards/chosen": -0.31441810727119446,
11542
+ "rewards/margins": 1.342848777770996,
11543
+ "rewards/rejected": -1.6572668552398682,
11544
+ "step": 1506
11545
+ },
11546
+ {
11547
+ "epoch": 1.7196208395695245,
11548
+ "grad_norm": 51.8349753003821,
11549
+ "learning_rate": 1.0573604123651503e-08,
11550
+ "logits/chosen": -1.3305891752243042,
11551
+ "logits/rejected": -1.357151985168457,
11552
+ "logps/chosen": -193.94952392578125,
11553
+ "logps/rejected": -216.302490234375,
11554
+ "loss": 0.3987,
11555
+ "rewards/accuracies": 0.875,
11556
+ "rewards/chosen": -0.3934152126312256,
11557
+ "rewards/margins": 1.1823620796203613,
11558
+ "rewards/rejected": -1.575777292251587,
11559
+ "step": 1508
11560
+ },
11561
+ {
11562
+ "epoch": 1.7219015038129855,
11563
+ "grad_norm": 63.60872727771506,
11564
+ "learning_rate": 1.0404034864620603e-08,
11565
+ "logits/chosen": -1.174712061882019,
11566
+ "logits/rejected": -1.2022875547409058,
11567
+ "logps/chosen": -148.9698486328125,
11568
+ "logps/rejected": -179.8749542236328,
11569
+ "loss": 0.4382,
11570
+ "rewards/accuracies": 0.9375,
11571
+ "rewards/chosen": -0.2437065690755844,
11572
+ "rewards/margins": 1.0777262449264526,
11573
+ "rewards/rejected": -1.3214329481124878,
11574
+ "step": 1510
11575
+ },
11576
+ {
11577
+ "epoch": 1.7241821680564464,
11578
+ "grad_norm": 56.263603035763026,
11579
+ "learning_rate": 1.0235761672966902e-08,
11580
+ "logits/chosen": -1.2788983583450317,
11581
+ "logits/rejected": -1.2880148887634277,
11582
+ "logps/chosen": -158.33840942382812,
11583
+ "logps/rejected": -187.15737915039062,
11584
+ "loss": 0.397,
11585
+ "rewards/accuracies": 0.9375,
11586
+ "rewards/chosen": -0.34705379605293274,
11587
+ "rewards/margins": 1.0756170749664307,
11588
+ "rewards/rejected": -1.422670841217041,
11589
+ "step": 1512
11590
+ },
11591
+ {
11592
+ "epoch": 1.7264628322999074,
11593
+ "grad_norm": 54.16757407624544,
11594
+ "learning_rate": 1.0068786982878087e-08,
11595
+ "logits/chosen": -1.3718278408050537,
11596
+ "logits/rejected": -1.337512731552124,
11597
+ "logps/chosen": -133.3784942626953,
11598
+ "logps/rejected": -142.33865356445312,
11599
+ "loss": 0.3983,
11600
+ "rewards/accuracies": 0.84375,
11601
+ "rewards/chosen": -0.2702075242996216,
11602
+ "rewards/margins": 0.9396376013755798,
11603
+ "rewards/rejected": -1.2098451852798462,
11604
+ "step": 1514
11605
+ },
11606
+ {
11607
+ "epoch": 1.7287434965433683,
11608
+ "grad_norm": 65.43992152706379,
11609
+ "learning_rate": 9.903113209758097e-09,
11610
+ "logits/chosen": -1.174783706665039,
11611
+ "logits/rejected": -1.1753756999969482,
11612
+ "logps/chosen": -141.90370178222656,
11613
+ "logps/rejected": -165.7357940673828,
11614
+ "loss": 0.3958,
11615
+ "rewards/accuracies": 0.84375,
11616
+ "rewards/chosen": -0.39702969789505005,
11617
+ "rewards/margins": 1.0974403619766235,
11618
+ "rewards/rejected": -1.494470238685608,
11619
+ "step": 1516
11620
+ },
11621
+ {
11622
+ "epoch": 1.7310241607868293,
11623
+ "grad_norm": 53.07155795474272,
11624
+ "learning_rate": 9.73874275019223e-09,
11625
+ "logits/chosen": -1.2876778841018677,
11626
+ "logits/rejected": -1.2978581190109253,
11627
+ "logps/chosen": -172.82862854003906,
11628
+ "logps/rejected": -199.86740112304688,
11629
+ "loss": 0.4196,
11630
+ "rewards/accuracies": 0.84375,
11631
+ "rewards/chosen": -0.360828161239624,
11632
+ "rewards/margins": 0.7934844493865967,
11633
+ "rewards/rejected": -1.1543126106262207,
11634
+ "step": 1518
11635
+ },
11636
+ {
11637
+ "epoch": 1.73330482503029,
11638
+ "grad_norm": 56.60655829896917,
11639
+ "learning_rate": 9.575677981912456e-09,
11640
+ "logits/chosen": -1.3419064283370972,
11641
+ "logits/rejected": -1.4197977781295776,
11642
+ "logps/chosen": -215.0706787109375,
11643
+ "logps/rejected": -234.6818084716797,
11644
+ "loss": 0.4216,
11645
+ "rewards/accuracies": 0.875,
11646
+ "rewards/chosen": -0.36836767196655273,
11647
+ "rewards/margins": 0.9383161067962646,
11648
+ "rewards/rejected": -1.3066837787628174,
11649
+ "step": 1520
11650
+ },
11651
+ {
11652
+ "epoch": 1.735585489273751,
11653
+ "grad_norm": 60.4365358347784,
11654
+ "learning_rate": 9.413921263763024e-09,
11655
+ "logits/chosen": -1.1901061534881592,
11656
+ "logits/rejected": -1.1732714176177979,
11657
+ "logps/chosen": -148.09774780273438,
11658
+ "logps/rejected": -138.72329711914062,
11659
+ "loss": 0.4147,
11660
+ "rewards/accuracies": 0.84375,
11661
+ "rewards/chosen": -0.24259831011295319,
11662
+ "rewards/margins": 0.738941490650177,
11663
+ "rewards/rejected": -0.9815397262573242,
11664
+ "step": 1522
11665
+ },
11666
+ {
11667
+ "epoch": 1.7378661535172117,
11668
+ "grad_norm": 61.21131572090355,
11669
+ "learning_rate": 9.253474935666338e-09,
11670
+ "logits/chosen": -1.319941520690918,
11671
+ "logits/rejected": -1.32881498336792,
11672
+ "logps/chosen": -186.58230590820312,
11673
+ "logps/rejected": -211.57525634765625,
11674
+ "loss": 0.5162,
11675
+ "rewards/accuracies": 0.84375,
11676
+ "rewards/chosen": -0.4148866832256317,
11677
+ "rewards/margins": 1.0180983543395996,
11678
+ "rewards/rejected": -1.4329850673675537,
11679
+ "step": 1524
11680
+ },
11681
+ {
11682
+ "epoch": 1.7401468177606727,
11683
+ "grad_norm": 55.57275725271543,
11684
+ "learning_rate": 9.094341318589071e-09,
11685
+ "logits/chosen": -1.1223199367523193,
11686
+ "logits/rejected": -1.2205249071121216,
11687
+ "logps/chosen": -123.37957763671875,
11688
+ "logps/rejected": -155.87530517578125,
11689
+ "loss": 0.4639,
11690
+ "rewards/accuracies": 0.875,
11691
+ "rewards/chosen": -0.4156235158443451,
11692
+ "rewards/margins": 0.8936042785644531,
11693
+ "rewards/rejected": -1.309227705001831,
11694
+ "step": 1526
11695
+ },
11696
+ {
11697
+ "epoch": 1.7424274820041337,
11698
+ "grad_norm": 75.32187086371678,
11699
+ "learning_rate": 8.936522714508676e-09,
11700
+ "logits/chosen": -1.1820390224456787,
11701
+ "logits/rejected": -1.227403163909912,
11702
+ "logps/chosen": -240.14578247070312,
11703
+ "logps/rejected": -284.03839111328125,
11704
+ "loss": 0.4268,
11705
+ "rewards/accuracies": 0.71875,
11706
+ "rewards/chosen": -0.9300968647003174,
11707
+ "rewards/margins": 1.1027811765670776,
11708
+ "rewards/rejected": -2.0328779220581055,
11709
+ "step": 1528
11710
+ },
11711
+ {
11712
+ "epoch": 1.7447081462475946,
11713
+ "grad_norm": 56.43871330540775,
11714
+ "learning_rate": 8.780021406380011e-09,
11715
+ "logits/chosen": -1.1268565654754639,
11716
+ "logits/rejected": -1.2307937145233154,
11717
+ "logps/chosen": -176.69204711914062,
11718
+ "logps/rejected": -203.42315673828125,
11719
+ "loss": 0.433,
11720
+ "rewards/accuracies": 0.84375,
11721
+ "rewards/chosen": -0.7230830788612366,
11722
+ "rewards/margins": 1.0807398557662964,
11723
+ "rewards/rejected": -1.8038227558135986,
11724
+ "step": 1530
11725
+ },
11726
+ {
11727
+ "epoch": 1.7469888104910556,
11728
+ "grad_norm": 53.44401452377932,
11729
+ "learning_rate": 8.624839658102345e-09,
11730
+ "logits/chosen": -1.172848105430603,
11731
+ "logits/rejected": -1.2623870372772217,
11732
+ "logps/chosen": -172.2826385498047,
11733
+ "logps/rejected": -218.0788116455078,
11734
+ "loss": 0.4167,
11735
+ "rewards/accuracies": 0.9375,
11736
+ "rewards/chosen": -0.4317859411239624,
11737
+ "rewards/margins": 1.20097017288208,
11738
+ "rewards/rejected": -1.632756233215332,
11739
+ "step": 1532
11740
+ },
11741
+ {
11742
+ "epoch": 1.7492694747345165,
11743
+ "grad_norm": 44.672996526234876,
11744
+ "learning_rate": 8.470979714486626e-09,
11745
+ "logits/chosen": -1.2581678628921509,
11746
+ "logits/rejected": -1.3436583280563354,
11747
+ "logps/chosen": -164.71560668945312,
11748
+ "logps/rejected": -197.1131134033203,
11749
+ "loss": 0.3891,
11750
+ "rewards/accuracies": 0.84375,
11751
+ "rewards/chosen": -0.48163390159606934,
11752
+ "rewards/margins": 1.20646333694458,
11753
+ "rewards/rejected": -1.688097357749939,
11754
+ "step": 1534
11755
+ },
11756
+ {
11757
+ "epoch": 1.7515501389779773,
11758
+ "grad_norm": 59.7183121930943,
11759
+ "learning_rate": 8.318443801222918e-09,
11760
+ "logits/chosen": -1.1442248821258545,
11761
+ "logits/rejected": -1.2501918077468872,
11762
+ "logps/chosen": -155.9255828857422,
11763
+ "logps/rejected": -185.97073364257812,
11764
+ "loss": 0.4332,
11765
+ "rewards/accuracies": 0.875,
11766
+ "rewards/chosen": -0.4522356390953064,
11767
+ "rewards/margins": 0.7766329050064087,
11768
+ "rewards/rejected": -1.2288686037063599,
11769
+ "step": 1536
11770
+ },
11771
+ {
11772
+ "epoch": 1.7538308032214383,
11773
+ "grad_norm": 52.4727599794761,
11774
+ "learning_rate": 8.167234124848343e-09,
11775
+ "logits/chosen": -1.2721006870269775,
11776
+ "logits/rejected": -1.3032159805297852,
11777
+ "logps/chosen": -213.73348999023438,
11778
+ "logps/rejected": -231.73304748535156,
11779
+ "loss": 0.4188,
11780
+ "rewards/accuracies": 0.875,
11781
+ "rewards/chosen": -0.38457486033439636,
11782
+ "rewards/margins": 1.214958667755127,
11783
+ "rewards/rejected": -1.5995336771011353,
11784
+ "step": 1538
11785
+ },
11786
+ {
11787
+ "epoch": 1.756111467464899,
11788
+ "grad_norm": 56.28899819514317,
11789
+ "learning_rate": 8.017352872715077e-09,
11790
+ "logits/chosen": -1.246867299079895,
11791
+ "logits/rejected": -1.321312665939331,
11792
+ "logps/chosen": -166.28347778320312,
11793
+ "logps/rejected": -189.37855529785156,
11794
+ "loss": 0.3864,
11795
+ "rewards/accuracies": 0.9375,
11796
+ "rewards/chosen": -0.6479480266571045,
11797
+ "rewards/margins": 0.916532039642334,
11798
+ "rewards/rejected": -1.5644800662994385,
11799
+ "step": 1540
11800
+ },
11801
+ {
11802
+ "epoch": 1.75839213170836,
11803
+ "grad_norm": 65.96019718362385,
11804
+ "learning_rate": 7.868802212958703e-09,
11805
+ "logits/chosen": -1.2408630847930908,
11806
+ "logits/rejected": -1.3144943714141846,
11807
+ "logps/chosen": -148.76805114746094,
11808
+ "logps/rejected": -169.2152099609375,
11809
+ "loss": 0.4679,
11810
+ "rewards/accuracies": 0.8125,
11811
+ "rewards/chosen": -0.313798189163208,
11812
+ "rewards/margins": 0.8195222020149231,
11813
+ "rewards/rejected": -1.1333203315734863,
11814
+ "step": 1542
11815
+ },
11816
+ {
11817
+ "epoch": 1.760672795951821,
11818
+ "grad_norm": 67.11700647965242,
11819
+ "learning_rate": 7.721584294466899e-09,
11820
+ "logits/chosen": -1.3456957340240479,
11821
+ "logits/rejected": -1.3798415660858154,
11822
+ "logps/chosen": -180.2974090576172,
11823
+ "logps/rejected": -203.2277374267578,
11824
+ "loss": 0.4066,
11825
+ "rewards/accuracies": 0.78125,
11826
+ "rewards/chosen": -0.5308500528335571,
11827
+ "rewards/margins": 0.968846321105957,
11828
+ "rewards/rejected": -1.4996963739395142,
11829
+ "step": 1544
11830
+ },
11831
+ {
11832
+ "epoch": 1.7629534601952819,
11833
+ "grad_norm": 59.467637180664155,
11834
+ "learning_rate": 7.575701246848299e-09,
11835
+ "logits/chosen": -1.2550122737884521,
11836
+ "logits/rejected": -1.4017306566238403,
11837
+ "logps/chosen": -176.01060485839844,
11838
+ "logps/rejected": -221.2160186767578,
11839
+ "loss": 0.4151,
11840
+ "rewards/accuracies": 0.84375,
11841
+ "rewards/chosen": -0.6037588715553284,
11842
+ "rewards/margins": 1.1545339822769165,
11843
+ "rewards/rejected": -1.7582929134368896,
11844
+ "step": 1546
11845
+ },
11846
+ {
11847
+ "epoch": 1.7652341244387428,
11848
+ "grad_norm": 59.34463361306739,
11849
+ "learning_rate": 7.431155180401704e-09,
11850
+ "logits/chosen": -1.1449761390686035,
11851
+ "logits/rejected": -1.2112215757369995,
11852
+ "logps/chosen": -175.2537384033203,
11853
+ "logps/rejected": -213.36187744140625,
11854
+ "loss": 0.3787,
11855
+ "rewards/accuracies": 0.90625,
11856
+ "rewards/chosen": -0.547699511051178,
11857
+ "rewards/margins": 1.1472948789596558,
11858
+ "rewards/rejected": -1.694994568824768,
11859
+ "step": 1548
11860
+ },
11861
+ {
11862
+ "epoch": 1.7675147886822038,
11863
+ "grad_norm": 107.3697379891581,
11864
+ "learning_rate": 7.287948186085613e-09,
11865
+ "logits/chosen": -1.1891409158706665,
11866
+ "logits/rejected": -1.2180662155151367,
11867
+ "logps/chosen": -101.96951293945312,
11868
+ "logps/rejected": -131.66488647460938,
11869
+ "loss": 0.5419,
11870
+ "rewards/accuracies": 0.90625,
11871
+ "rewards/chosen": -0.35958331823349,
11872
+ "rewards/margins": 0.8678293228149414,
11873
+ "rewards/rejected": -1.2274125814437866,
11874
+ "step": 1550
11875
+ },
11876
+ {
11877
+ "epoch": 1.7697954529256648,
11878
+ "grad_norm": 58.46793417479867,
11879
+ "learning_rate": 7.146082335487824e-09,
11880
+ "logits/chosen": -1.2643113136291504,
11881
+ "logits/rejected": -1.2983956336975098,
11882
+ "logps/chosen": -184.83563232421875,
11883
+ "logps/rejected": -210.6478729248047,
11884
+ "loss": 0.417,
11885
+ "rewards/accuracies": 0.75,
11886
+ "rewards/chosen": -0.5568149089813232,
11887
+ "rewards/margins": 0.8875846862792969,
11888
+ "rewards/rejected": -1.4443995952606201,
11889
+ "step": 1552
11890
+ },
11891
+ {
11892
+ "epoch": 1.7720761171691255,
11893
+ "grad_norm": 68.29885233859635,
11894
+ "learning_rate": 7.005559680795658e-09,
11895
+ "logits/chosen": -1.255906105041504,
11896
+ "logits/rejected": -1.2899165153503418,
11897
+ "logps/chosen": -123.41840362548828,
11898
+ "logps/rejected": -138.2176055908203,
11899
+ "loss": 0.4641,
11900
+ "rewards/accuracies": 0.84375,
11901
+ "rewards/chosen": -0.31046992540359497,
11902
+ "rewards/margins": 0.6126350164413452,
11903
+ "rewards/rejected": -0.9231049418449402,
11904
+ "step": 1554
11905
+ },
11906
+ {
11907
+ "epoch": 1.7743567814125865,
11908
+ "grad_norm": 54.33302324585405,
11909
+ "learning_rate": 6.866382254766156e-09,
11910
+ "logits/chosen": -1.330174207687378,
11911
+ "logits/rejected": -1.363855242729187,
11912
+ "logps/chosen": -196.70680236816406,
11913
+ "logps/rejected": -213.6434326171875,
11914
+ "loss": 0.4372,
11915
+ "rewards/accuracies": 0.78125,
11916
+ "rewards/chosen": -0.5205326676368713,
11917
+ "rewards/margins": 1.0018987655639648,
11918
+ "rewards/rejected": -1.5224316120147705,
11919
+ "step": 1556
11920
+ },
11921
+ {
11922
+ "epoch": 1.7766374456560472,
11923
+ "grad_norm": 66.94592762976625,
11924
+ "learning_rate": 6.7285520706966914e-09,
11925
+ "logits/chosen": -1.3225196599960327,
11926
+ "logits/rejected": -1.3494971990585327,
11927
+ "logps/chosen": -185.50836181640625,
11928
+ "logps/rejected": -207.73159790039062,
11929
+ "loss": 0.4265,
11930
+ "rewards/accuracies": 0.8125,
11931
+ "rewards/chosen": -0.5292472839355469,
11932
+ "rewards/margins": 0.988534152507782,
11933
+ "rewards/rejected": -1.5177814960479736,
11934
+ "step": 1558
11935
+ },
11936
+ {
11937
+ "epoch": 1.7789181098995082,
11938
+ "grad_norm": 66.14902165695068,
11939
+ "learning_rate": 6.592071122395848e-09,
11940
+ "logits/chosen": -1.2869586944580078,
11941
+ "logits/rejected": -1.2955509424209595,
11942
+ "logps/chosen": -203.05213928222656,
11943
+ "logps/rejected": -221.87176513671875,
11944
+ "loss": 0.4555,
11945
+ "rewards/accuracies": 0.90625,
11946
+ "rewards/chosen": -0.4785195589065552,
11947
+ "rewards/margins": 0.8982763886451721,
11948
+ "rewards/rejected": -1.3767958879470825,
11949
+ "step": 1560
11950
+ },
11951
+ {
11952
+ "epoch": 1.7811987741429691,
11953
+ "grad_norm": 56.82906531266912,
11954
+ "learning_rate": 6.4569413841546124e-09,
11955
+ "logits/chosen": -1.3752797842025757,
11956
+ "logits/rejected": -1.342197060585022,
11957
+ "logps/chosen": -213.2560577392578,
11958
+ "logps/rejected": -226.26138305664062,
11959
+ "loss": 0.3949,
11960
+ "rewards/accuracies": 0.78125,
11961
+ "rewards/chosen": -0.6319646835327148,
11962
+ "rewards/margins": 0.7403106093406677,
11963
+ "rewards/rejected": -1.3722753524780273,
11964
+ "step": 1562
11965
+ },
11966
+ {
11967
+ "epoch": 1.78347943838643,
11968
+ "grad_norm": 57.01107575636888,
11969
+ "learning_rate": 6.323164810717751e-09,
11970
+ "logits/chosen": -1.4058406352996826,
11971
+ "logits/rejected": -1.4284158945083618,
11972
+ "logps/chosen": -164.4686279296875,
11973
+ "logps/rejected": -175.3655548095703,
11974
+ "loss": 0.4426,
11975
+ "rewards/accuracies": 0.84375,
11976
+ "rewards/chosen": -0.2919442057609558,
11977
+ "rewards/margins": 0.8540188074111938,
11978
+ "rewards/rejected": -1.1459629535675049,
11979
+ "step": 1564
11980
+ },
11981
+ {
11982
+ "epoch": 1.785760102629891,
11983
+ "grad_norm": 53.78725622915069,
11984
+ "learning_rate": 6.1907433372555885e-09,
11985
+ "logits/chosen": -1.227329969406128,
11986
+ "logits/rejected": -1.2803211212158203,
11987
+ "logps/chosen": -215.01806640625,
11988
+ "logps/rejected": -240.11415100097656,
11989
+ "loss": 0.4856,
11990
+ "rewards/accuracies": 0.78125,
11991
+ "rewards/chosen": -0.37019410729408264,
11992
+ "rewards/margins": 1.2054839134216309,
11993
+ "rewards/rejected": -1.5756779909133911,
11994
+ "step": 1566
11995
+ },
11996
+ {
11997
+ "epoch": 1.788040766873352,
11998
+ "grad_norm": 50.06277787185665,
11999
+ "learning_rate": 6.0596788793360055e-09,
12000
+ "logits/chosen": -1.2019214630126953,
12001
+ "logits/rejected": -1.167099952697754,
12002
+ "logps/chosen": -128.18609619140625,
12003
+ "logps/rejected": -156.6576690673828,
12004
+ "loss": 0.3964,
12005
+ "rewards/accuracies": 0.84375,
12006
+ "rewards/chosen": -0.24505016207695007,
12007
+ "rewards/margins": 0.7954214811325073,
12008
+ "rewards/rejected": -1.0404715538024902,
12009
+ "step": 1568
12010
+ },
12011
+ {
12012
+ "epoch": 1.7903214311168127,
12013
+ "grad_norm": 58.820202526748965,
12014
+ "learning_rate": 5.929973332896676e-09,
12015
+ "logits/chosen": -1.295718789100647,
12016
+ "logits/rejected": -1.3617008924484253,
12017
+ "logps/chosen": -177.0591583251953,
12018
+ "logps/rejected": -229.13153076171875,
12019
+ "loss": 0.3765,
12020
+ "rewards/accuracies": 0.84375,
12021
+ "rewards/chosen": -0.3693293035030365,
12022
+ "rewards/margins": 1.0683492422103882,
12023
+ "rewards/rejected": -1.437678575515747,
12024
+ "step": 1570
12025
+ },
12026
+ {
12027
+ "epoch": 1.7926020953602737,
12028
+ "grad_norm": 69.22569134675261,
12029
+ "learning_rate": 5.801628574217732e-09,
12030
+ "logits/chosen": -1.231567621231079,
12031
+ "logits/rejected": -1.2869716882705688,
12032
+ "logps/chosen": -115.6909408569336,
12033
+ "logps/rejected": -169.88333129882812,
12034
+ "loss": 0.4175,
12035
+ "rewards/accuracies": 0.8125,
12036
+ "rewards/chosen": -0.2287057340145111,
12037
+ "rewards/margins": 0.9424973130226135,
12038
+ "rewards/rejected": -1.1712028980255127,
12039
+ "step": 1572
12040
+ },
12041
+ {
12042
+ "epoch": 1.7948827596037344,
12043
+ "grad_norm": 57.45741658297443,
12044
+ "learning_rate": 5.674646459894539e-09,
12045
+ "logits/chosen": -1.3439488410949707,
12046
+ "logits/rejected": -1.3449054956436157,
12047
+ "logps/chosen": -169.16116333007812,
12048
+ "logps/rejected": -214.2422332763672,
12049
+ "loss": 0.4044,
12050
+ "rewards/accuracies": 0.84375,
12051
+ "rewards/chosen": -0.29708918929100037,
12052
+ "rewards/margins": 1.2709224224090576,
12053
+ "rewards/rejected": -1.5680116415023804,
12054
+ "step": 1574
12055
+ },
12056
+ {
12057
+ "epoch": 1.7971634238471954,
12058
+ "grad_norm": 53.78313839896124,
12059
+ "learning_rate": 5.549028826810886e-09,
12060
+ "logits/chosen": -1.3002756834030151,
12061
+ "logits/rejected": -1.305463433265686,
12062
+ "logps/chosen": -191.0225067138672,
12063
+ "logps/rejected": -210.54293823242188,
12064
+ "loss": 0.4336,
12065
+ "rewards/accuracies": 0.90625,
12066
+ "rewards/chosen": -0.6976014971733093,
12067
+ "rewards/margins": 1.1452404260635376,
12068
+ "rewards/rejected": -1.8428419828414917,
12069
+ "step": 1576
12070
+ },
12071
+ {
12072
+ "epoch": 1.7994440880906564,
12073
+ "grad_norm": 54.73230632912025,
12074
+ "learning_rate": 5.42477749211242e-09,
12075
+ "logits/chosen": -1.1779245138168335,
12076
+ "logits/rejected": -1.1519317626953125,
12077
+ "logps/chosen": -162.1126708984375,
12078
+ "logps/rejected": -177.6374053955078,
12079
+ "loss": 0.4314,
12080
+ "rewards/accuracies": 0.875,
12081
+ "rewards/chosen": -0.23180986940860748,
12082
+ "rewards/margins": 1.1439672708511353,
12083
+ "rewards/rejected": -1.375777244567871,
12084
+ "step": 1578
12085
+ },
12086
+ {
12087
+ "epoch": 1.8017247523341173,
12088
+ "grad_norm": 72.74484543771715,
12089
+ "learning_rate": 5.301894253180295e-09,
12090
+ "logits/chosen": -1.179969310760498,
12091
+ "logits/rejected": -1.2561529874801636,
12092
+ "logps/chosen": -142.0485382080078,
12093
+ "logps/rejected": -183.1311492919922,
12094
+ "loss": 0.4323,
12095
+ "rewards/accuracies": 0.75,
12096
+ "rewards/chosen": -0.3770468533039093,
12097
+ "rewards/margins": 0.8587576150894165,
12098
+ "rewards/rejected": -1.2358046770095825,
12099
+ "step": 1580
12100
+ },
12101
+ {
12102
+ "epoch": 1.8040054165775783,
12103
+ "grad_norm": 52.220335591750064,
12104
+ "learning_rate": 5.180380887605252e-09,
12105
+ "logits/chosen": -1.292273998260498,
12106
+ "logits/rejected": -1.3601034879684448,
12107
+ "logps/chosen": -198.21466064453125,
12108
+ "logps/rejected": -246.2793731689453,
12109
+ "loss": 0.3947,
12110
+ "rewards/accuracies": 0.90625,
12111
+ "rewards/chosen": -0.4539499282836914,
12112
+ "rewards/margins": 1.7145434617996216,
12113
+ "rewards/rejected": -2.1684935092926025,
12114
+ "step": 1582
12115
+ },
12116
+ {
12117
+ "epoch": 1.8062860808210393,
12118
+ "grad_norm": 53.485899864483656,
12119
+ "learning_rate": 5.060239153161872e-09,
12120
+ "logits/chosen": -1.1770296096801758,
12121
+ "logits/rejected": -1.2744455337524414,
12122
+ "logps/chosen": -188.80868530273438,
12123
+ "logps/rejected": -251.5846405029297,
12124
+ "loss": 0.4114,
12125
+ "rewards/accuracies": 0.84375,
12126
+ "rewards/chosen": -0.5241818428039551,
12127
+ "rewards/margins": 1.110929250717163,
12128
+ "rewards/rejected": -1.6351109743118286,
12129
+ "step": 1584
12130
+ },
12131
+ {
12132
+ "epoch": 1.8085667450645,
12133
+ "grad_norm": 71.54349228655005,
12134
+ "learning_rate": 4.941470787783131e-09,
12135
+ "logits/chosen": -1.2420802116394043,
12136
+ "logits/rejected": -1.2955926656723022,
12137
+ "logps/chosen": -153.46408081054688,
12138
+ "logps/rejected": -185.44371032714844,
12139
+ "loss": 0.4323,
12140
+ "rewards/accuracies": 0.8125,
12141
+ "rewards/chosen": -0.4371834695339203,
12142
+ "rewards/margins": 1.0911630392074585,
12143
+ "rewards/rejected": -1.5283464193344116,
12144
+ "step": 1586
12145
+ },
12146
+ {
12147
+ "epoch": 1.810847409307961,
12148
+ "grad_norm": 43.02051086457159,
12149
+ "learning_rate": 4.8240775095352515e-09,
12150
+ "logits/chosen": -1.2829195261001587,
12151
+ "logits/rejected": -1.3175885677337646,
12152
+ "logps/chosen": -147.54412841796875,
12153
+ "logps/rejected": -172.52725219726562,
12154
+ "loss": 0.3904,
12155
+ "rewards/accuracies": 0.84375,
12156
+ "rewards/chosen": -0.4066880941390991,
12157
+ "rewards/margins": 0.901046633720398,
12158
+ "rewards/rejected": -1.307734727859497,
12159
+ "step": 1588
12160
+ },
12161
+ {
12162
+ "epoch": 1.8131280735514217,
12163
+ "grad_norm": 56.502783078071204,
12164
+ "learning_rate": 4.708061016592923e-09,
12165
+ "logits/chosen": -1.327852725982666,
12166
+ "logits/rejected": -1.3697575330734253,
12167
+ "logps/chosen": -178.9647216796875,
12168
+ "logps/rejected": -206.71951293945312,
12169
+ "loss": 0.4688,
12170
+ "rewards/accuracies": 0.78125,
12171
+ "rewards/chosen": -0.5031104683876038,
12172
+ "rewards/margins": 1.0050235986709595,
12173
+ "rewards/rejected": -1.5081340074539185,
12174
+ "step": 1590
12175
+ },
12176
+ {
12177
+ "epoch": 1.8154087377948827,
12178
+ "grad_norm": 52.124874452064724,
12179
+ "learning_rate": 4.593422987214668e-09,
12180
+ "logits/chosen": -1.2299569845199585,
12181
+ "logits/rejected": -1.276241421699524,
12182
+ "logps/chosen": -146.35475158691406,
12183
+ "logps/rejected": -174.0493621826172,
12184
+ "loss": 0.4263,
12185
+ "rewards/accuracies": 0.96875,
12186
+ "rewards/chosen": 0.01635855622589588,
12187
+ "rewards/margins": 1.0491864681243896,
12188
+ "rewards/rejected": -1.0328278541564941,
12189
+ "step": 1592
12190
+ },
12191
+ {
12192
+ "epoch": 1.8176894020383436,
12193
+ "grad_norm": 52.42542929704426,
12194
+ "learning_rate": 4.480165079718568e-09,
12195
+ "logits/chosen": -1.2522296905517578,
12196
+ "logits/rejected": -1.2631944417953491,
12197
+ "logps/chosen": -195.42518615722656,
12198
+ "logps/rejected": -221.43161010742188,
12199
+ "loss": 0.4216,
12200
+ "rewards/accuracies": 0.90625,
12201
+ "rewards/chosen": -0.45845019817352295,
12202
+ "rewards/margins": 1.135830044746399,
12203
+ "rewards/rejected": -1.5942802429199219,
12204
+ "step": 1594
12205
+ },
12206
+ {
12207
+ "epoch": 1.8199700662818046,
12208
+ "grad_norm": 64.83240480126588,
12209
+ "learning_rate": 4.368288932458308e-09,
12210
+ "logits/chosen": -1.3056426048278809,
12211
+ "logits/rejected": -1.2827690839767456,
12212
+ "logps/chosen": -172.3594207763672,
12213
+ "logps/rejected": -187.4986114501953,
12214
+ "loss": 0.4437,
12215
+ "rewards/accuracies": 0.8125,
12216
+ "rewards/chosen": -0.4361146092414856,
12217
+ "rewards/margins": 0.8268535733222961,
12218
+ "rewards/rejected": -1.2629680633544922,
12219
+ "step": 1596
12220
+ },
12221
+ {
12222
+ "epoch": 1.8222507305252655,
12223
+ "grad_norm": 52.10599638998205,
12224
+ "learning_rate": 4.257796163799454e-09,
12225
+ "logits/chosen": -1.1653319597244263,
12226
+ "logits/rejected": -1.2118003368377686,
12227
+ "logps/chosen": -109.62616729736328,
12228
+ "logps/rejected": -133.0435028076172,
12229
+ "loss": 0.4041,
12230
+ "rewards/accuracies": 0.75,
12231
+ "rewards/chosen": -0.14802826941013336,
12232
+ "rewards/margins": 0.935161828994751,
12233
+ "rewards/rejected": -1.0831902027130127,
12234
+ "step": 1598
12235
+ },
12236
+ {
12237
+ "epoch": 1.8245313947687265,
12238
+ "grad_norm": 55.52964788045627,
12239
+ "learning_rate": 4.1486883720960435e-09,
12240
+ "logits/chosen": -1.1933330297470093,
12241
+ "logits/rejected": -1.228639006614685,
12242
+ "logps/chosen": -149.2159423828125,
12243
+ "logps/rejected": -200.82742309570312,
12244
+ "loss": 0.3988,
12245
+ "rewards/accuracies": 0.875,
12246
+ "rewards/chosen": -0.32056179642677307,
12247
+ "rewards/margins": 1.0430424213409424,
12248
+ "rewards/rejected": -1.3636044263839722,
12249
+ "step": 1600
12250
+ },
12251
+ {
12252
+ "epoch": 1.8245313947687265,
12253
+ "eval_logits/chosen": -1.3431406021118164,
12254
+ "eval_logits/rejected": -1.3253653049468994,
12255
+ "eval_logps/chosen": -132.28553771972656,
12256
+ "eval_logps/rejected": -139.2881317138672,
12257
+ "eval_loss": 0.5448750853538513,
12258
+ "eval_rewards/accuracies": 0.7200000286102295,
12259
+ "eval_rewards/chosen": -0.23875679075717926,
12260
+ "eval_rewards/margins": 0.48378121852874756,
12261
+ "eval_rewards/rejected": -0.7225379943847656,
12262
+ "eval_runtime": 20.9101,
12263
+ "eval_samples_per_second": 4.782,
12264
+ "eval_steps_per_second": 1.196,
12265
+ "step": 1600
12266
+ },
12267
+ {
12268
+ "epoch": 1.8268120590121875,
12269
+ "grad_norm": 52.449005735628376,
12270
+ "learning_rate": 4.040967135667472e-09,
12271
+ "logits/chosen": -1.3316993713378906,
12272
+ "logits/rejected": -1.4035625457763672,
12273
+ "logps/chosen": -134.9228057861328,
12274
+ "logps/rejected": -165.58567810058594,
12275
+ "loss": 0.408,
12276
+ "rewards/accuracies": 0.75,
12277
+ "rewards/chosen": -0.16615627706050873,
12278
+ "rewards/margins": 0.7779840230941772,
12279
+ "rewards/rejected": -0.9441402554512024,
12280
+ "step": 1602
12281
+ },
12282
+ {
12283
+ "epoch": 1.8290927232556482,
12284
+ "grad_norm": 58.59433738136844,
12285
+ "learning_rate": 3.9346340127756616e-09,
12286
+ "logits/chosen": -1.3039021492004395,
12287
+ "logits/rejected": -1.3089298009872437,
12288
+ "logps/chosen": -196.00987243652344,
12289
+ "logps/rejected": -218.62310791015625,
12290
+ "loss": 0.4161,
12291
+ "rewards/accuracies": 0.875,
12292
+ "rewards/chosen": -0.46055513620376587,
12293
+ "rewards/margins": 0.8772752285003662,
12294
+ "rewards/rejected": -1.3378304243087769,
12295
+ "step": 1604
12296
+ },
12297
+ {
12298
+ "epoch": 1.8313733874991092,
12299
+ "grad_norm": 63.586529609086426,
12300
+ "learning_rate": 3.829690541602504e-09,
12301
+ "logits/chosen": -1.173471450805664,
12302
+ "logits/rejected": -1.27016282081604,
12303
+ "logps/chosen": -162.02359008789062,
12304
+ "logps/rejected": -195.39901733398438,
12305
+ "loss": 0.3966,
12306
+ "rewards/accuracies": 0.84375,
12307
+ "rewards/chosen": -0.4807528853416443,
12308
+ "rewards/margins": 1.1842964887619019,
12309
+ "rewards/rejected": -1.665049433708191,
12310
+ "step": 1606
12311
+ },
12312
+ {
12313
+ "epoch": 1.83365405174257,
12314
+ "grad_norm": 59.32975592376087,
12315
+ "learning_rate": 3.726138240227628e-09,
12316
+ "logits/chosen": -1.255246639251709,
12317
+ "logits/rejected": -1.3199628591537476,
12318
+ "logps/chosen": -132.19796752929688,
12319
+ "logps/rejected": -187.007568359375,
12320
+ "loss": 0.433,
12321
+ "rewards/accuracies": 0.875,
12322
+ "rewards/chosen": -0.2542805075645447,
12323
+ "rewards/margins": 1.1002681255340576,
12324
+ "rewards/rejected": -1.3545485734939575,
12325
+ "step": 1608
12326
+ },
12327
+ {
12328
+ "epoch": 1.8359347159860309,
12329
+ "grad_norm": 53.62231825347675,
12330
+ "learning_rate": 3.623978606606426e-09,
12331
+ "logits/chosen": -1.165490984916687,
12332
+ "logits/rejected": -1.1475247144699097,
12333
+ "logps/chosen": -173.6094970703125,
12334
+ "logps/rejected": -202.46153259277344,
12335
+ "loss": 0.3824,
12336
+ "rewards/accuracies": 0.75,
12337
+ "rewards/chosen": -0.47014373540878296,
12338
+ "rewards/margins": 0.9737652540206909,
12339
+ "rewards/rejected": -1.4439090490341187,
12340
+ "step": 1610
12341
+ },
12342
+ {
12343
+ "epoch": 1.8382153802294918,
12344
+ "grad_norm": 54.19282245857111,
12345
+ "learning_rate": 3.523213118548407e-09,
12346
+ "logits/chosen": -1.2416139841079712,
12347
+ "logits/rejected": -1.278630256652832,
12348
+ "logps/chosen": -152.09185791015625,
12349
+ "logps/rejected": -186.2200164794922,
12350
+ "loss": 0.4421,
12351
+ "rewards/accuracies": 0.78125,
12352
+ "rewards/chosen": -0.47047847509384155,
12353
+ "rewards/margins": 0.9149271845817566,
12354
+ "rewards/rejected": -1.3854056596755981,
12355
+ "step": 1612
12356
+ },
12357
+ {
12358
+ "epoch": 1.8404960444729528,
12359
+ "grad_norm": 66.35928406172948,
12360
+ "learning_rate": 3.423843233695789e-09,
12361
+ "logits/chosen": -1.2137271165847778,
12362
+ "logits/rejected": -1.209067940711975,
12363
+ "logps/chosen": -162.6099853515625,
12364
+ "logps/rejected": -168.80848693847656,
12365
+ "loss": 0.5097,
12366
+ "rewards/accuracies": 0.8125,
12367
+ "rewards/chosen": -0.4765735864639282,
12368
+ "rewards/margins": 0.7680255174636841,
12369
+ "rewards/rejected": -1.2445989847183228,
12370
+ "step": 1614
12371
+ },
12372
+ {
12373
+ "epoch": 1.8427767087164137,
12374
+ "grad_norm": 76.5553780804754,
12375
+ "learning_rate": 3.3258703895024386e-09,
12376
+ "logits/chosen": -1.2277370691299438,
12377
+ "logits/rejected": -1.2694729566574097,
12378
+ "logps/chosen": -167.89581298828125,
12379
+ "logps/rejected": -190.85592651367188,
12380
+ "loss": 0.448,
12381
+ "rewards/accuracies": 0.6875,
12382
+ "rewards/chosen": -0.586859941482544,
12383
+ "rewards/margins": 0.8319023847579956,
12384
+ "rewards/rejected": -1.4187625646591187,
12385
+ "step": 1616
12386
+ },
12387
+ {
12388
+ "epoch": 1.8450573729598747,
12389
+ "grad_norm": 62.481075662440986,
12390
+ "learning_rate": 3.2292960032130557e-09,
12391
+ "logits/chosen": -1.3022971153259277,
12392
+ "logits/rejected": -1.3909885883331299,
12393
+ "logps/chosen": -138.24668884277344,
12394
+ "logps/rejected": -144.76327514648438,
12395
+ "loss": 0.5064,
12396
+ "rewards/accuracies": 0.75,
12397
+ "rewards/chosen": -0.29410520195961,
12398
+ "rewards/margins": 0.5049476623535156,
12399
+ "rewards/rejected": -0.7990528345108032,
12400
+ "step": 1618
12401
+ },
12402
+ {
12403
+ "epoch": 1.8473380372033354,
12404
+ "grad_norm": 49.40037975448036,
12405
+ "learning_rate": 3.134121471842688e-09,
12406
+ "logits/chosen": -1.3413515090942383,
12407
+ "logits/rejected": -1.3459126949310303,
12408
+ "logps/chosen": -168.7082977294922,
12409
+ "logps/rejected": -210.79649353027344,
12410
+ "loss": 0.3703,
12411
+ "rewards/accuracies": 0.8125,
12412
+ "rewards/chosen": -0.37254247069358826,
12413
+ "rewards/margins": 1.4622108936309814,
12414
+ "rewards/rejected": -1.8347532749176025,
12415
+ "step": 1620
12416
+ },
12417
+ {
12418
+ "epoch": 1.8496187014467964,
12419
+ "grad_norm": 64.64467682395964,
12420
+ "learning_rate": 3.0403481721565373e-09,
12421
+ "logits/chosen": -1.165281891822815,
12422
+ "logits/rejected": -1.2723007202148438,
12423
+ "logps/chosen": -136.62118530273438,
12424
+ "logps/rejected": -162.6814422607422,
12425
+ "loss": 0.4646,
12426
+ "rewards/accuracies": 0.78125,
12427
+ "rewards/chosen": -0.21550993621349335,
12428
+ "rewards/margins": 0.7950283288955688,
12429
+ "rewards/rejected": -1.0105382204055786,
12430
+ "step": 1622
12431
+ },
12432
+ {
12433
+ "epoch": 1.8518993656902571,
12434
+ "grad_norm": 52.728229173624364,
12435
+ "learning_rate": 2.947977460649975e-09,
12436
+ "logits/chosen": -1.2271634340286255,
12437
+ "logits/rejected": -1.2183120250701904,
12438
+ "logps/chosen": -120.3056640625,
12439
+ "logps/rejected": -152.6262664794922,
12440
+ "loss": 0.4505,
12441
+ "rewards/accuracies": 0.8125,
12442
+ "rewards/chosen": -0.4195891320705414,
12443
+ "rewards/margins": 0.9882142543792725,
12444
+ "rewards/rejected": -1.4078034162521362,
12445
+ "step": 1624
12446
+ },
12447
+ {
12448
+ "epoch": 1.854180029933718,
12449
+ "grad_norm": 69.89183140204983,
12450
+ "learning_rate": 2.8570106735290144e-09,
12451
+ "logits/chosen": -1.2754062414169312,
12452
+ "logits/rejected": -1.285915732383728,
12453
+ "logps/chosen": -148.9326171875,
12454
+ "logps/rejected": -176.26077270507812,
12455
+ "loss": 0.4184,
12456
+ "rewards/accuracies": 0.75,
12457
+ "rewards/chosen": -0.42058467864990234,
12458
+ "rewards/margins": 1.0723397731781006,
12459
+ "rewards/rejected": -1.492924451828003,
12460
+ "step": 1626
12461
+ },
12462
+ {
12463
+ "epoch": 1.856460694177179,
12464
+ "grad_norm": 78.54847866654863,
12465
+ "learning_rate": 2.7674491266909016e-09,
12466
+ "logits/chosen": -1.2117140293121338,
12467
+ "logits/rejected": -1.2298004627227783,
12468
+ "logps/chosen": -187.1058807373047,
12469
+ "logps/rejected": -215.0411376953125,
12470
+ "loss": 0.4243,
12471
+ "rewards/accuracies": 0.875,
12472
+ "rewards/chosen": -0.9483327865600586,
12473
+ "rewards/margins": 1.1213573217391968,
12474
+ "rewards/rejected": -2.069690227508545,
12475
+ "step": 1628
12476
+ },
12477
+ {
12478
+ "epoch": 1.85874135842064,
12479
+ "grad_norm": 55.582996135186875,
12480
+ "learning_rate": 2.679294115705144e-09,
12481
+ "logits/chosen": -1.2989763021469116,
12482
+ "logits/rejected": -1.425642967224121,
12483
+ "logps/chosen": -191.18502807617188,
12484
+ "logps/rejected": -235.31173706054688,
12485
+ "loss": 0.3977,
12486
+ "rewards/accuracies": 0.84375,
12487
+ "rewards/chosen": -0.30440258979797363,
12488
+ "rewards/margins": 1.0316964387893677,
12489
+ "rewards/rejected": -1.3360989093780518,
12490
+ "step": 1630
12491
+ },
12492
+ {
12493
+ "epoch": 1.861022022664101,
12494
+ "grad_norm": 51.611066982113556,
12495
+ "learning_rate": 2.5925469157947135e-09,
12496
+ "logits/chosen": -1.2190608978271484,
12497
+ "logits/rejected": -1.2017196416854858,
12498
+ "logps/chosen": -209.8916778564453,
12499
+ "logps/rejected": -251.41043090820312,
12500
+ "loss": 0.3837,
12501
+ "rewards/accuracies": 0.875,
12502
+ "rewards/chosen": -0.5788516402244568,
12503
+ "rewards/margins": 1.5164175033569336,
12504
+ "rewards/rejected": -2.095268964767456,
12505
+ "step": 1632
12506
+ },
12507
+ {
12508
+ "epoch": 1.863302686907562,
12509
+ "grad_norm": 61.654874361770986,
12510
+ "learning_rate": 2.507208781817638e-09,
12511
+ "logits/chosen": -1.2814161777496338,
12512
+ "logits/rejected": -1.4046311378479004,
12513
+ "logps/chosen": -190.91163635253906,
12514
+ "logps/rejected": -240.64810180664062,
12515
+ "loss": 0.4435,
12516
+ "rewards/accuracies": 0.84375,
12517
+ "rewards/chosen": -0.6178247928619385,
12518
+ "rewards/margins": 1.0759724378585815,
12519
+ "rewards/rejected": -1.6937971115112305,
12520
+ "step": 1634
12521
+ },
12522
+ {
12523
+ "epoch": 1.8655833511510227,
12524
+ "grad_norm": 65.59685553528632,
12525
+ "learning_rate": 2.4232809482488403e-09,
12526
+ "logits/chosen": -1.204872965812683,
12527
+ "logits/rejected": -1.2012598514556885,
12528
+ "logps/chosen": -180.18150329589844,
12529
+ "logps/rejected": -206.04872131347656,
12530
+ "loss": 0.4425,
12531
+ "rewards/accuracies": 0.75,
12532
+ "rewards/chosen": -0.6783896684646606,
12533
+ "rewards/margins": 1.0068707466125488,
12534
+ "rewards/rejected": -1.68526029586792,
12535
+ "step": 1636
12536
+ },
12537
+ {
12538
+ "epoch": 1.8678640153944837,
12539
+ "grad_norm": 60.54718272304763,
12540
+ "learning_rate": 2.340764629162284e-09,
12541
+ "logits/chosen": -1.1621663570404053,
12542
+ "logits/rejected": -1.2637563943862915,
12543
+ "logps/chosen": -178.81527709960938,
12544
+ "logps/rejected": -228.64007568359375,
12545
+ "loss": 0.3834,
12546
+ "rewards/accuracies": 0.8125,
12547
+ "rewards/chosen": -0.3456554114818573,
12548
+ "rewards/margins": 0.9540256261825562,
12549
+ "rewards/rejected": -1.2996809482574463,
12550
+ "step": 1638
12551
+ },
12552
+ {
12553
+ "epoch": 1.8701446796379444,
12554
+ "grad_norm": 55.533431370620015,
12555
+ "learning_rate": 2.2596610182133325e-09,
12556
+ "logits/chosen": -1.2759058475494385,
12557
+ "logits/rejected": -1.3463444709777832,
12558
+ "logps/chosen": -156.00009155273438,
12559
+ "logps/rejected": -192.7170867919922,
12560
+ "loss": 0.4157,
12561
+ "rewards/accuracies": 0.90625,
12562
+ "rewards/chosen": -0.003083046991378069,
12563
+ "rewards/margins": 1.3031508922576904,
12564
+ "rewards/rejected": -1.3062340021133423,
12565
+ "step": 1640
12566
+ },
12567
+ {
12568
+ "epoch": 1.8724253438814054,
12569
+ "grad_norm": 62.10012065390903,
12570
+ "learning_rate": 2.1799712886216628e-09,
12571
+ "logits/chosen": -1.1718287467956543,
12572
+ "logits/rejected": -1.1970500946044922,
12573
+ "logps/chosen": -146.75628662109375,
12574
+ "logps/rejected": -208.56558227539062,
12575
+ "loss": 0.3858,
12576
+ "rewards/accuracies": 0.875,
12577
+ "rewards/chosen": -0.1122959554195404,
12578
+ "rewards/margins": 1.5379095077514648,
12579
+ "rewards/rejected": -1.650205373764038,
12580
+ "step": 1642
12581
+ },
12582
+ {
12583
+ "epoch": 1.8747060081248663,
12584
+ "grad_norm": 56.04768532231553,
12585
+ "learning_rate": 2.1016965931541007e-09,
12586
+ "logits/chosen": -1.253339171409607,
12587
+ "logits/rejected": -1.3260321617126465,
12588
+ "logps/chosen": -207.69583129882812,
12589
+ "logps/rejected": -243.12586975097656,
12590
+ "loss": 0.4223,
12591
+ "rewards/accuracies": 0.84375,
12592
+ "rewards/chosen": -0.5657081604003906,
12593
+ "rewards/margins": 1.0796537399291992,
12594
+ "rewards/rejected": -1.6453620195388794,
12595
+ "step": 1644
12596
+ },
12597
+ {
12598
+ "epoch": 1.8769866723683273,
12599
+ "grad_norm": 60.14746373631622,
12600
+ "learning_rate": 2.02483806410807e-09,
12601
+ "logits/chosen": -1.114426851272583,
12602
+ "logits/rejected": -1.2585283517837524,
12603
+ "logps/chosen": -144.21237182617188,
12604
+ "logps/rejected": -216.4067840576172,
12605
+ "loss": 0.4165,
12606
+ "rewards/accuracies": 0.875,
12607
+ "rewards/chosen": -0.6168836951255798,
12608
+ "rewards/margins": 1.211737036705017,
12609
+ "rewards/rejected": -1.8286206722259521,
12610
+ "step": 1646
12611
+ },
12612
+ {
12613
+ "epoch": 1.8792673366117882,
12614
+ "grad_norm": 58.35041588601004,
12615
+ "learning_rate": 1.9493968132951455e-09,
12616
+ "logits/chosen": -1.2065017223358154,
12617
+ "logits/rejected": -1.2185966968536377,
12618
+ "logps/chosen": -144.52651977539062,
12619
+ "logps/rejected": -182.16015625,
12620
+ "loss": 0.4119,
12621
+ "rewards/accuracies": 0.875,
12622
+ "rewards/chosen": -0.5021862387657166,
12623
+ "rewards/margins": 1.0205962657928467,
12624
+ "rewards/rejected": -1.522782564163208,
12625
+ "step": 1648
12626
+ },
12627
+ {
12628
+ "epoch": 1.8815480008552492,
12629
+ "grad_norm": 61.66500286457481,
12630
+ "learning_rate": 1.875373932025015e-09,
12631
+ "logits/chosen": -1.113441824913025,
12632
+ "logits/rejected": -1.2277448177337646,
12633
+ "logps/chosen": -88.60232543945312,
12634
+ "logps/rejected": -123.64103698730469,
12635
+ "loss": 0.5025,
12636
+ "rewards/accuracies": 0.6875,
12637
+ "rewards/chosen": -0.3603760600090027,
12638
+ "rewards/margins": 0.6354212760925293,
12639
+ "rewards/rejected": -0.9957974553108215,
12640
+ "step": 1650
12641
+ },
12642
+ {
12643
+ "epoch": 1.8838286650987102,
12644
+ "grad_norm": 66.53236233989348,
12645
+ "learning_rate": 1.8027704910896668e-09,
12646
+ "logits/chosen": -1.2696802616119385,
12647
+ "logits/rejected": -1.293856143951416,
12648
+ "logps/chosen": -178.1632843017578,
12649
+ "logps/rejected": -208.1569061279297,
12650
+ "loss": 0.4141,
12651
+ "rewards/accuracies": 0.84375,
12652
+ "rewards/chosen": -0.4311864674091339,
12653
+ "rewards/margins": 1.18040931224823,
12654
+ "rewards/rejected": -1.611595869064331,
12655
+ "step": 1652
12656
+ },
12657
+ {
12658
+ "epoch": 1.886109329342171,
12659
+ "grad_norm": 73.576105680566,
12660
+ "learning_rate": 1.731587540747903e-09,
12661
+ "logits/chosen": -1.3243728876113892,
12662
+ "logits/rejected": -1.3624733686447144,
12663
+ "logps/chosen": -178.9629669189453,
12664
+ "logps/rejected": -203.73611450195312,
12665
+ "loss": 0.4112,
12666
+ "rewards/accuracies": 0.84375,
12667
+ "rewards/chosen": -0.35923391580581665,
12668
+ "rewards/margins": 1.0950078964233398,
12669
+ "rewards/rejected": -1.4542417526245117,
12670
+ "step": 1654
12671
+ },
12672
+ {
12673
+ "epoch": 1.8883899935856319,
12674
+ "grad_norm": 60.917071878292,
12675
+ "learning_rate": 1.6618261107101628e-09,
12676
+ "logits/chosen": -1.2426798343658447,
12677
+ "logits/rejected": -1.299846887588501,
12678
+ "logps/chosen": -166.9674530029297,
12679
+ "logps/rejected": -200.53256225585938,
12680
+ "loss": 0.4225,
12681
+ "rewards/accuracies": 0.875,
12682
+ "rewards/chosen": -0.36187708377838135,
12683
+ "rewards/margins": 1.0805463790893555,
12684
+ "rewards/rejected": -1.4424233436584473,
12685
+ "step": 1656
12686
+ },
12687
+ {
12688
+ "epoch": 1.8906706578290926,
12689
+ "grad_norm": 58.315768026639354,
12690
+ "learning_rate": 1.5934872101235785e-09,
12691
+ "logits/chosen": -1.1988379955291748,
12692
+ "logits/rejected": -1.276864767074585,
12693
+ "logps/chosen": -117.3199691772461,
12694
+ "logps/rejected": -166.17037963867188,
12695
+ "loss": 0.4669,
12696
+ "rewards/accuracies": 0.6875,
12697
+ "rewards/chosen": -0.37138426303863525,
12698
+ "rewards/margins": 0.7784909009933472,
12699
+ "rewards/rejected": -1.1498751640319824,
12700
+ "step": 1658
12701
+ },
12702
+ {
12703
+ "epoch": 1.8929513220725536,
12704
+ "grad_norm": 73.00783189642374,
12705
+ "learning_rate": 1.5265718275574656e-09,
12706
+ "logits/chosen": -1.199881672859192,
12707
+ "logits/rejected": -1.294306755065918,
12708
+ "logps/chosen": -163.18499755859375,
12709
+ "logps/rejected": -240.10389709472656,
12710
+ "loss": 0.4091,
12711
+ "rewards/accuracies": 0.90625,
12712
+ "rewards/chosen": -0.5089311599731445,
12713
+ "rewards/margins": 1.5384039878845215,
12714
+ "rewards/rejected": -2.047335147857666,
12715
+ "step": 1660
12716
+ },
12717
+ {
12718
+ "epoch": 1.8952319863160145,
12719
+ "grad_norm": 59.87965880073438,
12720
+ "learning_rate": 1.4610809309889338e-09,
12721
+ "logits/chosen": -1.1384081840515137,
12722
+ "logits/rejected": -1.2086718082427979,
12723
+ "logps/chosen": -228.3030548095703,
12724
+ "logps/rejected": -291.9572448730469,
12725
+ "loss": 0.3708,
12726
+ "rewards/accuracies": 0.875,
12727
+ "rewards/chosen": -0.6747040748596191,
12728
+ "rewards/margins": 1.962480068206787,
12729
+ "rewards/rejected": -2.6371843814849854,
12730
+ "step": 1662
12731
+ },
12732
+ {
12733
+ "epoch": 1.8975126505594755,
12734
+ "grad_norm": 65.04049810626964,
12735
+ "learning_rate": 1.3970154677889312e-09,
12736
+ "logits/chosen": -1.2462139129638672,
12737
+ "logits/rejected": -1.3531625270843506,
12738
+ "logps/chosen": -166.90621948242188,
12739
+ "logps/rejected": -219.62258911132812,
12740
+ "loss": 0.4477,
12741
+ "rewards/accuracies": 0.78125,
12742
+ "rewards/chosen": -0.6883362531661987,
12743
+ "rewards/margins": 1.3010790348052979,
12744
+ "rewards/rejected": -1.989415168762207,
12745
+ "step": 1664
12746
+ },
12747
+ {
12748
+ "epoch": 1.8997933148029365,
12749
+ "grad_norm": 67.17708620355747,
12750
+ "learning_rate": 1.3343763647085337e-09,
12751
+ "logits/chosen": -1.149139404296875,
12752
+ "logits/rejected": -1.2173185348510742,
12753
+ "logps/chosen": -155.94863891601562,
12754
+ "logps/rejected": -230.28384399414062,
12755
+ "loss": 0.4712,
12756
+ "rewards/accuracies": 0.84375,
12757
+ "rewards/chosen": -0.5960929989814758,
12758
+ "rewards/margins": 1.4794337749481201,
12759
+ "rewards/rejected": -2.075526714324951,
12760
+ "step": 1666
12761
+ },
12762
+ {
12763
+ "epoch": 1.9020739790463974,
12764
+ "grad_norm": 56.97928801975319,
12765
+ "learning_rate": 1.2731645278655445e-09,
12766
+ "logits/chosen": -1.2667725086212158,
12767
+ "logits/rejected": -1.3436360359191895,
12768
+ "logps/chosen": -160.5927734375,
12769
+ "logps/rejected": -186.63018798828125,
12770
+ "loss": 0.4321,
12771
+ "rewards/accuracies": 0.8125,
12772
+ "rewards/chosen": -0.48528626561164856,
12773
+ "rewards/margins": 0.931164562702179,
12774
+ "rewards/rejected": -1.4164507389068604,
12775
+ "step": 1668
12776
+ },
12777
+ {
12778
+ "epoch": 1.9043546432898582,
12779
+ "grad_norm": 63.8162468661416,
12780
+ "learning_rate": 1.2133808427313485e-09,
12781
+ "logits/chosen": -1.2804149389266968,
12782
+ "logits/rejected": -1.3368322849273682,
12783
+ "logps/chosen": -131.45181274414062,
12784
+ "logps/rejected": -180.07037353515625,
12785
+ "loss": 0.4551,
12786
+ "rewards/accuracies": 0.65625,
12787
+ "rewards/chosen": -0.5746269226074219,
12788
+ "rewards/margins": 0.8660170435905457,
12789
+ "rewards/rejected": -1.4406440258026123,
12790
+ "step": 1670
12791
+ },
12792
+ {
12793
+ "epoch": 1.9066353075333191,
12794
+ "grad_norm": 52.46121205171607,
12795
+ "learning_rate": 1.1550261741181565e-09,
12796
+ "logits/chosen": -1.2648401260375977,
12797
+ "logits/rejected": -1.239923119544983,
12798
+ "logps/chosen": -163.7249298095703,
12799
+ "logps/rejected": -190.42611694335938,
12800
+ "loss": 0.3885,
12801
+ "rewards/accuracies": 0.84375,
12802
+ "rewards/chosen": -0.3933155834674835,
12803
+ "rewards/margins": 1.0268510580062866,
12804
+ "rewards/rejected": -1.4201666116714478,
12805
+ "step": 1672
12806
+ },
12807
+ {
12808
+ "epoch": 1.9089159717767799,
12809
+ "grad_norm": 64.22449427271324,
12810
+ "learning_rate": 1.0981013661664706e-09,
12811
+ "logits/chosen": -1.293068289756775,
12812
+ "logits/rejected": -1.4030743837356567,
12813
+ "logps/chosen": -174.76348876953125,
12814
+ "logps/rejected": -219.83175659179688,
12815
+ "loss": 0.4548,
12816
+ "rewards/accuracies": 0.75,
12817
+ "rewards/chosen": -0.3287160098552704,
12818
+ "rewards/margins": 0.9871301651000977,
12819
+ "rewards/rejected": -1.3158462047576904,
12820
+ "step": 1674
12821
+ },
12822
+ {
12823
+ "epoch": 1.9111966360202408,
12824
+ "grad_norm": 55.77569297347896,
12825
+ "learning_rate": 1.042607242332838e-09,
12826
+ "logits/chosen": -1.2760488986968994,
12827
+ "logits/rejected": -1.31367027759552,
12828
+ "logps/chosen": -148.86257934570312,
12829
+ "logps/rejected": -173.52359008789062,
12830
+ "loss": 0.4349,
12831
+ "rewards/accuracies": 0.78125,
12832
+ "rewards/chosen": -0.24598023295402527,
12833
+ "rewards/margins": 0.7863931655883789,
12834
+ "rewards/rejected": -1.0323734283447266,
12835
+ "step": 1676
12836
+ },
12837
+ {
12838
+ "epoch": 1.9134773002637018,
12839
+ "grad_norm": 57.697067279665426,
12840
+ "learning_rate": 9.885446053780277e-10,
12841
+ "logits/chosen": -1.3217543363571167,
12842
+ "logits/rejected": -1.3588594198226929,
12843
+ "logps/chosen": -234.18218994140625,
12844
+ "logps/rejected": -244.6886444091797,
12845
+ "loss": 0.4099,
12846
+ "rewards/accuracies": 0.78125,
12847
+ "rewards/chosen": -0.6677453517913818,
12848
+ "rewards/margins": 0.864335834980011,
12849
+ "rewards/rejected": -1.5320810079574585,
12850
+ "step": 1678
12851
+ },
12852
+ {
12853
+ "epoch": 1.9157579645071627,
12854
+ "grad_norm": 52.93198869808111,
12855
+ "learning_rate": 9.359142373553286e-10,
12856
+ "logits/chosen": -1.2812589406967163,
12857
+ "logits/rejected": -1.374354362487793,
12858
+ "logps/chosen": -181.33377075195312,
12859
+ "logps/rejected": -226.33584594726562,
12860
+ "loss": 0.3969,
12861
+ "rewards/accuracies": 0.84375,
12862
+ "rewards/chosen": -0.40316393971443176,
12863
+ "rewards/margins": 1.0113019943237305,
12864
+ "rewards/rejected": -1.4144660234451294,
12865
+ "step": 1680
12866
+ },
12867
+ {
12868
+ "epoch": 1.9180386287506237,
12869
+ "grad_norm": 56.81221018582624,
12870
+ "learning_rate": 8.847168995992915e-10,
12871
+ "logits/chosen": -1.3222296237945557,
12872
+ "logits/rejected": -1.3470890522003174,
12873
+ "logps/chosen": -144.92401123046875,
12874
+ "logps/rejected": -160.66360473632812,
12875
+ "loss": 0.4557,
12876
+ "rewards/accuracies": 0.8125,
12877
+ "rewards/chosen": -0.2944653630256653,
12878
+ "rewards/margins": 0.691783607006073,
12879
+ "rewards/rejected": -0.9862489104270935,
12880
+ "step": 1682
12881
+ },
12882
+ {
12883
+ "epoch": 1.9203192929940847,
12884
+ "grad_norm": 59.96217669380799,
12885
+ "learning_rate": 8.349533327146719e-10,
12886
+ "logits/chosen": -1.1826375722885132,
12887
+ "logits/rejected": -1.2797571420669556,
12888
+ "logps/chosen": -173.0347442626953,
12889
+ "logps/rejected": -217.1268768310547,
12890
+ "loss": 0.421,
12891
+ "rewards/accuracies": 0.875,
12892
+ "rewards/chosen": -0.6681329011917114,
12893
+ "rewards/margins": 1.3352155685424805,
12894
+ "rewards/rejected": -2.0033483505249023,
12895
+ "step": 1684
12896
+ },
12897
+ {
12898
+ "epoch": 1.9225999572375454,
12899
+ "grad_norm": 53.71192157200429,
12900
+ "learning_rate": 7.866242565657599e-10,
12901
+ "logits/chosen": -1.133514642715454,
12902
+ "logits/rejected": -1.2035727500915527,
12903
+ "logps/chosen": -123.30015563964844,
12904
+ "logps/rejected": -165.7233123779297,
12905
+ "loss": 0.4164,
12906
+ "rewards/accuracies": 0.90625,
12907
+ "rewards/chosen": -0.38910388946533203,
12908
+ "rewards/margins": 1.0756738185882568,
12909
+ "rewards/rejected": -1.4647778272628784,
12910
+ "step": 1686
12911
+ },
12912
+ {
12913
+ "epoch": 1.9248806214810064,
12914
+ "grad_norm": 84.94214636470907,
12915
+ "learning_rate": 7.397303702659674e-10,
12916
+ "logits/chosen": -1.2931269407272339,
12917
+ "logits/rejected": -1.3728893995285034,
12918
+ "logps/chosen": -153.67669677734375,
12919
+ "logps/rejected": -198.1190185546875,
12920
+ "loss": 0.4512,
12921
+ "rewards/accuracies": 0.78125,
12922
+ "rewards/chosen": -0.3215530216693878,
12923
+ "rewards/margins": 1.3663368225097656,
12924
+ "rewards/rejected": -1.6878899335861206,
12925
+ "step": 1688
12926
+ },
12927
+ {
12928
+ "epoch": 1.927161285724467,
12929
+ "grad_norm": 60.620511783810954,
12930
+ "learning_rate": 6.942723521676464e-10,
12931
+ "logits/chosen": -1.2509236335754395,
12932
+ "logits/rejected": -1.3288919925689697,
12933
+ "logps/chosen": -171.9304962158203,
12934
+ "logps/rejected": -200.7103729248047,
12935
+ "loss": 0.406,
12936
+ "rewards/accuracies": 0.90625,
12937
+ "rewards/chosen": -0.2758704423904419,
12938
+ "rewards/margins": 0.8382859230041504,
12939
+ "rewards/rejected": -1.1141563653945923,
12940
+ "step": 1690
12941
+ },
12942
+ {
12943
+ "epoch": 1.929441949967928,
12944
+ "grad_norm": 61.96268516778484,
12945
+ "learning_rate": 6.502508598523748e-10,
12946
+ "logits/chosen": -1.1139042377471924,
12947
+ "logits/rejected": -1.113561987876892,
12948
+ "logps/chosen": -145.94715881347656,
12949
+ "logps/rejected": -177.67091369628906,
12950
+ "loss": 0.4153,
12951
+ "rewards/accuracies": 0.84375,
12952
+ "rewards/chosen": -0.3293991684913635,
12953
+ "rewards/margins": 0.9399027824401855,
12954
+ "rewards/rejected": -1.2693020105361938,
12955
+ "step": 1692
12956
+ },
12957
+ {
12958
+ "epoch": 1.931722614211389,
12959
+ "grad_norm": 64.38956995843415,
12960
+ "learning_rate": 6.076665301213646e-10,
12961
+ "logits/chosen": -1.2710590362548828,
12962
+ "logits/rejected": -1.3398025035858154,
12963
+ "logps/chosen": -258.7908935546875,
12964
+ "logps/rejected": -270.1191711425781,
12965
+ "loss": 0.4663,
12966
+ "rewards/accuracies": 0.75,
12967
+ "rewards/chosen": -1.183610200881958,
12968
+ "rewards/margins": 1.2962732315063477,
12969
+ "rewards/rejected": -2.4798836708068848,
12970
+ "step": 1694
12971
+ },
12972
+ {
12973
+ "epoch": 1.93400327845485,
12974
+ "grad_norm": 70.46099460182324,
12975
+ "learning_rate": 5.665199789862907e-10,
12976
+ "logits/chosen": -1.3359112739562988,
12977
+ "logits/rejected": -1.3275290727615356,
12978
+ "logps/chosen": -183.69839477539062,
12979
+ "logps/rejected": -183.45762634277344,
12980
+ "loss": 0.5587,
12981
+ "rewards/accuracies": 0.6875,
12982
+ "rewards/chosen": -0.5863617658615112,
12983
+ "rewards/margins": 0.39982593059539795,
12984
+ "rewards/rejected": -0.9861876368522644,
12985
+ "step": 1696
12986
+ },
12987
+ {
12988
+ "epoch": 1.936283942698311,
12989
+ "grad_norm": 59.14480772543764,
12990
+ "learning_rate": 5.268118016603651e-10,
12991
+ "logits/chosen": -1.2939709424972534,
12992
+ "logits/rejected": -1.2879596948623657,
12993
+ "logps/chosen": -242.49627685546875,
12994
+ "logps/rejected": -255.47325134277344,
12995
+ "loss": 0.5263,
12996
+ "rewards/accuracies": 0.875,
12997
+ "rewards/chosen": -0.6082537174224854,
12998
+ "rewards/margins": 0.8644734621047974,
12999
+ "rewards/rejected": -1.4727270603179932,
13000
+ "step": 1698
13001
+ },
13002
+ {
13003
+ "epoch": 1.938564606941772,
13004
+ "grad_norm": 65.14033380217037,
13005
+ "learning_rate": 4.88542572549755e-10,
13006
+ "logits/chosen": -1.258131742477417,
13007
+ "logits/rejected": -1.341509461402893,
13008
+ "logps/chosen": -280.3218688964844,
13009
+ "logps/rejected": -349.66766357421875,
13010
+ "loss": 0.4044,
13011
+ "rewards/accuracies": 0.875,
13012
+ "rewards/chosen": -0.9879501461982727,
13013
+ "rewards/margins": 1.8015985488891602,
13014
+ "rewards/rejected": -2.789548873901367,
13015
+ "step": 1700
13016
+ },
13017
+ {
13018
+ "epoch": 1.938564606941772,
13019
+ "eval_logits/chosen": -1.3368662595748901,
13020
+ "eval_logits/rejected": -1.3193824291229248,
13021
+ "eval_logps/chosen": -132.35743713378906,
13022
+ "eval_logps/rejected": -139.43797302246094,
13023
+ "eval_loss": 0.5376756191253662,
13024
+ "eval_rewards/accuracies": 0.7200000286102295,
13025
+ "eval_rewards/chosen": -0.2459474503993988,
13026
+ "eval_rewards/margins": 0.49157509207725525,
13027
+ "eval_rewards/rejected": -0.7375224828720093,
13028
+ "eval_runtime": 20.9786,
13029
+ "eval_samples_per_second": 4.767,
13030
+ "eval_steps_per_second": 1.192,
13031
+ "step": 1700
13032
+ },
13033
+ {
13034
+ "epoch": 1.9408452711852326,
13035
+ "grad_norm": 67.5831820584933,
13036
+ "learning_rate": 4.5171284524521127e-10,
13037
+ "logits/chosen": -1.1273996829986572,
13038
+ "logits/rejected": -1.200268030166626,
13039
+ "logps/chosen": -141.1074981689453,
13040
+ "logps/rejected": -161.90756225585938,
13041
+ "loss": 0.3986,
13042
+ "rewards/accuracies": 0.875,
13043
+ "rewards/chosen": -0.13401609659194946,
13044
+ "rewards/margins": 0.9844987392425537,
13045
+ "rewards/rejected": -1.118514895439148,
13046
+ "step": 1702
13047
+ },
13048
+ {
13049
+ "epoch": 1.9431259354286936,
13050
+ "grad_norm": 64.49222478839803,
13051
+ "learning_rate": 4.163231525141309e-10,
13052
+ "logits/chosen": -1.4064816236495972,
13053
+ "logits/rejected": -1.4361658096313477,
13054
+ "logps/chosen": -240.72976684570312,
13055
+ "logps/rejected": -262.8630676269531,
13056
+ "loss": 0.4409,
13057
+ "rewards/accuracies": 0.9375,
13058
+ "rewards/chosen": -0.6487561464309692,
13059
+ "rewards/margins": 1.2385480403900146,
13060
+ "rewards/rejected": -1.8873043060302734,
13061
+ "step": 1704
13062
+ },
13063
+ {
13064
+ "epoch": 1.9454065996721546,
13065
+ "grad_norm": 50.83672179405874,
13066
+ "learning_rate": 3.8237400629280714e-10,
13067
+ "logits/chosen": -1.0193315744400024,
13068
+ "logits/rejected": -0.9887692928314209,
13069
+ "logps/chosen": -120.17151641845703,
13070
+ "logps/rejected": -136.94674682617188,
13071
+ "loss": 0.4543,
13072
+ "rewards/accuracies": 0.78125,
13073
+ "rewards/chosen": -0.27739959955215454,
13074
+ "rewards/margins": 0.6725433468818665,
13075
+ "rewards/rejected": -0.949942946434021,
13076
+ "step": 1706
13077
+ },
13078
+ {
13079
+ "epoch": 1.9476872639156153,
13080
+ "grad_norm": 53.5967901694209,
13081
+ "learning_rate": 3.4986589767902476e-10,
13082
+ "logits/chosen": -1.2411226034164429,
13083
+ "logits/rejected": -1.3345215320587158,
13084
+ "logps/chosen": -90.0027084350586,
13085
+ "logps/rejected": -116.62909698486328,
13086
+ "loss": 0.4495,
13087
+ "rewards/accuracies": 0.78125,
13088
+ "rewards/chosen": -0.078713558614254,
13089
+ "rewards/margins": 0.6798267364501953,
13090
+ "rewards/rejected": -0.7585403323173523,
13091
+ "step": 1708
13092
+ },
13093
+ {
13094
+ "epoch": 1.9499679281590763,
13095
+ "grad_norm": 59.266256355757655,
13096
+ "learning_rate": 3.187992969249875e-10,
13097
+ "logits/chosen": -1.2389315366744995,
13098
+ "logits/rejected": -1.2326477766036987,
13099
+ "logps/chosen": -127.89494323730469,
13100
+ "logps/rejected": -154.4901580810547,
13101
+ "loss": 0.4366,
13102
+ "rewards/accuracies": 0.78125,
13103
+ "rewards/chosen": -0.4514698088169098,
13104
+ "rewards/margins": 0.6477700471878052,
13105
+ "rewards/rejected": -1.0992399454116821,
13106
+ "step": 1710
13107
+ },
13108
+ {
13109
+ "epoch": 1.9522485924025372,
13110
+ "grad_norm": 62.77597760270175,
13111
+ "learning_rate": 2.8917465343047954e-10,
13112
+ "logits/chosen": -1.3058414459228516,
13113
+ "logits/rejected": -1.374163269996643,
13114
+ "logps/chosen": -171.95448303222656,
13115
+ "logps/rejected": -207.97914123535156,
13116
+ "loss": 0.3709,
13117
+ "rewards/accuracies": 0.84375,
13118
+ "rewards/chosen": -0.32011568546295166,
13119
+ "rewards/margins": 1.2897684574127197,
13120
+ "rewards/rejected": -1.6098840236663818,
13121
+ "step": 1712
13122
+ },
13123
+ {
13124
+ "epoch": 1.9545292566459982,
13125
+ "grad_norm": 56.41697287299428,
13126
+ "learning_rate": 2.609923957363702e-10,
13127
+ "logits/chosen": -1.2662739753723145,
13128
+ "logits/rejected": -1.3142364025115967,
13129
+ "logps/chosen": -149.61746215820312,
13130
+ "logps/rejected": -184.44931030273438,
13131
+ "loss": 0.4197,
13132
+ "rewards/accuracies": 0.84375,
13133
+ "rewards/chosen": -0.28848132491111755,
13134
+ "rewards/margins": 1.1091067790985107,
13135
+ "rewards/rejected": -1.3975881338119507,
13136
+ "step": 1714
13137
+ },
13138
+ {
13139
+ "epoch": 1.9568099208894592,
13140
+ "grad_norm": 58.90785637715505,
13141
+ "learning_rate": 2.3425293151845273e-10,
13142
+ "logits/chosen": -1.2464115619659424,
13143
+ "logits/rejected": -1.2890043258666992,
13144
+ "logps/chosen": -146.8938446044922,
13145
+ "logps/rejected": -162.07830810546875,
13146
+ "loss": 0.4128,
13147
+ "rewards/accuracies": 0.8125,
13148
+ "rewards/chosen": -0.2892056405544281,
13149
+ "rewards/margins": 0.7288376092910767,
13150
+ "rewards/rejected": -1.0180431604385376,
13151
+ "step": 1716
13152
+ },
13153
+ {
13154
+ "epoch": 1.9590905851329201,
13155
+ "grad_norm": 66.05988715134325,
13156
+ "learning_rate": 2.0895664758151521e-10,
13157
+ "logits/chosen": -1.1935899257659912,
13158
+ "logits/rejected": -1.254826545715332,
13159
+ "logps/chosen": -194.64492797851562,
13160
+ "logps/rejected": -225.4056854248047,
13161
+ "loss": 0.4007,
13162
+ "rewards/accuracies": 0.8125,
13163
+ "rewards/chosen": -0.525715172290802,
13164
+ "rewards/margins": 1.1838706731796265,
13165
+ "rewards/rejected": -1.7095859050750732,
13166
+ "step": 1718
13167
+ },
13168
+ {
13169
+ "epoch": 1.9613712493763809,
13170
+ "grad_norm": 64.18176403648182,
13171
+ "learning_rate": 1.8510390985371216e-10,
13172
+ "logits/chosen": -1.1969249248504639,
13173
+ "logits/rejected": -1.2043269872665405,
13174
+ "logps/chosen": -201.9025115966797,
13175
+ "logps/rejected": -233.6451873779297,
13176
+ "loss": 0.4386,
13177
+ "rewards/accuracies": 0.84375,
13178
+ "rewards/chosen": -0.5428147315979004,
13179
+ "rewards/margins": 0.8293758630752563,
13180
+ "rewards/rejected": -1.3721905946731567,
13181
+ "step": 1720
13182
+ },
13183
+ {
13184
+ "epoch": 1.9636519136198418,
13185
+ "grad_norm": 58.86675845104934,
13186
+ "learning_rate": 1.626950633813351e-10,
13187
+ "logits/chosen": -1.2230490446090698,
13188
+ "logits/rejected": -1.2853928804397583,
13189
+ "logps/chosen": -160.97621154785156,
13190
+ "logps/rejected": -223.6080780029297,
13191
+ "loss": 0.4377,
13192
+ "rewards/accuracies": 0.8125,
13193
+ "rewards/chosen": -0.3992640972137451,
13194
+ "rewards/margins": 1.2427630424499512,
13195
+ "rewards/rejected": -1.6420272588729858,
13196
+ "step": 1722
13197
+ },
13198
+ {
13199
+ "epoch": 1.9659325778633026,
13200
+ "grad_norm": 63.84847943868757,
13201
+ "learning_rate": 1.4173043232380554e-10,
13202
+ "logits/chosen": -1.1713950634002686,
13203
+ "logits/rejected": -1.2190814018249512,
13204
+ "logps/chosen": -160.047119140625,
13205
+ "logps/rejected": -184.2767791748047,
13206
+ "loss": 0.4372,
13207
+ "rewards/accuracies": 0.84375,
13208
+ "rewards/chosen": -0.4601638913154602,
13209
+ "rewards/margins": 0.9584896564483643,
13210
+ "rewards/rejected": -1.4186536073684692,
13211
+ "step": 1724
13212
+ },
13213
+ {
13214
+ "epoch": 1.9682132421067635,
13215
+ "grad_norm": 64.3589769132475,
13216
+ "learning_rate": 1.222103199489455e-10,
13217
+ "logits/chosen": -1.3236342668533325,
13218
+ "logits/rejected": -1.3482894897460938,
13219
+ "logps/chosen": -182.65269470214844,
13220
+ "logps/rejected": -217.88861083984375,
13221
+ "loss": 0.4358,
13222
+ "rewards/accuracies": 0.875,
13223
+ "rewards/chosen": -0.43886712193489075,
13224
+ "rewards/margins": 1.474854588508606,
13225
+ "rewards/rejected": -1.9137215614318848,
13226
+ "step": 1726
13227
+ },
13228
+ {
13229
+ "epoch": 1.9704939063502245,
13230
+ "grad_norm": 71.97812711207425,
13231
+ "learning_rate": 1.0413500862864743e-10,
13232
+ "logits/chosen": -1.377000093460083,
13233
+ "logits/rejected": -1.4007121324539185,
13234
+ "logps/chosen": -237.48316955566406,
13235
+ "logps/rejected": -259.9788818359375,
13236
+ "loss": 0.4143,
13237
+ "rewards/accuracies": 0.90625,
13238
+ "rewards/chosen": -0.6071898937225342,
13239
+ "rewards/margins": 1.190570592880249,
13240
+ "rewards/rejected": -1.7977604866027832,
13241
+ "step": 1728
13242
+ },
13243
+ {
13244
+ "epoch": 1.9727745705936854,
13245
+ "grad_norm": 61.7108383512953,
13246
+ "learning_rate": 8.750475983472227e-11,
13247
+ "logits/chosen": -1.2676740884780884,
13248
+ "logits/rejected": -1.2757924795150757,
13249
+ "logps/chosen": -190.92140197753906,
13250
+ "logps/rejected": -197.59115600585938,
13251
+ "loss": 0.4226,
13252
+ "rewards/accuracies": 0.90625,
13253
+ "rewards/chosen": -0.5921288132667542,
13254
+ "rewards/margins": 1.0576057434082031,
13255
+ "rewards/rejected": -1.6497344970703125,
13256
+ "step": 1730
13257
+ },
13258
+ {
13259
+ "epoch": 1.9750552348371464,
13260
+ "grad_norm": 188.64844039216806,
13261
+ "learning_rate": 7.231981413520217e-11,
13262
+ "logits/chosen": -1.263267993927002,
13263
+ "logits/rejected": -1.2889572381973267,
13264
+ "logps/chosen": -157.240478515625,
13265
+ "logps/rejected": -197.0875701904297,
13266
+ "loss": 0.4582,
13267
+ "rewards/accuracies": 0.875,
13268
+ "rewards/chosen": -0.24919547140598297,
13269
+ "rewards/margins": 0.7916581630706787,
13270
+ "rewards/rejected": -1.0408536195755005,
13271
+ "step": 1732
13272
+ },
13273
+ {
13274
+ "epoch": 1.9773358990806074,
13275
+ "grad_norm": 65.4504778217422,
13276
+ "learning_rate": 5.858039119077673e-11,
13277
+ "logits/chosen": -1.2087372541427612,
13278
+ "logits/rejected": -1.2781308889389038,
13279
+ "logps/chosen": -176.43057250976562,
13280
+ "logps/rejected": -216.8165740966797,
13281
+ "loss": 0.4563,
13282
+ "rewards/accuracies": 0.84375,
13283
+ "rewards/chosen": -0.41199278831481934,
13284
+ "rewards/margins": 0.922531008720398,
13285
+ "rewards/rejected": -1.3345237970352173,
13286
+ "step": 1734
13287
+ },
13288
+ {
13289
+ "epoch": 1.979616563324068,
13290
+ "grad_norm": 61.72345732533114,
13291
+ "learning_rate": 4.628668975166228e-11,
13292
+ "logits/chosen": -1.4069479703903198,
13293
+ "logits/rejected": -1.425885558128357,
13294
+ "logps/chosen": -155.35098266601562,
13295
+ "logps/rejected": -172.41546630859375,
13296
+ "loss": 0.3891,
13297
+ "rewards/accuracies": 0.84375,
13298
+ "rewards/chosen": -0.3574288785457611,
13299
+ "rewards/margins": 1.1354269981384277,
13300
+ "rewards/rejected": -1.4928559064865112,
13301
+ "step": 1736
13302
+ },
13303
+ {
13304
+ "epoch": 1.981897227567529,
13305
+ "grad_norm": 50.29362647543594,
13306
+ "learning_rate": 3.5438887654737346e-11,
13307
+ "logits/chosen": -1.1932240724563599,
13308
+ "logits/rejected": -1.2252675294876099,
13309
+ "logps/chosen": -104.49500274658203,
13310
+ "logps/rejected": -128.2619171142578,
13311
+ "loss": 0.3998,
13312
+ "rewards/accuracies": 0.78125,
13313
+ "rewards/chosen": -0.0990162342786789,
13314
+ "rewards/margins": 0.8109432458877563,
13315
+ "rewards/rejected": -0.9099595546722412,
13316
+ "step": 1738
13317
+ },
13318
+ {
13319
+ "epoch": 1.9841778918109898,
13320
+ "grad_norm": 63.67796104250234,
13321
+ "learning_rate": 2.603714182093375e-11,
13322
+ "logits/chosen": -1.2760851383209229,
13323
+ "logits/rejected": -1.3577792644500732,
13324
+ "logps/chosen": -172.22714233398438,
13325
+ "logps/rejected": -215.27749633789062,
13326
+ "loss": 0.4565,
13327
+ "rewards/accuracies": 0.84375,
13328
+ "rewards/chosen": -0.560702919960022,
13329
+ "rewards/margins": 1.0820647478103638,
13330
+ "rewards/rejected": -1.6427676677703857,
13331
+ "step": 1740
13332
+ },
13333
+ {
13334
+ "epoch": 1.9864585560544508,
13335
+ "grad_norm": 58.508228872135426,
13336
+ "learning_rate": 1.808158825297168e-11,
13337
+ "logits/chosen": -1.3208973407745361,
13338
+ "logits/rejected": -1.3905422687530518,
13339
+ "logps/chosen": -147.84127807617188,
13340
+ "logps/rejected": -178.2074737548828,
13341
+ "loss": 0.4758,
13342
+ "rewards/accuracies": 0.6875,
13343
+ "rewards/chosen": -0.48498719930648804,
13344
+ "rewards/margins": 0.888887882232666,
13345
+ "rewards/rejected": -1.3738751411437988,
13346
+ "step": 1742
13347
+ },
13348
+ {
13349
+ "epoch": 1.9887392202979117,
13350
+ "grad_norm": 58.05411220405763,
13351
+ "learning_rate": 1.1572342033416838e-11,
13352
+ "logits/chosen": -1.3570505380630493,
13353
+ "logits/rejected": -1.3865540027618408,
13354
+ "logps/chosen": -171.88287353515625,
13355
+ "logps/rejected": -210.6277618408203,
13356
+ "loss": 0.4492,
13357
+ "rewards/accuracies": 0.90625,
13358
+ "rewards/chosen": -0.28449732065200806,
13359
+ "rewards/margins": 1.1635254621505737,
13360
+ "rewards/rejected": -1.448022723197937,
13361
+ "step": 1744
13362
+ },
13363
+ {
13364
+ "epoch": 1.9910198845413727,
13365
+ "grad_norm": 60.292954042523704,
13366
+ "learning_rate": 6.50949732301509e-12,
13367
+ "logits/chosen": -1.2242615222930908,
13368
+ "logits/rejected": -1.3328139781951904,
13369
+ "logps/chosen": -164.70863342285156,
13370
+ "logps/rejected": -213.20814514160156,
13371
+ "loss": 0.3912,
13372
+ "rewards/accuracies": 0.78125,
13373
+ "rewards/chosen": -0.497256338596344,
13374
+ "rewards/margins": 1.1804652214050293,
13375
+ "rewards/rejected": -1.677721619606018,
13376
+ "step": 1746
13377
+ },
13378
+ {
13379
+ "epoch": 1.9933005487848336,
13380
+ "grad_norm": 68.33201893652561,
13381
+ "learning_rate": 2.893127359282488e-12,
13382
+ "logits/chosen": -1.2822688817977905,
13383
+ "logits/rejected": -1.3241004943847656,
13384
+ "logps/chosen": -207.72021484375,
13385
+ "logps/rejected": -262.7379455566406,
13386
+ "loss": 0.3881,
13387
+ "rewards/accuracies": 0.875,
13388
+ "rewards/chosen": -0.34897035360336304,
13389
+ "rewards/margins": 1.5757369995117188,
13390
+ "rewards/rejected": -1.924707293510437,
13391
+ "step": 1748
13392
+ },
13393
+ {
13394
+ "epoch": 1.9955812130282946,
13395
+ "grad_norm": 62.482127853658426,
13396
+ "learning_rate": 7.232844555282725e-13,
13397
+ "logits/chosen": -1.3659021854400635,
13398
+ "logits/rejected": -1.3771145343780518,
13399
+ "logps/chosen": -201.26531982421875,
13400
+ "logps/rejected": -219.556640625,
13401
+ "loss": 0.3859,
13402
+ "rewards/accuracies": 0.875,
13403
+ "rewards/chosen": -0.7029599547386169,
13404
+ "rewards/margins": 1.120865821838379,
13405
+ "rewards/rejected": -1.823825716972351,
13406
+ "step": 1750
13407
+ },
13408
+ {
13409
+ "epoch": 1.9978618772717553,
13410
+ "grad_norm": 67.65957613657208,
13411
+ "learning_rate": 0.0,
13412
+ "logits/chosen": -1.171149492263794,
13413
+ "logits/rejected": -1.2374571561813354,
13414
+ "logps/chosen": -129.96214294433594,
13415
+ "logps/rejected": -161.25244140625,
13416
+ "loss": 0.4188,
13417
+ "rewards/accuracies": 0.90625,
13418
+ "rewards/chosen": -0.34549519419670105,
13419
+ "rewards/margins": 1.0673820972442627,
13420
+ "rewards/rejected": -1.4128773212432861,
13421
+ "step": 1752
13422
  }
13423
  ],
13424
  "logging_steps": 2,
 
13433
  "should_evaluate": false,
13434
  "should_log": false,
13435
  "should_save": true,
13436
+ "should_training_stop": true
13437
  },
13438
  "attributes": {}
13439
  }