RyanYr committed
Commit c42f46b
1 Parent(s): 5c05534

Training in progress, step 1726, checkpoint

last-checkpoint/global_step1726/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ab8153b1f364ce8143dd8ca6a91c02d9b732a7f246e193af8418363fb39c323e
+ size 24090788996
last-checkpoint/global_step1726/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c33f67b21fac802a7b9f40879e4213877fdadc621faf76516815fbfb899b0182
+ size 24090788996
last-checkpoint/global_step1726/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1e7ad1f2e6b5d52de51c9142c253e4504b14cb96ba450e1d677982e176eb40e6
+ size 24090788996
last-checkpoint/global_step1726/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3d4e1f3390a9b43c817745dc7aacca53a7f480440f86642baacca43ef2b6cbf
+ size 24090788996
last-checkpoint/global_step1726/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db3e44efac1a79db42ada5e189c9066027261b7fb0012fcb570344c3f83140e7
+ size 150693
last-checkpoint/global_step1726/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e0503a2119d3a7637d504d6bee881b7a97f77742d4a2e4dcc7a27a1dd1b027a5
+ size 150693
last-checkpoint/global_step1726/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:361b7e8e42f99ecbc4792177905ee85e8dffc028b9a1b65f86bf348be14d0039
+ size 150693
last-checkpoint/global_step1726/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bd50bc52fa7d9aa5ccff56846315efd88d3dbf255e3f23d479db9461ebdfd302
+ size 150693
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step1500
+ global_step1726
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:99de85720481c98cc093f3faf5805a4ff05d5df419d49b8575ed63ce236d5815
+ oid sha256:d06d89588d2c5a6b7c30a35a96b4705ea4a256222a1999601f01382cead91d16
  size 4976698672
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:368f761161ebad7292a8dbdeca4656fb602262d1f2495446f32f49896062f7dc
+ oid sha256:195f937f5574012744471b0d6769d312a0af52820de2cb76093f28e08f193898
  size 4999802720
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:306982a5a2f0fc8003fdb3eebf34d6850d83379bebc04fbe40d7a6bb9f8b6a5c
+ oid sha256:590d0c8a9ae18db231102e3d95cf9330b9873b37d3e0d44bb2af348b5368d23d
  size 4915916176
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fa2c5d55d66df705380544c1b8076cf199a0a6e6da3583e847a01a69fbf8edb4
+ oid sha256:b71c6b037e8207917d33c1313e1bd133b2aed3cfcf9087dc27c9683c5ff38c99
  size 1168138808
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b4f003069486a57c6ac033f30cf4c4213eb6b7d659bab68a5a50fdb8da7c4118
+ oid sha256:b8f22ced19e790cc864cefe3b7c711d9ae631c44f95d42fb4829688cc3de0153
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a016ef89b4392d083b2c15a7cf06a39bc61a759f648cf6dc03f1c32b89a526aa
+ oid sha256:3e0407513eba77d34cbf3adf0e59a58bd80716f4f00f414854253637e82be43d
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9b56fe0893036dc052d18d90feba4328b90ea71561942150b07406ac3d7a700e
+ oid sha256:6060636c023258ce9b965e244b8a58b4c99d5784dde4405b39737550ef50cd4f
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c0c203d12c2c308dab785ed672c9ca27fb6a2f72acd1e1552d1516c7b0006013
+ oid sha256:c24ccdfdcde39cb2265c82c50c36ffdfcc670f757aba4bcf4bb0fdc6d1373c4c
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b0eed8fec4102664205a804b0fbc28ba65f44e3fb811cdaf695f0e9321c6fe0b
+ oid sha256:b56a0f1c3322e3323ab0de90511453e2d705194cbbc2d4c04dd46c593fd07065
  size 1064
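The last-checkpoint/latest file changed above is a one-line tag that points at the DeepSpeed shard directory for the most recent step (now global_step1726 instead of global_step1500), which is where the per-rank ZeRO optimizer/model state files added in this commit live. A small sketch of resolving it, assuming the checkpoint has been cloned to a local directory:

import os

checkpoint_dir = "last-checkpoint"  # illustrative local path
with open(os.path.join(checkpoint_dir, "latest")) as f:
    tag = f.read().strip()          # e.g. "global_step1726"

shard_dir = os.path.join(checkpoint_dir, tag)
print(sorted(os.listdir(shard_dir)))  # the bf16_zero_* and zero_pp_rank_* files listed above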
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 1.7372421281216068,
  "eval_steps": 100,
- "global_step": 1500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -11497,6 +11497,1733 @@
  "eval_samples_per_second": 4.338,
  "eval_steps_per_second": 1.085,
  "step": 1500
  }
  ],
  "logging_steps": 2,
@@ -11511,7 +13238,7 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
  },
  "attributes": {}
  }
 
  {
  "best_metric": null,
  "best_model_checkpoint": null,
+ "epoch": 1.9989866087585957,
  "eval_steps": 100,
+ "global_step": 1726,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
 
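The block that follows appends the per-step DPO-style training logs (loss, grad_norm, learning_rate, logits/logps for chosen and rejected completions, and reward statistics) from step 1502 up to the new global_step of 1726, plus periodic eval entries. A minimal sketch for pulling those series out of trainer_state.json, assuming the standard Hugging Face Trainer layout where they sit under "log_history":

import json

with open("last-checkpoint/trainer_state.json") as f:  # illustrative local path
    state = json.load(f)

# Keep only training-log entries; eval entries carry "eval_loss" instead of "loss".
train_logs = [e for e in state["log_history"] if "loss" in e]
for entry in train_logs[-3:]:
    print(entry["step"], entry["loss"], entry.get("rewards/margins"))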
  "eval_samples_per_second": 4.338,
  "eval_steps_per_second": 1.085,
  "step": 1500
11500
+ },
11501
+ {
11502
+ "epoch": 1.7395584509591024,
11503
+ "grad_norm": 55.81693817141226,
11504
+ "learning_rate": 9.220091139554887e-09,
11505
+ "logits/chosen": -1.1932331323623657,
11506
+ "logits/rejected": -1.1756948232650757,
11507
+ "logps/chosen": -103.76750183105469,
11508
+ "logps/rejected": -129.5191650390625,
11509
+ "loss": 0.4117,
11510
+ "rewards/accuracies": 0.8125,
11511
+ "rewards/chosen": -0.2089885175228119,
11512
+ "rewards/margins": 0.9426325559616089,
11513
+ "rewards/rejected": -1.1516211032867432,
11514
+ "step": 1502
11515
+ },
11516
+ {
11517
+ "epoch": 1.741874773796598,
11518
+ "grad_norm": 62.56378010964504,
11519
+ "learning_rate": 9.05870280610117e-09,
11520
+ "logits/chosen": -1.2296499013900757,
11521
+ "logits/rejected": -1.1992714405059814,
11522
+ "logps/chosen": -123.05607604980469,
11523
+ "logps/rejected": -130.20932006835938,
11524
+ "loss": 0.4251,
11525
+ "rewards/accuracies": 0.8125,
11526
+ "rewards/chosen": -0.41981515288352966,
11527
+ "rewards/margins": 0.8822442293167114,
11528
+ "rewards/rejected": -1.3020594120025635,
11529
+ "step": 1504
11530
+ },
11531
+ {
11532
+ "epoch": 1.7441910966340934,
11533
+ "grad_norm": 81.02596584931305,
11534
+ "learning_rate": 8.898672408511553e-09,
11535
+ "logits/chosen": -1.2401373386383057,
11536
+ "logits/rejected": -1.17184317111969,
11537
+ "logps/chosen": -168.21986389160156,
11538
+ "logps/rejected": -162.71383666992188,
11539
+ "loss": 0.4402,
11540
+ "rewards/accuracies": 0.78125,
11541
+ "rewards/chosen": -0.5058491230010986,
11542
+ "rewards/margins": 1.3156105279922485,
11543
+ "rewards/rejected": -1.8214595317840576,
11544
+ "step": 1506
11545
+ },
11546
+ {
11547
+ "epoch": 1.7465074194715888,
11548
+ "grad_norm": 56.282435640432055,
11549
+ "learning_rate": 8.740002336360686e-09,
11550
+ "logits/chosen": -1.351073980331421,
11551
+ "logits/rejected": -1.4128607511520386,
11552
+ "logps/chosen": -152.2001190185547,
11553
+ "logps/rejected": -191.74932861328125,
11554
+ "loss": 0.4226,
11555
+ "rewards/accuracies": 0.90625,
11556
+ "rewards/chosen": -0.2529861629009247,
11557
+ "rewards/margins": 1.520397424697876,
11558
+ "rewards/rejected": -1.773383617401123,
11559
+ "step": 1508
11560
+ },
11561
+ {
11562
+ "epoch": 1.7488237423090842,
11563
+ "grad_norm": 49.81866700382731,
11564
+ "learning_rate": 8.582694958910807e-09,
11565
+ "logits/chosen": -1.2254369258880615,
11566
+ "logits/rejected": -1.2749468088150024,
11567
+ "logps/chosen": -182.1556854248047,
11568
+ "logps/rejected": -216.18411254882812,
11569
+ "loss": 0.3902,
11570
+ "rewards/accuracies": 0.84375,
11571
+ "rewards/chosen": -0.6539211869239807,
11572
+ "rewards/margins": 1.5710985660552979,
11573
+ "rewards/rejected": -2.225019693374634,
11574
+ "step": 1510
11575
+ },
11576
+ {
11577
+ "epoch": 1.7511400651465798,
11578
+ "grad_norm": 52.34711654194476,
11579
+ "learning_rate": 8.426752625076373e-09,
11580
+ "logits/chosen": -1.2552261352539062,
11581
+ "logits/rejected": -1.3420953750610352,
11582
+ "logps/chosen": -186.13934326171875,
11583
+ "logps/rejected": -285.68804931640625,
11584
+ "loss": 0.3499,
11585
+ "rewards/accuracies": 0.9375,
11586
+ "rewards/chosen": -0.5289927124977112,
11587
+ "rewards/margins": 3.851708173751831,
11588
+ "rewards/rejected": -4.380701065063477,
11589
+ "step": 1512
11590
+ },
11591
+ {
11592
+ "epoch": 1.7534563879840754,
11593
+ "grad_norm": 52.389521034020916,
11594
+ "learning_rate": 8.272177663389046e-09,
11595
+ "logits/chosen": -1.1967260837554932,
11596
+ "logits/rejected": -1.1967551708221436,
11597
+ "logps/chosen": -217.95095825195312,
11598
+ "logps/rejected": -236.05380249023438,
11599
+ "loss": 0.3782,
11600
+ "rewards/accuracies": 0.9375,
11601
+ "rewards/chosen": -0.3343973159790039,
11602
+ "rewards/margins": 1.9475483894348145,
11603
+ "rewards/rejected": -2.2819457054138184,
11604
+ "step": 1514
11605
+ },
11606
+ {
11607
+ "epoch": 1.7557727108215708,
11608
+ "grad_norm": 55.99063639865476,
11609
+ "learning_rate": 8.118972381962851e-09,
11610
+ "logits/chosen": -1.1716736555099487,
11611
+ "logits/rejected": -1.2387813329696655,
11612
+ "logps/chosen": -161.53382873535156,
11613
+ "logps/rejected": -189.86782836914062,
11614
+ "loss": 0.3571,
11615
+ "rewards/accuracies": 0.875,
11616
+ "rewards/chosen": -0.7027201652526855,
11617
+ "rewards/margins": 1.415562629699707,
11618
+ "rewards/rejected": -2.1182827949523926,
11619
+ "step": 1516
11620
+ },
11621
+ {
11622
+ "epoch": 1.7580890336590662,
11623
+ "grad_norm": 60.98846930904024,
11624
+ "learning_rate": 7.967139068459726e-09,
11625
+ "logits/chosen": -1.1493229866027832,
11626
+ "logits/rejected": -1.146936297416687,
11627
+ "logps/chosen": -128.60189819335938,
11628
+ "logps/rejected": -160.15321350097656,
11629
+ "loss": 0.3883,
11630
+ "rewards/accuracies": 0.90625,
11631
+ "rewards/chosen": -0.36255598068237305,
11632
+ "rewards/margins": 1.3770678043365479,
11633
+ "rewards/rejected": -1.7396236658096313,
11634
+ "step": 1518
11635
+ },
11636
+ {
11637
+ "epoch": 1.7604053564965616,
11638
+ "grad_norm": 176.68417193498476,
11639
+ "learning_rate": 7.81667999005543e-09,
11640
+ "logits/chosen": -1.3078656196594238,
11641
+ "logits/rejected": -1.3120546340942383,
11642
+ "logps/chosen": -179.33438110351562,
11643
+ "logps/rejected": -186.1118621826172,
11644
+ "loss": 0.4993,
11645
+ "rewards/accuracies": 0.78125,
11646
+ "rewards/chosen": -0.22156819701194763,
11647
+ "rewards/margins": 0.9435240030288696,
11648
+ "rewards/rejected": -1.16509211063385,
11649
+ "step": 1520
11650
+ },
11651
+ {
11652
+ "epoch": 1.7627216793340572,
11653
+ "grad_norm": 51.91509618352603,
11654
+ "learning_rate": 7.6675973934056e-09,
11655
+ "logits/chosen": -1.0760035514831543,
11656
+ "logits/rejected": -1.1664559841156006,
11657
+ "logps/chosen": -136.77081298828125,
11658
+ "logps/rejected": -175.25926208496094,
11659
+ "loss": 0.4397,
11660
+ "rewards/accuracies": 0.875,
11661
+ "rewards/chosen": -0.2903676927089691,
11662
+ "rewards/margins": 1.1137360334396362,
11663
+ "rewards/rejected": -1.4041036367416382,
11664
+ "step": 1522
11665
+ },
11666
+ {
11667
+ "epoch": 1.7650380021715528,
11668
+ "grad_norm": 59.67977888278656,
11669
+ "learning_rate": 7.51989350461224e-09,
11670
+ "logits/chosen": -1.2789033651351929,
11671
+ "logits/rejected": -1.3050099611282349,
11672
+ "logps/chosen": -145.60400390625,
11673
+ "logps/rejected": -156.1199951171875,
11674
+ "loss": 0.4456,
11675
+ "rewards/accuracies": 0.84375,
11676
+ "rewards/chosen": -0.3298056125640869,
11677
+ "rewards/margins": 0.7980384826660156,
11678
+ "rewards/rejected": -1.1278440952301025,
11679
+ "step": 1524
11680
+ },
11681
+ {
11682
+ "epoch": 1.7673543250090482,
11683
+ "grad_norm": 50.021597145613505,
11684
+ "learning_rate": 7.373570529190498e-09,
11685
+ "logits/chosen": -1.2145639657974243,
11686
+ "logits/rejected": -1.2063783407211304,
11687
+ "logps/chosen": -162.4467315673828,
11688
+ "logps/rejected": -196.65577697753906,
11689
+ "loss": 0.3705,
11690
+ "rewards/accuracies": 0.75,
11691
+ "rewards/chosen": -0.2843925654888153,
11692
+ "rewards/margins": 1.6666440963745117,
11693
+ "rewards/rejected": -1.9510366916656494,
11694
+ "step": 1526
11695
+ },
11696
+ {
11697
+ "epoch": 1.7696706478465436,
11698
+ "grad_norm": 62.64810414938643,
11699
+ "learning_rate": 7.228630652035717e-09,
11700
+ "logits/chosen": -1.2518867254257202,
11701
+ "logits/rejected": -1.1622406244277954,
11702
+ "logps/chosen": -169.1246337890625,
11703
+ "logps/rejected": -189.02169799804688,
11704
+ "loss": 0.3845,
11705
+ "rewards/accuracies": 0.9375,
11706
+ "rewards/chosen": -0.23387570679187775,
11707
+ "rewards/margins": 1.9417215585708618,
11708
+ "rewards/rejected": -2.1755971908569336,
11709
+ "step": 1528
11710
+ },
11711
+ {
11712
+ "epoch": 1.771986970684039,
11713
+ "grad_norm": 56.92633456964183,
11714
+ "learning_rate": 7.08507603739078e-09,
11715
+ "logits/chosen": -1.2512166500091553,
11716
+ "logits/rejected": -1.2901430130004883,
11717
+ "logps/chosen": -162.3385772705078,
11718
+ "logps/rejected": -193.41940307617188,
11719
+ "loss": 0.4376,
11720
+ "rewards/accuracies": 0.875,
11721
+ "rewards/chosen": -0.40012550354003906,
11722
+ "rewards/margins": 1.2529363632202148,
11723
+ "rewards/rejected": -1.653061866760254,
11724
+ "step": 1530
11725
+ },
11726
+ {
11727
+ "epoch": 1.7743032935215346,
11728
+ "grad_norm": 76.55205063599432,
11729
+ "learning_rate": 6.942908828813876e-09,
11730
+ "logits/chosen": -1.323652982711792,
11731
+ "logits/rejected": -1.2494463920593262,
11732
+ "logps/chosen": -192.47853088378906,
11733
+ "logps/rejected": -190.863037109375,
11734
+ "loss": 0.3828,
11735
+ "rewards/accuracies": 0.78125,
11736
+ "rewards/chosen": -0.46822619438171387,
11737
+ "rewards/margins": 1.0589056015014648,
11738
+ "rewards/rejected": -1.5271317958831787,
11739
+ "step": 1532
11740
+ },
11741
+ {
11742
+ "epoch": 1.77661961635903,
11743
+ "grad_norm": 58.83800639947019,
11744
+ "learning_rate": 6.802131149146373e-09,
11745
+ "logits/chosen": -1.254701018333435,
11746
+ "logits/rejected": -1.2828840017318726,
11747
+ "logps/chosen": -144.4114990234375,
11748
+ "logps/rejected": -155.79037475585938,
11749
+ "loss": 0.374,
11750
+ "rewards/accuracies": 0.84375,
11751
+ "rewards/chosen": -0.3679081201553345,
11752
+ "rewards/margins": 1.2586240768432617,
11753
+ "rewards/rejected": -1.6265323162078857,
11754
+ "step": 1534
11755
+ },
11756
+ {
11757
+ "epoch": 1.7789359391965256,
11758
+ "grad_norm": 68.11680500501693,
11759
+ "learning_rate": 6.662745100481271e-09,
11760
+ "logits/chosen": -1.1942329406738281,
11761
+ "logits/rejected": -1.289471983909607,
11762
+ "logps/chosen": -103.48456573486328,
11763
+ "logps/rejected": -110.4488754272461,
11764
+ "loss": 0.3996,
11765
+ "rewards/accuracies": 0.71875,
11766
+ "rewards/chosen": -0.294292151927948,
11767
+ "rewards/margins": 0.796977162361145,
11768
+ "rewards/rejected": -1.0912692546844482,
11769
+ "step": 1536
11770
+ },
11771
+ {
11772
+ "epoch": 1.781252262034021,
11773
+ "grad_norm": 60.49404420207855,
11774
+ "learning_rate": 6.5247527641316465e-09,
11775
+ "logits/chosen": -1.2070562839508057,
11776
+ "logits/rejected": -1.2548003196716309,
11777
+ "logps/chosen": -156.2920379638672,
11778
+ "logps/rejected": -177.38890075683594,
11779
+ "loss": 0.4381,
11780
+ "rewards/accuracies": 0.90625,
11781
+ "rewards/chosen": -0.03995545208454132,
11782
+ "rewards/margins": 1.5764446258544922,
11783
+ "rewards/rejected": -1.616400122642517,
11784
+ "step": 1538
11785
+ },
11786
+ {
11787
+ "epoch": 1.7835685848715164,
11788
+ "grad_norm": 58.839873216052546,
11789
+ "learning_rate": 6.388156200599726e-09,
11790
+ "logits/chosen": -1.1764907836914062,
11791
+ "logits/rejected": -1.2363911867141724,
11792
+ "logps/chosen": -138.08792114257812,
11793
+ "logps/rejected": -156.8811492919922,
11794
+ "loss": 0.4437,
11795
+ "rewards/accuracies": 0.8125,
11796
+ "rewards/chosen": -0.5578911304473877,
11797
+ "rewards/margins": 0.9130861163139343,
11798
+ "rewards/rejected": -1.4709770679473877,
11799
+ "step": 1540
11800
+ },
11801
+ {
11802
+ "epoch": 1.785884907709012,
11803
+ "grad_norm": 54.830012236677426,
11804
+ "learning_rate": 6.2529574495459815e-09,
11805
+ "logits/chosen": -1.2464194297790527,
11806
+ "logits/rejected": -1.2431282997131348,
11807
+ "logps/chosen": -146.96051025390625,
11808
+ "logps/rejected": -175.07481384277344,
11809
+ "loss": 0.386,
11810
+ "rewards/accuracies": 0.8125,
11811
+ "rewards/chosen": -0.2085748016834259,
11812
+ "rewards/margins": 1.4820951223373413,
11813
+ "rewards/rejected": -1.6906698942184448,
11814
+ "step": 1542
11815
+ },
11816
+ {
11817
+ "epoch": 1.7882012305465074,
11818
+ "grad_norm": 53.964612199236846,
11819
+ "learning_rate": 6.119158529758817e-09,
11820
+ "logits/chosen": -1.2010880708694458,
11821
+ "logits/rejected": -1.252152681350708,
11822
+ "logps/chosen": -125.40618896484375,
11823
+ "logps/rejected": -147.0822296142578,
11824
+ "loss": 0.4294,
11825
+ "rewards/accuracies": 0.84375,
11826
+ "rewards/chosen": -0.6274422407150269,
11827
+ "rewards/margins": 1.2626943588256836,
11828
+ "rewards/rejected": -1.8901365995407104,
11829
+ "step": 1544
11830
+ },
11831
+ {
11832
+ "epoch": 1.790517553384003,
11833
+ "grad_norm": 61.93429459296764,
11834
+ "learning_rate": 5.986761439124288e-09,
11835
+ "logits/chosen": -1.0499889850616455,
11836
+ "logits/rejected": -1.0637288093566895,
11837
+ "logps/chosen": -145.4034881591797,
11838
+ "logps/rejected": -170.8926239013672,
11839
+ "loss": 0.4097,
11840
+ "rewards/accuracies": 0.90625,
11841
+ "rewards/chosen": -1.0535945892333984,
11842
+ "rewards/margins": 1.4541335105895996,
11843
+ "rewards/rejected": -2.507727861404419,
11844
+ "step": 1546
11845
+ },
11846
+ {
11847
+ "epoch": 1.7928338762214984,
11848
+ "grad_norm": 63.39263653013202,
11849
+ "learning_rate": 5.855768154596363e-09,
11850
+ "logits/chosen": -1.2247127294540405,
11851
+ "logits/rejected": -1.287811040878296,
11852
+ "logps/chosen": -128.2520751953125,
11853
+ "logps/rejected": -145.6575927734375,
11854
+ "loss": 0.4182,
11855
+ "rewards/accuracies": 0.8125,
11856
+ "rewards/chosen": -0.20227603614330292,
11857
+ "rewards/margins": 0.8022910356521606,
11858
+ "rewards/rejected": -1.0045669078826904,
11859
+ "step": 1548
11860
+ },
11861
+ {
11862
+ "epoch": 1.7951501990589938,
11863
+ "grad_norm": 53.1979380263347,
11864
+ "learning_rate": 5.726180632167354e-09,
11865
+ "logits/chosen": -1.2052092552185059,
11866
+ "logits/rejected": -1.2159252166748047,
11867
+ "logps/chosen": -151.31918334960938,
11868
+ "logps/rejected": -184.94479370117188,
11869
+ "loss": 0.4075,
11870
+ "rewards/accuracies": 0.8125,
11871
+ "rewards/chosen": -0.06491108983755112,
11872
+ "rewards/margins": 1.7111616134643555,
11873
+ "rewards/rejected": -1.7760728597640991,
11874
+ "step": 1550
11875
+ },
11876
+ {
11877
+ "epoch": 1.7974665218964894,
11878
+ "grad_norm": 53.36509041975899,
11879
+ "learning_rate": 5.5980008068387655e-09,
11880
+ "logits/chosen": -1.192318081855774,
11881
+ "logits/rejected": -1.2422665357589722,
11882
+ "logps/chosen": -158.2563934326172,
11883
+ "logps/rejected": -208.37709045410156,
11884
+ "loss": 0.3783,
11885
+ "rewards/accuracies": 0.8125,
11886
+ "rewards/chosen": -0.3992021381855011,
11887
+ "rewards/margins": 1.9673078060150146,
11888
+ "rewards/rejected": -2.3665099143981934,
11889
+ "step": 1552
11890
+ },
11891
+ {
11892
+ "epoch": 1.7997828447339848,
11893
+ "grad_norm": 57.43435085106451,
11894
+ "learning_rate": 5.471230592592313e-09,
11895
+ "logits/chosen": -1.2281129360198975,
11896
+ "logits/rejected": -1.1943424940109253,
11897
+ "logps/chosen": -132.93118286132812,
11898
+ "logps/rejected": -142.4124755859375,
11899
+ "loss": 0.3999,
11900
+ "rewards/accuracies": 0.84375,
11901
+ "rewards/chosen": -0.2500740587711334,
11902
+ "rewards/margins": 0.9634323120117188,
11903
+ "rewards/rejected": -1.2135063409805298,
11904
+ "step": 1554
11905
+ },
11906
+ {
11907
+ "epoch": 1.8020991675714804,
11908
+ "grad_norm": 116.95703791110742,
11909
+ "learning_rate": 5.345871882361397e-09,
11910
+ "logits/chosen": -1.222663402557373,
11911
+ "logits/rejected": -1.2307226657867432,
11912
+ "logps/chosen": -195.47381591796875,
11913
+ "logps/rejected": -213.84588623046875,
11914
+ "loss": 0.5455,
11915
+ "rewards/accuracies": 0.71875,
11916
+ "rewards/chosen": -1.06570303440094,
11917
+ "rewards/margins": 1.080770492553711,
11918
+ "rewards/rejected": -2.1464734077453613,
11919
+ "step": 1556
11920
+ },
11921
+ {
11922
+ "epoch": 1.8044154904089758,
11923
+ "grad_norm": 54.00119490171407,
11924
+ "learning_rate": 5.221926548002875e-09,
11925
+ "logits/chosen": -1.1924062967300415,
11926
+ "logits/rejected": -1.269582748413086,
11927
+ "logps/chosen": -165.26943969726562,
11928
+ "logps/rejected": -179.38568115234375,
11929
+ "loss": 0.4258,
11930
+ "rewards/accuracies": 0.78125,
11931
+ "rewards/chosen": 0.21030552685260773,
11932
+ "rewards/margins": 1.1077656745910645,
11933
+ "rewards/rejected": -0.8974601030349731,
11934
+ "step": 1558
11935
+ },
11936
+ {
11937
+ "epoch": 1.8067318132464711,
11938
+ "grad_norm": 47.637201993987425,
11939
+ "learning_rate": 5.099396440269033e-09,
11940
+ "logits/chosen": -1.1668461561203003,
11941
+ "logits/rejected": -1.1675832271575928,
11942
+ "logps/chosen": -132.747314453125,
11943
+ "logps/rejected": -197.4693603515625,
11944
+ "loss": 0.3714,
11945
+ "rewards/accuracies": 0.96875,
11946
+ "rewards/chosen": -0.2991000711917877,
11947
+ "rewards/margins": 2.7344629764556885,
11948
+ "rewards/rejected": -3.0335628986358643,
11949
+ "step": 1560
11950
+ },
11951
+ {
11952
+ "epoch": 1.8090481360839668,
11953
+ "grad_norm": 73.3924537450436,
11954
+ "learning_rate": 4.978283388780002e-09,
11955
+ "logits/chosen": -1.2106759548187256,
11956
+ "logits/rejected": -1.3471499681472778,
11957
+ "logps/chosen": -172.1467742919922,
11958
+ "logps/rejected": -206.36143493652344,
11959
+ "loss": 0.3895,
11960
+ "rewards/accuracies": 0.8125,
11961
+ "rewards/chosen": -0.703016996383667,
11962
+ "rewards/margins": 1.2060117721557617,
11963
+ "rewards/rejected": -1.9090288877487183,
11964
+ "step": 1562
11965
+ },
11966
+ {
11967
+ "epoch": 1.8113644589214621,
11968
+ "grad_norm": 90.51012356320436,
11969
+ "learning_rate": 4.858589201996432e-09,
11970
+ "logits/chosen": -1.0378146171569824,
11971
+ "logits/rejected": -1.1732603311538696,
11972
+ "logps/chosen": -141.2643280029297,
11973
+ "logps/rejected": -164.4271697998047,
11974
+ "loss": 0.4848,
11975
+ "rewards/accuracies": 0.71875,
11976
+ "rewards/chosen": -0.8795535564422607,
11977
+ "rewards/margins": 0.9949630498886108,
11978
+ "rewards/rejected": -1.874516487121582,
11979
+ "step": 1564
11980
+ },
11981
+ {
11982
+ "epoch": 1.8136807817589577,
11983
+ "grad_norm": 56.01168000132923,
11984
+ "learning_rate": 4.740315667192441e-09,
11985
+ "logits/chosen": -1.1176464557647705,
11986
+ "logits/rejected": -1.2511212825775146,
11987
+ "logps/chosen": -103.09518432617188,
11988
+ "logps/rejected": -135.39122009277344,
11989
+ "loss": 0.4393,
11990
+ "rewards/accuracies": 0.78125,
11991
+ "rewards/chosen": -0.2941249907016754,
11992
+ "rewards/margins": 0.9569557905197144,
11993
+ "rewards/rejected": -1.2510807514190674,
11994
+ "step": 1566
11995
+ },
11996
+ {
11997
+ "epoch": 1.8159971045964531,
11998
+ "grad_norm": 48.16666589487942,
11999
+ "learning_rate": 4.623464550429002e-09,
12000
+ "logits/chosen": -1.102777361869812,
12001
+ "logits/rejected": -1.1394641399383545,
12002
+ "logps/chosen": -111.80138397216797,
12003
+ "logps/rejected": -145.2130126953125,
12004
+ "loss": 0.4561,
12005
+ "rewards/accuracies": 0.84375,
12006
+ "rewards/chosen": -0.7176414728164673,
12007
+ "rewards/margins": 1.2812902927398682,
12008
+ "rewards/rejected": -1.998931646347046,
12009
+ "step": 1568
12010
+ },
12011
+ {
12012
+ "epoch": 1.8183134274339485,
12013
+ "grad_norm": 50.97749085021057,
12014
+ "learning_rate": 4.508037596527525e-09,
12015
+ "logits/chosen": -1.1966917514801025,
12016
+ "logits/rejected": -1.2247413396835327,
12017
+ "logps/chosen": -114.48523712158203,
12018
+ "logps/rejected": -128.622802734375,
12019
+ "loss": 0.3797,
12020
+ "rewards/accuracies": 0.78125,
12021
+ "rewards/chosen": -0.19608543813228607,
12022
+ "rewards/margins": 0.9697508215904236,
12023
+ "rewards/rejected": -1.1658360958099365,
12024
+ "step": 1570
12025
+ },
12026
+ {
12027
+ "epoch": 1.8206297502714441,
12028
+ "grad_norm": 54.387837657286084,
12029
+ "learning_rate": 4.39403652904381e-09,
12030
+ "logits/chosen": -1.1147388219833374,
12031
+ "logits/rejected": -1.1594665050506592,
12032
+ "logps/chosen": -114.78770446777344,
12033
+ "logps/rejected": -155.918701171875,
12034
+ "loss": 0.3961,
12035
+ "rewards/accuracies": 0.9375,
12036
+ "rewards/chosen": -0.18746113777160645,
12037
+ "rewards/margins": 1.8321788311004639,
12038
+ "rewards/rejected": -2.0196399688720703,
12039
+ "step": 1572
12040
+ },
12041
+ {
12042
+ "epoch": 1.8229460731089395,
12043
+ "grad_norm": 62.271451803387365,
12044
+ "learning_rate": 4.2814630502422845e-09,
12045
+ "logits/chosen": -1.1847018003463745,
12046
+ "logits/rejected": -1.1410635709762573,
12047
+ "logps/chosen": -178.85458374023438,
12048
+ "logps/rejected": -211.72219848632812,
12049
+ "loss": 0.4279,
12050
+ "rewards/accuracies": 0.75,
12051
+ "rewards/chosen": -0.22784435749053955,
12052
+ "rewards/margins": 2.5258147716522217,
12053
+ "rewards/rejected": -2.7536590099334717,
12054
+ "step": 1574
12055
+ },
12056
+ {
12057
+ "epoch": 1.8252623959464351,
12058
+ "grad_norm": 57.174518848316346,
12059
+ "learning_rate": 4.170318841070708e-09,
12060
+ "logits/chosen": -1.120819330215454,
12061
+ "logits/rejected": -1.1634063720703125,
12062
+ "logps/chosen": -140.26319885253906,
12063
+ "logps/rejected": -207.86880493164062,
12064
+ "loss": 0.4442,
12065
+ "rewards/accuracies": 0.84375,
12066
+ "rewards/chosen": -0.25584009289741516,
12067
+ "rewards/margins": 1.6964097023010254,
12068
+ "rewards/rejected": -1.9522497653961182,
12069
+ "step": 1576
12070
+ },
12071
+ {
12072
+ "epoch": 1.8275787187839305,
12073
+ "grad_norm": 57.29425789262467,
12074
+ "learning_rate": 4.060605561134889e-09,
12075
+ "logits/chosen": -1.3027273416519165,
12076
+ "logits/rejected": -1.2673333883285522,
12077
+ "logps/chosen": -170.17152404785156,
12078
+ "logps/rejected": -188.33880615234375,
12079
+ "loss": 0.4311,
12080
+ "rewards/accuracies": 0.84375,
12081
+ "rewards/chosen": -0.42000892758369446,
12082
+ "rewards/margins": 1.3922333717346191,
12083
+ "rewards/rejected": -1.8122422695159912,
12084
+ "step": 1578
12085
+ },
12086
+ {
12087
+ "epoch": 1.829895041621426,
12088
+ "grad_norm": 65.96165236575662,
12089
+ "learning_rate": 3.952324848674004e-09,
12090
+ "logits/chosen": -1.1435868740081787,
12091
+ "logits/rejected": -1.2456907033920288,
12092
+ "logps/chosen": -118.90472412109375,
12093
+ "logps/rejected": -160.82818603515625,
12094
+ "loss": 0.3963,
12095
+ "rewards/accuracies": 0.875,
12096
+ "rewards/chosen": -0.47032859921455383,
12097
+ "rewards/margins": 1.457180142402649,
12098
+ "rewards/rejected": -1.9275087118148804,
12099
+ "step": 1580
12100
+ },
12101
+ {
12102
+ "epoch": 1.8322113644589213,
12103
+ "grad_norm": 62.96392877654251,
12104
+ "learning_rate": 3.8454783205361774e-09,
12105
+ "logits/chosen": -1.2181570529937744,
12106
+ "logits/rejected": -1.2495853900909424,
12107
+ "logps/chosen": -195.74876403808594,
12108
+ "logps/rejected": -292.0626220703125,
12109
+ "loss": 0.4112,
12110
+ "rewards/accuracies": 0.8125,
12111
+ "rewards/chosen": -0.3267236649990082,
12112
+ "rewards/margins": 4.259873390197754,
12113
+ "rewards/rejected": -4.586597442626953,
12114
+ "step": 1582
12115
+ },
12116
+ {
12117
+ "epoch": 1.834527687296417,
12118
+ "grad_norm": 58.298377548314235,
12119
+ "learning_rate": 3.740067572154238e-09,
12120
+ "logits/chosen": -1.292594075202942,
12121
+ "logits/rejected": -1.3315826654434204,
12122
+ "logps/chosen": -154.32740783691406,
12123
+ "logps/rejected": -175.98606872558594,
12124
+ "loss": 0.4088,
12125
+ "rewards/accuracies": 0.8125,
12126
+ "rewards/chosen": -0.2856728136539459,
12127
+ "rewards/margins": 1.166110634803772,
12128
+ "rewards/rejected": -1.4517834186553955,
12129
+ "step": 1584
12130
+ },
12131
+ {
12132
+ "epoch": 1.8368440101339125,
12133
+ "grad_norm": 59.34894292485851,
12134
+ "learning_rate": 3.6360941775219534e-09,
12135
+ "logits/chosen": -1.2552549839019775,
12136
+ "logits/rejected": -1.3246078491210938,
12137
+ "logps/chosen": -165.2515869140625,
12138
+ "logps/rejected": -189.0300750732422,
12139
+ "loss": 0.3893,
12140
+ "rewards/accuracies": 0.84375,
12141
+ "rewards/chosen": -0.31748124957084656,
12142
+ "rewards/margins": 1.6454672813415527,
12143
+ "rewards/rejected": -1.9629485607147217,
12144
+ "step": 1586
12145
+ },
12146
+ {
12147
+ "epoch": 1.839160332971408,
12148
+ "grad_norm": 72.01635850106565,
12149
+ "learning_rate": 3.53355968917054e-09,
12150
+ "logits/chosen": -1.1828457117080688,
12151
+ "logits/rejected": -1.1595231294631958,
12152
+ "logps/chosen": -188.94688415527344,
12153
+ "logps/rejected": -214.75,
12154
+ "loss": 0.3615,
12155
+ "rewards/accuracies": 0.84375,
12156
+ "rewards/chosen": -0.9493909478187561,
12157
+ "rewards/margins": 1.7555177211761475,
12158
+ "rewards/rejected": -2.704908609390259,
12159
+ "step": 1588
12160
+ },
12161
+ {
12162
+ "epoch": 1.8414766558089033,
12163
+ "grad_norm": 62.6493622965171,
12164
+ "learning_rate": 3.432465638145443e-09,
12165
+ "logits/chosen": -1.2264246940612793,
12166
+ "logits/rejected": -1.2353841066360474,
12167
+ "logps/chosen": -195.12002563476562,
12168
+ "logps/rejected": -207.8896942138672,
12169
+ "loss": 0.3766,
12170
+ "rewards/accuracies": 0.78125,
12171
+ "rewards/chosen": -0.44329333305358887,
12172
+ "rewards/margins": 1.4592864513397217,
12173
+ "rewards/rejected": -1.9025800228118896,
12174
+ "step": 1590
12175
+ },
12176
+ {
12177
+ "epoch": 1.8437929786463987,
12178
+ "grad_norm": 90.93634487708707,
12179
+ "learning_rate": 3.3328135339834917e-09,
12180
+ "logits/chosen": -1.2629611492156982,
12181
+ "logits/rejected": -1.249568223953247,
12182
+ "logps/chosen": -190.23126220703125,
12183
+ "logps/rejected": -229.70721435546875,
12184
+ "loss": 0.4445,
12185
+ "rewards/accuracies": 0.84375,
12186
+ "rewards/chosen": -0.7300775647163391,
12187
+ "rewards/margins": 1.7166606187820435,
12188
+ "rewards/rejected": -2.4467382431030273,
12189
+ "step": 1592
12190
+ },
12191
+ {
12192
+ "epoch": 1.8461093014838943,
12193
+ "grad_norm": 70.40594487298823,
12194
+ "learning_rate": 3.234604864690349e-09,
12195
+ "logits/chosen": -1.1563414335250854,
12196
+ "logits/rejected": -1.1132121086120605,
12197
+ "logps/chosen": -112.93782806396484,
12198
+ "logps/rejected": -115.19876861572266,
12199
+ "loss": 0.4635,
12200
+ "rewards/accuracies": 0.75,
12201
+ "rewards/chosen": -0.16091413795948029,
12202
+ "rewards/margins": 1.0079346895217896,
12203
+ "rewards/rejected": -1.1688487529754639,
12204
+ "step": 1594
12205
+ },
12206
+ {
12207
+ "epoch": 1.84842562432139,
12208
+ "grad_norm": 70.46959430003976,
12209
+ "learning_rate": 3.13784109671833e-09,
12210
+ "logits/chosen": -1.1287944316864014,
12211
+ "logits/rejected": -1.1954846382141113,
12212
+ "logps/chosen": -145.52749633789062,
12213
+ "logps/rejected": -168.68896484375,
12214
+ "loss": 0.4428,
12215
+ "rewards/accuracies": 0.84375,
12216
+ "rewards/chosen": -0.435101717710495,
12217
+ "rewards/margins": 1.1136534214019775,
12218
+ "rewards/rejected": -1.548755168914795,
12219
+ "step": 1596
12220
+ },
12221
+ {
12222
+ "epoch": 1.8507419471588853,
12223
+ "grad_norm": 63.53152924846159,
12224
+ "learning_rate": 3.0425236749444307e-09,
12225
+ "logits/chosen": -1.1079940795898438,
12226
+ "logits/rejected": -1.1838057041168213,
12227
+ "logps/chosen": -104.38517761230469,
12228
+ "logps/rejected": -126.13815307617188,
12229
+ "loss": 0.4119,
12230
+ "rewards/accuracies": 0.8125,
12231
+ "rewards/chosen": -0.20333430171012878,
12232
+ "rewards/margins": 1.2202249765396118,
12233
+ "rewards/rejected": -1.4235591888427734,
12234
+ "step": 1598
12235
+ },
12236
+ {
12237
+ "epoch": 1.8530582699963807,
12238
+ "grad_norm": 66.75495278471351,
12239
+ "learning_rate": 2.9486540226488555e-09,
12240
+ "logits/chosen": -1.1984293460845947,
12241
+ "logits/rejected": -1.1811829805374146,
12242
+ "logps/chosen": -107.63018035888672,
12243
+ "logps/rejected": -130.51141357421875,
12244
+ "loss": 0.4185,
12245
+ "rewards/accuracies": 0.75,
12246
+ "rewards/chosen": -0.44386693835258484,
12247
+ "rewards/margins": 1.2538187503814697,
12248
+ "rewards/rejected": -1.697685718536377,
12249
+ "step": 1600
12250
+ },
12251
+ {
12252
+ "epoch": 1.8530582699963807,
12253
+ "eval_logits/chosen": -1.2096275091171265,
12254
+ "eval_logits/rejected": -1.2041908502578735,
12255
+ "eval_logps/chosen": -144.02456665039062,
12256
+ "eval_logps/rejected": -149.35797119140625,
12257
+ "eval_loss": 0.59078049659729,
12258
+ "eval_rewards/accuracies": 0.7599999904632568,
12259
+ "eval_rewards/chosen": -0.8393388986587524,
12260
+ "eval_rewards/margins": 0.7011021375656128,
12261
+ "eval_rewards/rejected": -1.5404411554336548,
12262
+ "eval_runtime": 26.1157,
12263
+ "eval_samples_per_second": 3.829,
12264
+ "eval_steps_per_second": 0.957,
12265
+ "step": 1600
12266
+ },
12267
+ {
12268
+ "epoch": 1.855374592833876,
12269
+ "grad_norm": 51.25806551926982,
12270
+ "learning_rate": 2.856233541493691e-09,
12271
+ "logits/chosen": -1.1180177927017212,
12272
+ "logits/rejected": -1.153393268585205,
12273
+ "logps/chosen": -144.27127075195312,
12274
+ "logps/rejected": -172.8219757080078,
12275
+ "loss": 0.4103,
12276
+ "rewards/accuracies": 0.875,
12277
+ "rewards/chosen": -0.5550628900527954,
12278
+ "rewards/margins": 1.737399697303772,
12279
+ "rewards/rejected": -2.2924625873565674,
12280
+ "step": 1602
12281
+ },
12282
+ {
12283
+ "epoch": 1.8576909156713717,
12284
+ "grad_norm": 44.574245908096394,
12285
+ "learning_rate": 2.7652636115019554e-09,
12286
+ "logits/chosen": -1.2830660343170166,
12287
+ "logits/rejected": -1.3382513523101807,
12288
+ "logps/chosen": -186.8584747314453,
12289
+ "logps/rejected": -214.39341735839844,
12290
+ "loss": 0.336,
12291
+ "rewards/accuracies": 0.90625,
12292
+ "rewards/chosen": 0.011630617082118988,
12293
+ "rewards/margins": 2.068033218383789,
12294
+ "rewards/rejected": -2.0564029216766357,
12295
+ "step": 1604
12296
+ },
12297
+ {
12298
+ "epoch": 1.8600072385088673,
12299
+ "grad_norm": 88.24929156836725,
12300
+ "learning_rate": 2.6757455910370487e-09,
12301
+ "logits/chosen": -1.1889640092849731,
12302
+ "logits/rejected": -1.178146481513977,
12303
+ "logps/chosen": -169.42417907714844,
12304
+ "logps/rejected": -202.3689422607422,
12305
+ "loss": 0.453,
12306
+ "rewards/accuracies": 0.8125,
12307
+ "rewards/chosen": -0.4166257083415985,
12308
+ "rewards/margins": 1.3977904319763184,
12309
+ "rewards/rejected": -1.8144161701202393,
12310
+ "step": 1606
12311
+ },
12312
+ {
12313
+ "epoch": 1.8623235613463627,
12314
+ "grad_norm": 54.823245822280576,
12315
+ "learning_rate": 2.5876808167825005e-09,
12316
+ "logits/chosen": -1.2597419023513794,
12317
+ "logits/rejected": -1.193768858909607,
12318
+ "logps/chosen": -93.74658203125,
12319
+ "logps/rejected": -92.6616439819336,
12320
+ "loss": 0.3924,
12321
+ "rewards/accuracies": 0.6875,
12322
+ "rewards/chosen": -0.25879502296447754,
12323
+ "rewards/margins": 0.4778652489185333,
12324
+ "rewards/rejected": -0.7366602420806885,
12325
+ "step": 1608
12326
+ },
12327
+ {
12328
+ "epoch": 1.864639884183858,
12329
+ "grad_norm": 74.02926136780609,
12330
+ "learning_rate": 2.5010706037218885e-09,
12331
+ "logits/chosen": -1.2314317226409912,
12332
+ "logits/rejected": -1.2886399030685425,
12333
+ "logps/chosen": -163.94842529296875,
12334
+ "logps/rejected": -195.11390686035156,
12335
+ "loss": 0.3971,
12336
+ "rewards/accuracies": 0.78125,
12337
+ "rewards/chosen": -0.2574860155582428,
12338
+ "rewards/margins": 1.3789194822311401,
12339
+ "rewards/rejected": -1.6364054679870605,
12340
+ "step": 1610
12341
+ },
12342
+ {
12343
+ "epoch": 1.8669562070213535,
12344
+ "grad_norm": 54.30860117915708,
12345
+ "learning_rate": 2.4159162451193094e-09,
12346
+ "logits/chosen": -1.0902681350708008,
12347
+ "logits/rejected": -1.112775206565857,
12348
+ "logps/chosen": -140.3288116455078,
12349
+ "logps/rejected": -188.05210876464844,
12350
+ "loss": 0.4003,
12351
+ "rewards/accuracies": 0.90625,
12352
+ "rewards/chosen": -0.1892092376947403,
12353
+ "rewards/margins": 1.8315831422805786,
12354
+ "rewards/rejected": -2.0207924842834473,
12355
+ "step": 1612
12356
+ },
12357
+ {
12358
+ "epoch": 1.869272529858849,
12359
+ "grad_norm": 62.18685455386846,
12360
+ "learning_rate": 2.3322190125000475e-09,
12361
+ "logits/chosen": -1.0966382026672363,
12362
+ "logits/rejected": -1.1658515930175781,
12363
+ "logps/chosen": -116.89921569824219,
12364
+ "logps/rejected": -151.35015869140625,
12365
+ "loss": 0.4112,
12366
+ "rewards/accuracies": 0.75,
12367
+ "rewards/chosen": -0.4243711233139038,
12368
+ "rewards/margins": 1.4443333148956299,
12369
+ "rewards/rejected": -1.8687043190002441,
12370
+ "step": 1614
12371
+ },
12372
+ {
12373
+ "epoch": 1.8715888526963447,
12374
+ "grad_norm": 55.591911669551806,
12375
+ "learning_rate": 2.24998015563157e-09,
12376
+ "logits/chosen": -1.1172374486923218,
12377
+ "logits/rejected": -1.1134474277496338,
12378
+ "logps/chosen": -106.15010070800781,
12379
+ "logps/rejected": -122.0849609375,
12380
+ "loss": 0.42,
12381
+ "rewards/accuracies": 0.90625,
12382
+ "rewards/chosen": -0.5767372250556946,
12383
+ "rewards/margins": 0.9947463274002075,
12384
+ "rewards/rejected": -1.5714833736419678,
12385
+ "step": 1616
12386
+ },
12387
+ {
12388
+ "epoch": 1.87390517553384,
12389
+ "grad_norm": 69.67114883544231,
12390
+ "learning_rate": 2.169200902504842e-09,
12391
+ "logits/chosen": -1.291989803314209,
12392
+ "logits/rejected": -1.3668596744537354,
12393
+ "logps/chosen": -140.99050903320312,
12394
+ "logps/rejected": -164.55636596679688,
12395
+ "loss": 0.4102,
12396
+ "rewards/accuracies": 0.8125,
12397
+ "rewards/chosen": -0.1543090045452118,
12398
+ "rewards/margins": 1.0028785467147827,
12399
+ "rewards/rejected": -1.1571874618530273,
12400
+ "step": 1618
12401
+ },
12402
+ {
12403
+ "epoch": 1.8762214983713354,
12404
+ "grad_norm": 49.47929313355962,
12405
+ "learning_rate": 2.0898824593160503e-09,
12406
+ "logits/chosen": -1.1244778633117676,
12407
+ "logits/rejected": -1.1484088897705078,
12408
+ "logps/chosen": -115.74507904052734,
12409
+ "logps/rejected": -133.63916015625,
12410
+ "loss": 0.3943,
12411
+ "rewards/accuracies": 0.71875,
12412
+ "rewards/chosen": -0.4605577886104584,
12413
+ "rewards/margins": 0.8462937474250793,
12414
+ "rewards/rejected": -1.3068513870239258,
12415
+ "step": 1620
12416
+ },
12417
+ {
12418
+ "epoch": 1.8785378212088308,
12419
+ "grad_norm": 51.80324368762919,
12420
+ "learning_rate": 2.012026010448542e-09,
12421
+ "logits/chosen": -1.0954941511154175,
12422
+ "logits/rejected": -1.160184383392334,
12423
+ "logps/chosen": -131.16983032226562,
12424
+ "logps/rejected": -178.2694549560547,
12425
+ "loss": 0.4011,
12426
+ "rewards/accuracies": 0.84375,
12427
+ "rewards/chosen": -0.1698540300130844,
12428
+ "rewards/margins": 1.7133314609527588,
12429
+ "rewards/rejected": -1.8831853866577148,
12430
+ "step": 1622
12431
+ },
12432
+ {
12433
+ "epoch": 1.8808541440463264,
12434
+ "grad_norm": 61.62870922068943,
12435
+ "learning_rate": 1.935632718455171e-09,
12436
+ "logits/chosen": -1.167246699333191,
12437
+ "logits/rejected": -1.2080024480819702,
12438
+ "logps/chosen": -154.55758666992188,
12439
+ "logps/rejected": -188.28201293945312,
12440
+ "loss": 0.3673,
12441
+ "rewards/accuracies": 0.84375,
12442
+ "rewards/chosen": -0.5035812854766846,
12443
+ "rewards/margins": 1.6042046546936035,
12444
+ "rewards/rejected": -2.107785701751709,
12445
+ "step": 1624
12446
+ },
12447
+ {
12448
+ "epoch": 1.883170466883822,
12449
+ "grad_norm": 66.67930280370663,
12450
+ "learning_rate": 1.860703724040935e-09,
12451
+ "logits/chosen": -1.1037479639053345,
12452
+ "logits/rejected": -1.0890851020812988,
12453
+ "logps/chosen": -151.0048370361328,
12454
+ "logps/rejected": -170.82943725585938,
12455
+ "loss": 0.4514,
12456
+ "rewards/accuracies": 0.6875,
12457
+ "rewards/chosen": -0.4693964123725891,
12458
+ "rewards/margins": 0.7652498483657837,
12459
+ "rewards/rejected": -1.2346463203430176,
12460
+ "step": 1626
12461
+ },
12462
+ {
12463
+ "epoch": 1.8854867897213174,
12464
+ "grad_norm": 64.45068485207041,
12465
+ "learning_rate": 1.7872401460458874e-09,
12466
+ "logits/chosen": -1.1636077165603638,
12467
+ "logits/rejected": -1.2113772630691528,
12468
+ "logps/chosen": -128.23899841308594,
12469
+ "logps/rejected": -151.98800659179688,
12470
+ "loss": 0.4074,
12471
+ "rewards/accuracies": 0.75,
12472
+ "rewards/chosen": -0.15583369135856628,
12473
+ "rewards/margins": 1.3744523525238037,
12474
+ "rewards/rejected": -1.5302859544754028,
12475
+ "step": 1628
12476
+ },
12477
+ {
12478
+ "epoch": 1.8878031125588128,
12479
+ "grad_norm": 60.44416949725557,
12480
+ "learning_rate": 1.7152430814285302e-09,
12481
+ "logits/chosen": -1.2152669429779053,
12482
+ "logits/rejected": -1.225684404373169,
12483
+ "logps/chosen": -145.33447265625,
12484
+ "logps/rejected": -170.64059448242188,
12485
+ "loss": 0.4299,
12486
+ "rewards/accuracies": 0.875,
12487
+ "rewards/chosen": -0.09620651602745056,
12488
+ "rewards/margins": 1.3490362167358398,
12489
+ "rewards/rejected": -1.4452428817749023,
12490
+ "step": 1630
12491
+ },
12492
+ {
12493
+ "epoch": 1.8901194353963082,
12494
+ "grad_norm": 46.81333988402518,
12495
+ "learning_rate": 1.6447136052493704e-09,
12496
+ "logits/chosen": -1.169386863708496,
12497
+ "logits/rejected": -1.248286247253418,
12498
+ "logps/chosen": -163.55990600585938,
12499
+ "logps/rejected": -216.13929748535156,
12500
+ "loss": 0.3547,
12501
+ "rewards/accuracies": 1.0,
12502
+ "rewards/chosen": -0.5737725496292114,
12503
+ "rewards/margins": 1.9429514408111572,
12504
+ "rewards/rejected": -2.516724109649658,
12505
+ "step": 1632
12506
+ },
12507
+ {
12508
+ "epoch": 1.8924357582338038,
12509
+ "grad_norm": 76.34596117355929,
12510
+ "learning_rate": 1.5756527706548561e-09,
12511
+ "logits/chosen": -1.2912683486938477,
12512
+ "logits/rejected": -1.2929950952529907,
12513
+ "logps/chosen": -192.79061889648438,
12514
+ "logps/rejected": -216.2474365234375,
12515
+ "loss": 0.4647,
12516
+ "rewards/accuracies": 0.8125,
12517
+ "rewards/chosen": -0.6607008576393127,
12518
+ "rewards/margins": 1.0471830368041992,
12519
+ "rewards/rejected": -1.7078838348388672,
12520
+ "step": 1634
12521
+ },
12522
+ {
12523
+ "epoch": 1.8947520810712994,
12524
+ "grad_norm": 50.49082900430503,
12525
+ "learning_rate": 1.5080616088616882e-09,
12526
+ "logits/chosen": -1.1574630737304688,
12527
+ "logits/rejected": -1.2174662351608276,
12528
+ "logps/chosen": -105.63571166992188,
12529
+ "logps/rejected": -138.35476684570312,
12530
+ "loss": 0.4099,
12531
+ "rewards/accuracies": 0.84375,
12532
+ "rewards/chosen": -0.4468805193901062,
12533
+ "rewards/margins": 1.2918894290924072,
12534
+ "rewards/rejected": -1.7387701272964478,
12535
+ "step": 1636
12536
+ },
12537
+ {
12538
+ "epoch": 1.8970684039087948,
12539
+ "grad_norm": 75.9093198815468,
12540
+ "learning_rate": 1.4419411291413885e-09,
12541
+ "logits/chosen": -1.1982598304748535,
12542
+ "logits/rejected": -1.1942592859268188,
12543
+ "logps/chosen": -191.63914489746094,
12544
+ "logps/rejected": -217.94439697265625,
12545
+ "loss": 0.4268,
12546
+ "rewards/accuracies": 0.71875,
12547
+ "rewards/chosen": -0.6322917938232422,
12548
+ "rewards/margins": 1.844104528427124,
12549
+ "rewards/rejected": -2.4763965606689453,
12550
+ "step": 1638
12551
+ },
12552
+ {
12553
+ "epoch": 1.8993847267462902,
12554
+ "grad_norm": 58.34682402089141,
12555
+ "learning_rate": 1.3772923188052787e-09,
12556
+ "logits/chosen": -1.0590007305145264,
12557
+ "logits/rejected": -1.0922439098358154,
12558
+ "logps/chosen": -90.13923645019531,
12559
+ "logps/rejected": -109.63917541503906,
12560
+ "loss": 0.4401,
12561
+ "rewards/accuracies": 0.84375,
12562
+ "rewards/chosen": -0.1902827024459839,
12563
+ "rewards/margins": 1.1345347166061401,
12564
+ "rewards/rejected": -1.3248172998428345,
12565
+ "step": 1640
12566
+ },
12567
+ {
12568
+ "epoch": 1.9017010495837856,
12569
+ "grad_norm": 60.17667883042894,
12570
+ "learning_rate": 1.3141161431896808e-09,
12571
+ "logits/chosen": -1.292588472366333,
12572
+ "logits/rejected": -1.2948338985443115,
12573
+ "logps/chosen": -141.24307250976562,
12574
+ "logps/rejected": -183.1881866455078,
12575
+ "loss": 0.4093,
12576
+ "rewards/accuracies": 0.84375,
12577
+ "rewards/chosen": -0.06378068029880524,
12578
+ "rewards/margins": 1.4755961894989014,
12579
+ "rewards/rejected": -1.539376974105835,
12580
+ "step": 1642
12581
+ },
12582
+ {
12583
+ "epoch": 1.9040173724212812,
12584
+ "grad_norm": 46.419475472935424,
12585
+ "learning_rate": 1.2524135456415286e-09,
12586
+ "logits/chosen": -1.3303675651550293,
12587
+ "logits/rejected": -1.3957011699676514,
12588
+ "logps/chosen": -197.4314727783203,
12589
+ "logps/rejected": -230.6079559326172,
12590
+ "loss": 0.3864,
12591
+ "rewards/accuracies": 0.84375,
12592
+ "rewards/chosen": -0.5181460380554199,
12593
+ "rewards/margins": 1.6645830869674683,
12594
+ "rewards/rejected": -2.1827290058135986,
12595
+ "step": 1644
12596
+ },
12597
+ {
12598
+ "epoch": 1.9063336952587768,
12599
+ "grad_norm": 63.51262338219536,
12600
+ "learning_rate": 1.1921854475043125e-09,
12601
+ "logits/chosen": -1.3057444095611572,
12602
+ "logits/rejected": -1.3598231077194214,
12603
+ "logps/chosen": -140.8748779296875,
12604
+ "logps/rejected": -162.47158813476562,
12605
+ "loss": 0.438,
12606
+ "rewards/accuracies": 0.8125,
12607
+ "rewards/chosen": -0.24470748007297516,
12608
+ "rewards/margins": 1.0919952392578125,
12609
+ "rewards/rejected": -1.3367027044296265,
12610
+ "step": 1646
12611
+ },
12612
+ {
12613
+ "epoch": 1.9086500180962722,
12614
+ "grad_norm": 59.16346811149136,
12615
+ "learning_rate": 1.133432748104257e-09,
12616
+ "logits/chosen": -1.2664871215820312,
12617
+ "logits/rejected": -1.2415859699249268,
12618
+ "logps/chosen": -168.35910034179688,
12619
+ "logps/rejected": -186.53665161132812,
12620
+ "loss": 0.3824,
12621
+ "rewards/accuracies": 0.75,
12622
+ "rewards/chosen": -0.3128247559070587,
12623
+ "rewards/margins": 1.2026112079620361,
12624
+ "rewards/rejected": -1.5154359340667725,
12625
+ "step": 1648
12626
+ },
12627
+ {
12628
+ "epoch": 1.9109663409337676,
12629
+ "grad_norm": 72.05923858294557,
12630
+ "learning_rate": 1.0761563247369322e-09,
12631
+ "logits/chosen": -1.2224782705307007,
12632
+ "logits/rejected": -1.2964767217636108,
12633
+ "logps/chosen": -149.58824157714844,
12634
+ "logps/rejected": -172.06092834472656,
12635
+ "loss": 0.4327,
12636
+ "rewards/accuracies": 0.8125,
12637
+ "rewards/chosen": -0.10623270273208618,
12638
+ "rewards/margins": 1.0352814197540283,
12639
+ "rewards/rejected": -1.1415140628814697,
12640
+ "step": 1650
12641
+ },
12642
+ {
12643
+ "epoch": 1.913282663771263,
12644
+ "grad_norm": 72.25189940099223,
12645
+ "learning_rate": 1.0203570326541622e-09,
12646
+ "logits/chosen": -1.0865113735198975,
12647
+ "logits/rejected": -1.1271172761917114,
12648
+ "logps/chosen": -103.49948120117188,
12649
+ "logps/rejected": -108.95231628417969,
12650
+ "loss": 0.4442,
12651
+ "rewards/accuracies": 0.8125,
12652
+ "rewards/chosen": -0.29463884234428406,
12653
+ "rewards/margins": 0.5380735397338867,
12654
+ "rewards/rejected": -0.8327123522758484,
12655
+ "step": 1652
12656
+ },
12657
+ {
12658
+ "epoch": 1.9155989866087586,
12659
+ "grad_norm": 58.87468402600475,
12660
+ "learning_rate": 9.660357050512158e-10,
12661
+ "logits/chosen": -1.1705281734466553,
12662
+ "logits/rejected": -1.213219165802002,
12663
+ "logps/chosen": -151.6047821044922,
12664
+ "logps/rejected": -167.16241455078125,
12665
+ "loss": 0.4755,
12666
+ "rewards/accuracies": 0.8125,
12667
+ "rewards/chosen": -1.2385808229446411,
12668
+ "rewards/margins": 0.9532268047332764,
12669
+ "rewards/rejected": -2.191807746887207,
12670
+ "step": 1654
12671
+ },
12672
+ {
12673
+ "epoch": 1.9179153094462542,
12674
+ "grad_norm": 61.13297680975046,
12675
+ "learning_rate": 9.131931530544146e-10,
12676
+ "logits/chosen": -1.1430917978286743,
12677
+ "logits/rejected": -1.1427133083343506,
12678
+ "logps/chosen": -152.1170654296875,
12679
+ "logps/rejected": -184.9088592529297,
12680
+ "loss": 0.4595,
12681
+ "rewards/accuracies": 0.8125,
12682
+ "rewards/chosen": -0.958507239818573,
12683
+ "rewards/margins": 1.5942294597625732,
12684
+ "rewards/rejected": -2.552736759185791,
12685
+ "step": 1656
12686
+ },
12687
+ {
12688
+ "epoch": 1.9202316322837496,
12689
+ "grad_norm": 52.83655360025748,
12690
+ "learning_rate": 8.618301657089877e-10,
12691
+ "logits/chosen": -1.190130352973938,
12692
+ "logits/rejected": -1.2142915725708008,
12693
+ "logps/chosen": -161.54466247558594,
12694
+ "logps/rejected": -182.72946166992188,
12695
+ "loss": 0.3894,
12696
+ "rewards/accuracies": 0.84375,
12697
+ "rewards/chosen": -0.6212272047996521,
12698
+ "rewards/margins": 1.291230320930481,
12699
+ "rewards/rejected": -1.9124574661254883,
12700
+ "step": 1658
12701
+ },
12702
+ {
12703
+ "epoch": 1.922547955121245,
12704
+ "grad_norm": 110.57140073148696,
12705
+ "learning_rate": 8.119475099673035e-10,
12706
+ "logits/chosen": -1.1131267547607422,
12707
+ "logits/rejected": -1.2174605131149292,
12708
+ "logps/chosen": -157.0382843017578,
12709
+ "logps/rejected": -187.213134765625,
12710
+ "loss": 0.4364,
12711
+ "rewards/accuracies": 0.78125,
12712
+ "rewards/chosen": -0.5017825961112976,
12713
+ "rewards/margins": 0.9430520534515381,
12714
+ "rewards/rejected": -1.444834589958191,
12715
+ "step": 1660
12716
+ },
12717
+ {
12718
+ "epoch": 1.9248642779587404,
12719
+ "grad_norm": 53.02147974570211,
12720
+ "learning_rate": 7.635459306773784e-10,
12721
+ "logits/chosen": -1.1875760555267334,
12722
+ "logits/rejected": -1.2432739734649658,
12723
+ "logps/chosen": -147.28131103515625,
12724
+ "logps/rejected": -169.49566650390625,
12725
+ "loss": 0.43,
12726
+ "rewards/accuracies": 0.84375,
12727
+ "rewards/chosen": -0.40067267417907715,
12728
+ "rewards/margins": 1.1304292678833008,
12729
+ "rewards/rejected": -1.531101942062378,
12730
+ "step": 1662
12731
+ },
12732
+ {
12733
+ "epoch": 1.927180600796236,
12734
+ "grad_norm": 64.80466315551176,
12735
+ "learning_rate": 7.166261505718418e-10,
12736
+ "logits/chosen": -1.222117304801941,
12737
+ "logits/rejected": -1.2081456184387207,
12738
+ "logps/chosen": -157.27780151367188,
12739
+ "logps/rejected": -173.7355499267578,
12740
+ "loss": 0.3734,
12741
+ "rewards/accuracies": 0.84375,
12742
+ "rewards/chosen": -0.33614563941955566,
12743
+ "rewards/margins": 1.0949398279190063,
12744
+ "rewards/rejected": -1.431085467338562,
12745
+ "step": 1664
12746
+ },
12747
+ {
12748
+ "epoch": 1.9294969236337316,
12749
+ "grad_norm": 53.26534606993647,
12750
+ "learning_rate": 6.711888702570556e-10,
12751
+ "logits/chosen": -1.2844552993774414,
12752
+ "logits/rejected": -1.3053499460220337,
12753
+ "logps/chosen": -167.5966033935547,
12754
+ "logps/rejected": -168.5198974609375,
12755
+ "loss": 0.4155,
12756
+ "rewards/accuracies": 0.71875,
12757
+ "rewards/chosen": -0.15978145599365234,
12758
+ "rewards/margins": 1.0049140453338623,
12759
+ "rewards/rejected": -1.1646955013275146,
12760
+ "step": 1666
12761
+ },
12762
+ {
12763
+ "epoch": 1.931813246471227,
12764
+ "grad_norm": 44.85242610407527,
12765
+ "learning_rate": 6.272347682026779e-10,
12766
+ "logits/chosen": -1.0309640169143677,
12767
+ "logits/rejected": -1.134310007095337,
12768
+ "logps/chosen": -111.43970489501953,
12769
+ "logps/rejected": -144.99891662597656,
12770
+ "loss": 0.3869,
12771
+ "rewards/accuracies": 0.78125,
12772
+ "rewards/chosen": -0.24206304550170898,
12773
+ "rewards/margins": 1.2624578475952148,
12774
+ "rewards/rejected": -1.5045208930969238,
12775
+ "step": 1668
12776
+ },
12777
+ {
12778
+ "epoch": 1.9341295693087224,
12779
+ "grad_norm": 47.25828490220509,
12780
+ "learning_rate": 5.847645007315937e-10,
12781
+ "logits/chosen": -1.1614665985107422,
12782
+ "logits/rejected": -1.254847764968872,
12783
+ "logps/chosen": -137.72381591796875,
12784
+ "logps/rejected": -143.80300903320312,
12785
+ "loss": 0.4304,
12786
+ "rewards/accuracies": 0.6875,
12787
+ "rewards/chosen": -0.6075265407562256,
12788
+ "rewards/margins": 0.8250117301940918,
12789
+ "rewards/rejected": -1.4325382709503174,
12790
+ "step": 1670
12791
+ },
12792
+ {
12793
+ "epoch": 1.9364458921462178,
12794
+ "grad_norm": 47.60965224972502,
12795
+ "learning_rate": 5.437787020100115e-10,
12796
+ "logits/chosen": -1.2086517810821533,
12797
+ "logits/rejected": -1.1928253173828125,
12798
+ "logps/chosen": -162.096923828125,
12799
+ "logps/rejected": -178.24951171875,
12800
+ "loss": 0.3871,
12801
+ "rewards/accuracies": 0.875,
12802
+ "rewards/chosen": -0.37640607357025146,
12803
+ "rewards/margins": 1.857001781463623,
12804
+ "rewards/rejected": -2.233407974243164,
12805
+ "step": 1672
12806
+ },
12807
+ {
12808
+ "epoch": 1.9387622149837134,
12809
+ "grad_norm": 51.270284022735154,
12810
+ "learning_rate": 5.042779840380595e-10,
12811
+ "logits/chosen": -1.2238942384719849,
12812
+ "logits/rejected": -1.195109486579895,
12813
+ "logps/chosen": -119.4543685913086,
12814
+ "logps/rejected": -133.65127563476562,
12815
+ "loss": 0.4054,
12816
+ "rewards/accuracies": 0.65625,
12817
+ "rewards/chosen": -0.3657826781272888,
12818
+ "rewards/margins": 0.8839918375015259,
12819
+ "rewards/rejected": -1.249774694442749,
12820
+ "step": 1674
12821
+ },
12822
+ {
12823
+ "epoch": 1.941078537821209,
12824
+ "grad_norm": 58.278892064452975,
12825
+ "learning_rate": 4.662629366406601e-10,
12826
+ "logits/chosen": -1.2001346349716187,
12827
+ "logits/rejected": -1.1202467679977417,
12828
+ "logps/chosen": -128.72256469726562,
12829
+ "logps/rejected": -139.1036376953125,
12830
+ "loss": 0.4051,
12831
+ "rewards/accuracies": 0.8125,
12832
+ "rewards/chosen": -0.27061766386032104,
12833
+ "rewards/margins": 1.1671736240386963,
12834
+ "rewards/rejected": -1.437791347503662,
12835
+ "step": 1676
12836
+ },
12837
+ {
12838
+ "epoch": 1.9433948606587044,
12839
+ "grad_norm": 46.614126047623216,
12840
+ "learning_rate": 4.2973412745864744e-10,
12841
+ "logits/chosen": -1.167816162109375,
12842
+ "logits/rejected": -1.1992610692977905,
12843
+ "logps/chosen": -129.0093536376953,
12844
+ "logps/rejected": -169.50445556640625,
12845
+ "loss": 0.3959,
12846
+ "rewards/accuracies": 0.96875,
12847
+ "rewards/chosen": -0.2619994878768921,
12848
+ "rewards/margins": 1.6201242208480835,
12849
+ "rewards/rejected": -1.8821238279342651,
12850
+ "step": 1678
12851
+ },
12852
+ {
12853
+ "epoch": 1.9457111834961998,
12854
+ "grad_norm": 75.8267220593344,
12855
+ "learning_rate": 3.946921019403859e-10,
12856
+ "logits/chosen": -1.2351601123809814,
12857
+ "logits/rejected": -1.3069424629211426,
12858
+ "logps/chosen": -130.69961547851562,
12859
+ "logps/rejected": -150.25662231445312,
12860
+ "loss": 0.4755,
12861
+ "rewards/accuracies": 0.59375,
12862
+ "rewards/chosen": -0.33375632762908936,
12863
+ "rewards/margins": 0.7270826697349548,
12864
+ "rewards/rejected": -1.0608389377593994,
12865
+ "step": 1680
12866
+ },
12867
+ {
12868
+ "epoch": 1.9480275063336951,
12869
+ "grad_norm": 57.21603903329357,
12870
+ "learning_rate": 3.61137383333554e-10,
12871
+ "logits/chosen": -1.1741724014282227,
12872
+ "logits/rejected": -1.158752679824829,
12873
+ "logps/chosen": -158.3549346923828,
12874
+ "logps/rejected": -179.17391967773438,
12875
+ "loss": 0.393,
12876
+ "rewards/accuracies": 0.75,
12877
+ "rewards/chosen": -0.5816279649734497,
12878
+ "rewards/margins": 1.2226191759109497,
12879
+ "rewards/rejected": -1.8042471408843994,
12880
+ "step": 1682
12881
+ },
12882
+ {
12883
+ "epoch": 1.9503438291711908,
12884
+ "grad_norm": 56.808299639154505,
12885
+ "learning_rate": 3.2907047267736186e-10,
12886
+ "logits/chosen": -1.240709900856018,
12887
+ "logits/rejected": -1.223825454711914,
12888
+ "logps/chosen": -163.18728637695312,
12889
+ "logps/rejected": -198.8317413330078,
12890
+ "loss": 0.3877,
12891
+ "rewards/accuracies": 0.78125,
12892
+ "rewards/chosen": -0.28957706689834595,
12893
+ "rewards/margins": 1.6465396881103516,
12894
+ "rewards/rejected": -1.9361168146133423,
12895
+ "step": 1684
12896
+ },
12897
+ {
12898
+ "epoch": 1.9526601520086864,
12899
+ "grad_norm": 52.80591879977487,
12900
+ "learning_rate": 2.9849184879506827e-10,
12901
+ "logits/chosen": -1.1730728149414062,
12902
+ "logits/rejected": -1.1454265117645264,
12903
+ "logps/chosen": -136.7293701171875,
12904
+ "logps/rejected": -151.21914672851562,
12905
+ "loss": 0.3944,
12906
+ "rewards/accuracies": 0.78125,
12907
+ "rewards/chosen": -0.25426185131073,
12908
+ "rewards/margins": 1.232313871383667,
12909
+ "rewards/rejected": -1.4865756034851074,
12910
+ "step": 1686
12911
+ },
12912
+ {
12913
+ "epoch": 1.9549764748461818,
12914
+ "grad_norm": 75.29247959139558,
12915
+ "learning_rate": 2.6940196828681983e-10,
12916
+ "logits/chosen": -1.093564510345459,
12917
+ "logits/rejected": -1.0963504314422607,
12918
+ "logps/chosen": -189.71131896972656,
12919
+ "logps/rejected": -234.08973693847656,
12920
+ "loss": 0.4931,
12921
+ "rewards/accuracies": 0.78125,
12922
+ "rewards/chosen": -1.0700315237045288,
12923
+ "rewards/margins": 1.948075532913208,
12924
+ "rewards/rejected": -3.0181069374084473,
12925
+ "step": 1688
12926
+ },
12927
+ {
12928
+ "epoch": 1.9572927976836771,
12929
+ "grad_norm": 70.12384040466765,
12930
+ "learning_rate": 2.418012655228452e-10,
12931
+ "logits/chosen": -1.2431126832962036,
12932
+ "logits/rejected": -1.2657066583633423,
12933
+ "logps/chosen": -99.85934448242188,
12934
+ "logps/rejected": -121.7491226196289,
12935
+ "loss": 0.426,
12936
+ "rewards/accuracies": 0.75,
12937
+ "rewards/chosen": -0.3242354393005371,
12938
+ "rewards/margins": 0.8404097557067871,
12939
+ "rewards/rejected": -1.1646450757980347,
12940
+ "step": 1690
12941
+ },
12942
+ {
12943
+ "epoch": 1.9596091205211725,
12944
+ "grad_norm": 50.23873116829206,
12945
+ "learning_rate": 2.1569015263697143e-10,
12946
+ "logits/chosen": -1.2286624908447266,
12947
+ "logits/rejected": -1.2570605278015137,
12948
+ "logps/chosen": -145.4255828857422,
12949
+ "logps/rejected": -194.1035919189453,
12950
+ "loss": 0.4198,
12951
+ "rewards/accuracies": 0.90625,
12952
+ "rewards/chosen": -0.5209024548530579,
12953
+ "rewards/margins": 1.7652302980422974,
12954
+ "rewards/rejected": -2.286133050918579,
12955
+ "step": 1692
12956
+ },
12957
+ {
12958
+ "epoch": 1.9619254433586681,
12959
+ "grad_norm": 54.25222586547325,
12960
+ "learning_rate": 1.9106901952045119e-10,
12961
+ "logits/chosen": -1.2050321102142334,
12962
+ "logits/rejected": -1.2619915008544922,
12963
+ "logps/chosen": -179.3286590576172,
12964
+ "logps/rejected": -227.4953155517578,
12965
+ "loss": 0.4315,
12966
+ "rewards/accuracies": 0.9375,
12967
+ "rewards/chosen": -0.5577185750007629,
12968
+ "rewards/margins": 1.80524742603302,
12969
+ "rewards/rejected": -2.3629660606384277,
12970
+ "step": 1694
12971
+ },
12972
+ {
12973
+ "epoch": 1.9642417661961638,
12974
+ "grad_norm": 61.997046360221496,
12975
+ "learning_rate": 1.6793823381614501e-10,
12976
+ "logits/chosen": -1.294581651687622,
12977
+ "logits/rejected": -1.247463583946228,
12978
+ "logps/chosen": -144.93246459960938,
12979
+ "logps/rejected": -168.00628662109375,
12980
+ "loss": 0.444,
12981
+ "rewards/accuracies": 0.9375,
12982
+ "rewards/chosen": -0.0931825190782547,
12983
+ "rewards/margins": 1.4015557765960693,
12984
+ "rewards/rejected": -1.4947383403778076,
12985
+ "step": 1696
12986
+ },
12987
+ {
12988
+ "epoch": 1.9665580890336591,
12989
+ "grad_norm": 82.5016461876686,
12990
+ "learning_rate": 1.4629814091307036e-10,
12991
+ "logits/chosen": -1.2317255735397339,
12992
+ "logits/rejected": -1.2456412315368652,
12993
+ "logps/chosen": -156.1422576904297,
12994
+ "logps/rejected": -151.87107849121094,
12995
+ "loss": 0.417,
12996
+ "rewards/accuracies": 0.75,
12997
+ "rewards/chosen": -0.4730032682418823,
12998
+ "rewards/margins": 1.0971145629882812,
12999
+ "rewards/rejected": -1.570117712020874,
13000
+ "step": 1698
13001
+ },
13002
+ {
13003
+ "epoch": 1.9688744118711545,
13004
+ "grad_norm": 58.642096283997354,
13005
+ "learning_rate": 1.261490639411833e-10,
13006
+ "logits/chosen": -1.1768873929977417,
13007
+ "logits/rejected": -1.2999684810638428,
13008
+ "logps/chosen": -109.31826782226562,
13009
+ "logps/rejected": -134.15371704101562,
13010
+ "loss": 0.3986,
13011
+ "rewards/accuracies": 0.78125,
13012
+ "rewards/chosen": -0.2187974750995636,
13013
+ "rewards/margins": 1.001755714416504,
13014
+ "rewards/rejected": -1.2205531597137451,
13015
+ "step": 1700
13016
+ },
13017
+ {
13018
+ "epoch": 1.9688744118711545,
13019
+ "eval_logits/chosen": -1.215119481086731,
13020
+ "eval_logits/rejected": -1.2099292278289795,
13021
+ "eval_logps/chosen": -144.09429931640625,
13022
+ "eval_logps/rejected": -149.15176391601562,
13023
+ "eval_loss": 0.5949785113334656,
13024
+ "eval_rewards/accuracies": 0.7599999904632568,
13025
+ "eval_rewards/chosen": -0.8463126420974731,
13026
+ "eval_rewards/margins": 0.6735073924064636,
13027
+ "eval_rewards/rejected": -1.5198200941085815,
13028
+ "eval_runtime": 22.9886,
13029
+ "eval_samples_per_second": 4.35,
13030
+ "eval_steps_per_second": 1.087,
13031
+ "step": 1700
13032
+ },
13033
+ {
13034
+ "epoch": 1.97119073470865,
13035
+ "grad_norm": 55.677396344114726,
13036
+ "learning_rate": 1.0749130376659366e-10,
13037
+ "logits/chosen": -1.2230623960494995,
13038
+ "logits/rejected": -1.163780689239502,
13039
+ "logps/chosen": -164.1004638671875,
13040
+ "logps/rejected": -179.64088439941406,
13041
+ "loss": 0.3997,
13042
+ "rewards/accuracies": 0.84375,
13043
+ "rewards/chosen": -0.1451684981584549,
13044
+ "rewards/margins": 1.5081610679626465,
13045
+ "rewards/rejected": -1.653329610824585,
13046
+ "step": 1702
13047
+ },
13048
+ {
13049
+ "epoch": 1.9735070575461455,
13050
+ "grad_norm": 71.96257699524784,
13051
+ "learning_rate": 9.032513898705741e-11,
13052
+ "logits/chosen": -1.2779675722122192,
13053
+ "logits/rejected": -1.2883471250534058,
13054
+ "logps/chosen": -133.36380004882812,
13055
+ "logps/rejected": -153.81741333007812,
13056
+ "loss": 0.467,
13057
+ "rewards/accuracies": 0.8125,
13058
+ "rewards/chosen": -0.13987727463245392,
13059
+ "rewards/margins": 1.16806161403656,
13060
+ "rewards/rejected": -1.307938814163208,
13061
+ "step": 1704
13062
+ },
13063
+ {
13064
+ "epoch": 1.975823380383641,
13065
+ "grad_norm": 49.123608283002156,
13066
+ "learning_rate": 7.465082592782445e-11,
13067
+ "logits/chosen": -1.207802414894104,
13068
+ "logits/rejected": -1.1620241403579712,
13069
+ "logps/chosen": -164.59759521484375,
13070
+ "logps/rejected": -208.24905395507812,
13071
+ "loss": 0.3896,
13072
+ "rewards/accuracies": 0.90625,
13073
+ "rewards/chosen": -0.5080645084381104,
13074
+ "rewards/margins": 2.8074049949645996,
13075
+ "rewards/rejected": -3.315469980239868,
13076
+ "step": 1706
13077
+ },
13078
+ {
13079
+ "epoch": 1.9781397032211365,
13080
+ "grad_norm": 51.95036099086183,
13081
+ "learning_rate": 6.04685986378195e-11,
13082
+ "logits/chosen": -1.25301992893219,
13083
+ "logits/rejected": -1.2303074598312378,
13084
+ "logps/chosen": -160.34934997558594,
13085
+ "logps/rejected": -162.81515502929688,
13086
+ "loss": 0.3678,
13087
+ "rewards/accuracies": 0.875,
13088
+ "rewards/chosen": -0.21462872624397278,
13089
+ "rewards/margins": 1.2838810682296753,
13090
+ "rewards/rejected": -1.4985097646713257,
13091
+ "step": 1708
13092
+ },
13093
+ {
13094
+ "epoch": 1.980456026058632,
13095
+ "grad_norm": 55.67392221664361,
13096
+ "learning_rate": 4.777866888611148e-11,
13097
+ "logits/chosen": -1.1367592811584473,
13098
+ "logits/rejected": -1.2029287815093994,
13099
+ "logps/chosen": -170.26568603515625,
13100
+ "logps/rejected": -203.4815216064453,
13101
+ "loss": 0.3303,
13102
+ "rewards/accuracies": 0.90625,
13103
+ "rewards/chosen": -0.21711598336696625,
13104
+ "rewards/margins": 1.6773165464401245,
13105
+ "rewards/rejected": -1.8944324254989624,
13106
+ "step": 1710
13107
+ },
13108
+ {
13109
+ "epoch": 1.9827723488961273,
13110
+ "grad_norm": 77.61747497941103,
13111
+ "learning_rate": 3.658122615880499e-11,
13112
+ "logits/chosen": -1.198671817779541,
13113
+ "logits/rejected": -1.1844216585159302,
13114
+ "logps/chosen": -188.25486755371094,
13115
+ "logps/rejected": -188.4522247314453,
13116
+ "loss": 0.4122,
13117
+ "rewards/accuracies": 0.75,
13118
+ "rewards/chosen": -0.35006386041641235,
13119
+ "rewards/margins": 1.136628270149231,
13120
+ "rewards/rejected": -1.4866920709609985,
13121
+ "step": 1712
13122
+ },
13123
+ {
13124
+ "epoch": 1.985088671733623,
13125
+ "grad_norm": 72.68985609286901,
13126
+ "learning_rate": 2.687643765615366e-11,
13127
+ "logits/chosen": -1.2630504369735718,
13128
+ "logits/rejected": -1.1126054525375366,
13129
+ "logps/chosen": -170.22010803222656,
13130
+ "logps/rejected": -162.9827880859375,
13131
+ "loss": 0.4154,
13132
+ "rewards/accuracies": 0.8125,
13133
+ "rewards/chosen": -0.1824585199356079,
13134
+ "rewards/margins": 1.2966349124908447,
13135
+ "rewards/rejected": -1.479093313217163,
13136
+ "step": 1714
13137
+ },
13138
+ {
13139
+ "epoch": 1.9874049945711183,
13140
+ "grad_norm": 57.324360587243646,
13141
+ "learning_rate": 1.8664448290106606e-11,
13142
+ "logits/chosen": -1.1106977462768555,
13143
+ "logits/rejected": -1.1625827550888062,
13144
+ "logps/chosen": -127.45535278320312,
13145
+ "logps/rejected": -168.37313842773438,
13146
+ "loss": 0.4271,
13147
+ "rewards/accuracies": 0.90625,
13148
+ "rewards/chosen": -0.41478192806243896,
13149
+ "rewards/margins": 1.4449265003204346,
13150
+ "rewards/rejected": -1.8597084283828735,
13151
+ "step": 1716
13152
+ },
13153
+ {
13154
+ "epoch": 1.989721317408614,
13155
+ "grad_norm": 64.91297510204477,
13156
+ "learning_rate": 1.1945380682132355e-11,
13157
+ "logits/chosen": -1.3344897031784058,
13158
+ "logits/rejected": -1.3637080192565918,
13159
+ "logps/chosen": -155.83399963378906,
13160
+ "logps/rejected": -174.8062744140625,
13161
+ "loss": 0.4455,
13162
+ "rewards/accuracies": 0.8125,
13163
+ "rewards/chosen": -0.2664361596107483,
13164
+ "rewards/margins": 1.3320696353912354,
13165
+ "rewards/rejected": -1.5985058546066284,
13166
+ "step": 1718
13167
+ },
13168
+ {
13169
+ "epoch": 1.9920376402461093,
13170
+ "grad_norm": 55.133928870906985,
13171
+ "learning_rate": 6.719335161364803e-12,
13172
+ "logits/chosen": -1.210727334022522,
13173
+ "logits/rejected": -1.2122191190719604,
13174
+ "logps/chosen": -138.89447021484375,
13175
+ "logps/rejected": -185.5248565673828,
13176
+ "loss": 0.407,
13177
+ "rewards/accuracies": 0.84375,
13178
+ "rewards/chosen": -0.43058472871780396,
13179
+ "rewards/margins": 2.1915080547332764,
13180
+ "rewards/rejected": -2.6220927238464355,
13181
+ "step": 1720
13182
+ },
13183
+ {
13184
+ "epoch": 1.9943539630836047,
13185
+ "grad_norm": 59.1945232431432,
13186
+ "learning_rate": 2.9863897631488e-12,
13187
+ "logits/chosen": -1.1495387554168701,
13188
+ "logits/rejected": -1.2423110008239746,
13189
+ "logps/chosen": -168.22439575195312,
13190
+ "logps/rejected": -205.48471069335938,
13191
+ "loss": 0.3821,
13192
+ "rewards/accuracies": 0.875,
13193
+ "rewards/chosen": -0.808469295501709,
13194
+ "rewards/margins": 1.811312198638916,
13195
+ "rewards/rejected": -2.619781494140625,
13196
+ "step": 1722
13197
+ },
13198
+ {
13199
+ "epoch": 1.9966702859211003,
13200
+ "grad_norm": 50.35911703570131,
13201
+ "learning_rate": 7.466002278522232e-13,
13202
+ "logits/chosen": -1.1888153553009033,
13203
+ "logits/rejected": -1.1903085708618164,
13204
+ "logps/chosen": -183.95315551757812,
13205
+ "logps/rejected": -213.3731689453125,
13206
+ "loss": 0.3585,
13207
+ "rewards/accuracies": 0.90625,
13208
+ "rewards/chosen": -0.34876811504364014,
13209
+ "rewards/margins": 1.8841259479522705,
13210
+ "rewards/rejected": -2.232893943786621,
13211
+ "step": 1724
13212
+ },
13213
+ {
13214
+ "epoch": 1.9989866087585957,
13215
+ "grad_norm": 63.15894144617586,
13216
+ "learning_rate": 0.0,
13217
+ "logits/chosen": -1.270525574684143,
13218
+ "logits/rejected": -1.1932576894760132,
13219
+ "logps/chosen": -133.14906311035156,
13220
+ "logps/rejected": -160.24928283691406,
13221
+ "loss": 0.4361,
13222
+ "rewards/accuracies": 0.875,
13223
+ "rewards/chosen": -0.38358262181282043,
13224
+ "rewards/margins": 1.584211826324463,
13225
+ "rewards/rejected": -1.967794418334961,
13226
+ "step": 1726
13227
  }
13228
  ],
13229
  "logging_steps": 2,
13238
  "should_evaluate": false,
13239
  "should_log": false,
13240
  "should_save": true,
13241
+ "should_training_stop": true
13242
  },
13243
  "attributes": {}
13244
  }