RyanYr committed on
Commit: 733b998
Parent(s): 9d86992

Training in progress, step 600, checkpoint

last-checkpoint/global_step600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bc8db81f9ddaa29cae59ac5881d092fda01db995e0f4da172a5db08a35bf3d56
+ size 24090788996
last-checkpoint/global_step600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bfb7cf1637269ec43452f1226ff60ceca7e40a620762f2be21ea5790b95de817
+ size 24090788996
last-checkpoint/global_step600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:19679cf0f319b67f5f4781a251d3d3b189ba741e42dd93fa0b69838c528b5804
+ size 24090788996
last-checkpoint/global_step600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1b57822d3cbbb7035813723d94a43cdbe6ff635b9d800a7a1445860e28eaff6d
+ size 24090788996
last-checkpoint/global_step600/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:92080d92f7039ba9a28ee0207869fee3670869dc8f88ff1c586400224153afc1
+ size 150693
last-checkpoint/global_step600/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2adaa5dad1fb21d06dd1448a46414244004297e4538dc79e31541160fb413a29
+ size 150693
last-checkpoint/global_step600/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e100a2fb0ad2e0b16513edb6bb5e410375b4900628822bec6b2e89cd0a4b6eab
+ size 150693
last-checkpoint/global_step600/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f100eb6452bbb9cf518d9995cea742dd60e64f07f51aceb70c76caef63912b63
+ size 150693
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step300
+ global_step600
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b2281cb21ad854a3db059aa689d7c74452cb2008fb15f3402ad06513a6ed68d7
+ oid sha256:48948a0d9104661249099a4014490ab23bd0ad9a31f6471dee9f9f094a3d63bf
  size 4976698672
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e49cd637191fe742ce29f784563dc89109beb1128aa3fee60b2a9e24ea94cbdd
+ oid sha256:8b24fa74f6060bb886c4ad908a9e1cbc5b1d32700a032f508c04604951a8181f
  size 4999802720
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:30aa2ab618d4be5f0ba7e005ac657417f4e515f10a9b0fbb2e7b29fd1ca2b390
+ oid sha256:4f1fb4fda3abf0c7433b1013f78feb462c78a1d36b3a3336e8e2ed0f8c341706
  size 4915916176
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2cc0b3808e3294cd9ec41a837580c875a4c99d9ec14356c982b552e0c92df4b8
+ oid sha256:1a42d0ba56615ac5b6ca669d046a3ac05a8adeee3b01ebef26cde18b002e96c8
  size 1168138808
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:70cc56408014c410353d4dd58ae9b03f4be043f5f800324f66fd8e20e99b840e
+ oid sha256:ef002048764051a71fb00f8f978e9ec32b780dc850bdb059af362cc56494234b
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:49d1438e98cc9c53a6852464635ce62e9788e61eb3646b73e33813f487c4b6ae
+ oid sha256:37194a6d48612e1a46a2d5d317ead97c70d9fc4569b0118fcd5f84c3dc9daa5a
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4388add9cec90932f8ff0100d27a0574d98e1bad52ff89d44e31967d2b4fbfde
+ oid sha256:17c179483659a784aa1ace2427daff48c556a6bcc3c330e6f3274e4dc95e4b49
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a705d6dfaae4f2c1b4b2be6b25a6eb521ffae6fcba21cc1531e97b60037ed079
+ oid sha256:b56857c9b117629f35af2c3d64f522d33a9d8aa94faa81ec6956380a895118c4
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0db895d97b25045a4a7490f3a2585ba8b172dd50ebe580778062498c071cac97
+ oid sha256:8d8f9b18fc83c21830420fd2e6d55afd183068e9c7f7ec7447233ce473235b6a
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.3474484256243214,
+ "epoch": 0.6948968512486428,
  "eval_steps": 100,
- "global_step": 300,
+ "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2305,6 +2305,2304 @@
  "eval_samples_per_second": 4.06,
  "eval_steps_per_second": 1.015,
  "step": 300
2308
+ },
2309
+ {
2310
+ "epoch": 0.3497647484618169,
2311
+ "grad_norm": 79.77982199610355,
2312
+ "learning_rate": 1.9248010649538775e-07,
2313
+ "logits/chosen": -1.3689723014831543,
2314
+ "logits/rejected": -1.4326424598693848,
2315
+ "logps/chosen": -186.77511596679688,
2316
+ "logps/rejected": -194.57289123535156,
2317
+ "loss": 0.677,
2318
+ "rewards/accuracies": 0.625,
2319
+ "rewards/chosen": 0.07687507569789886,
2320
+ "rewards/margins": 0.245405375957489,
2321
+ "rewards/rejected": -0.16853031516075134,
2322
+ "step": 302
2323
+ },
2324
+ {
2325
+ "epoch": 0.3520810712993123,
2326
+ "grad_norm": 72.83330769963189,
2327
+ "learning_rate": 1.923324026065944e-07,
2328
+ "logits/chosen": -1.2983791828155518,
2329
+ "logits/rejected": -1.301888108253479,
2330
+ "logps/chosen": -96.0470199584961,
2331
+ "logps/rejected": -113.31134796142578,
2332
+ "loss": 0.6028,
2333
+ "rewards/accuracies": 0.625,
2334
+ "rewards/chosen": 0.10890144109725952,
2335
+ "rewards/margins": 0.2682499885559082,
2336
+ "rewards/rejected": -0.1593485325574875,
2337
+ "step": 304
2338
+ },
2339
+ {
2340
+ "epoch": 0.3543973941368078,
2341
+ "grad_norm": 100.14984360245958,
2342
+ "learning_rate": 1.9218332000994458e-07,
2343
+ "logits/chosen": -1.4329365491867065,
2344
+ "logits/rejected": -1.4993162155151367,
2345
+ "logps/chosen": -186.0762176513672,
2346
+ "logps/rejected": -214.90533447265625,
2347
+ "loss": 0.6058,
2348
+ "rewards/accuracies": 0.65625,
2349
+ "rewards/chosen": 0.03735332563519478,
2350
+ "rewards/margins": 0.3314560055732727,
2351
+ "rewards/rejected": -0.2941026985645294,
2352
+ "step": 306
2353
+ },
2354
+ {
2355
+ "epoch": 0.3567137169743033,
2356
+ "grad_norm": 72.19620109844895,
2357
+ "learning_rate": 1.9203286093154026e-07,
2358
+ "logits/chosen": -1.2941675186157227,
2359
+ "logits/rejected": -1.259239673614502,
2360
+ "logps/chosen": -109.15111541748047,
2361
+ "logps/rejected": -109.48808288574219,
2362
+ "loss": 0.6171,
2363
+ "rewards/accuracies": 0.59375,
2364
+ "rewards/chosen": 0.24576213955879211,
2365
+ "rewards/margins": 0.18151941895484924,
2366
+ "rewards/rejected": 0.06424272805452347,
2367
+ "step": 308
2368
+ },
2369
+ {
2370
+ "epoch": 0.35903003981179876,
2371
+ "grad_norm": 106.11628012062671,
2372
+ "learning_rate": 1.9188102761803715e-07,
2373
+ "logits/chosen": -1.4155701398849487,
2374
+ "logits/rejected": -1.469191312789917,
2375
+ "logps/chosen": -192.42648315429688,
2376
+ "logps/rejected": -190.7394561767578,
2377
+ "loss": 0.6785,
2378
+ "rewards/accuracies": 0.65625,
2379
+ "rewards/chosen": -0.15398849546909332,
2380
+ "rewards/margins": 0.36678701639175415,
2381
+ "rewards/rejected": -0.5207754373550415,
2382
+ "step": 310
2383
+ },
2384
+ {
2385
+ "epoch": 0.36134636264929426,
2386
+ "grad_norm": 70.19369769339828,
2387
+ "learning_rate": 1.9172782233661094e-07,
2388
+ "logits/chosen": -1.254553198814392,
2389
+ "logits/rejected": -1.1699531078338623,
2390
+ "logps/chosen": -127.07028198242188,
2391
+ "logps/rejected": -145.60787963867188,
2392
+ "loss": 0.6158,
2393
+ "rewards/accuracies": 0.6875,
2394
+ "rewards/chosen": 0.17219696938991547,
2395
+ "rewards/margins": 0.5162093639373779,
2396
+ "rewards/rejected": -0.34401237964630127,
2397
+ "step": 312
2398
+ },
2399
+ {
2400
+ "epoch": 0.3636626854867897,
2401
+ "grad_norm": 90.77874249334138,
2402
+ "learning_rate": 1.915732473749236e-07,
2403
+ "logits/chosen": -1.2084178924560547,
2404
+ "logits/rejected": -1.1874415874481201,
2405
+ "logps/chosen": -166.15135192871094,
2406
+ "logps/rejected": -176.3106231689453,
2407
+ "loss": 0.629,
2408
+ "rewards/accuracies": 0.59375,
2409
+ "rewards/chosen": 0.1258632242679596,
2410
+ "rewards/margins": 0.4471869468688965,
2411
+ "rewards/rejected": -0.3213237524032593,
2412
+ "step": 314
2413
+ },
2414
+ {
2415
+ "epoch": 0.3659790083242852,
2416
+ "grad_norm": 78.58822831789924,
2417
+ "learning_rate": 1.914173050410892e-07,
2418
+ "logits/chosen": -1.3010833263397217,
2419
+ "logits/rejected": -1.3981972932815552,
2420
+ "logps/chosen": -94.9105453491211,
2421
+ "logps/rejected": -108.66416931152344,
2422
+ "loss": 0.6266,
2423
+ "rewards/accuracies": 0.65625,
2424
+ "rewards/chosen": 0.15446007251739502,
2425
+ "rewards/margins": 0.30908384919166565,
2426
+ "rewards/rejected": -0.15462377667427063,
2427
+ "step": 316
2428
+ },
2429
+ {
2430
+ "epoch": 0.36829533116178065,
2431
+ "grad_norm": 80.5668477187345,
2432
+ "learning_rate": 1.9125999766363932e-07,
2433
+ "logits/chosen": -1.4468637704849243,
2434
+ "logits/rejected": -1.4837853908538818,
2435
+ "logps/chosen": -121.05176544189453,
2436
+ "logps/rejected": -132.3884735107422,
2437
+ "loss": 0.6187,
2438
+ "rewards/accuracies": 0.5625,
2439
+ "rewards/chosen": 0.1501280963420868,
2440
+ "rewards/margins": 0.14075569808483124,
2441
+ "rewards/rejected": 0.009372413158416748,
2442
+ "step": 318
2443
+ },
2444
+ {
2445
+ "epoch": 0.37061165399927615,
2446
+ "grad_norm": 90.09297915942425,
2447
+ "learning_rate": 1.9110132759148843e-07,
2448
+ "logits/chosen": -1.239458680152893,
2449
+ "logits/rejected": -1.2513267993927002,
2450
+ "logps/chosen": -119.19309997558594,
2451
+ "logps/rejected": -135.55023193359375,
2452
+ "loss": 0.6107,
2453
+ "rewards/accuracies": 0.71875,
2454
+ "rewards/chosen": -0.07714903354644775,
2455
+ "rewards/margins": 0.40861696004867554,
2456
+ "rewards/rejected": -0.4857659935951233,
2457
+ "step": 320
2458
+ },
2459
+ {
2460
+ "epoch": 0.37292797683677165,
2461
+ "grad_norm": 130.3488780136265,
2462
+ "learning_rate": 1.9094129719389885e-07,
2463
+ "logits/chosen": -1.3481711149215698,
2464
+ "logits/rejected": -1.328981637954712,
2465
+ "logps/chosen": -192.10084533691406,
2466
+ "logps/rejected": -214.688720703125,
2467
+ "loss": 0.635,
2468
+ "rewards/accuracies": 0.625,
2469
+ "rewards/chosen": -0.4045405685901642,
2470
+ "rewards/margins": 0.6289528012275696,
2471
+ "rewards/rejected": -1.0334933996200562,
2472
+ "step": 322
2473
+ },
2474
+ {
2475
+ "epoch": 0.3752442996742671,
2476
+ "grad_norm": 86.21776365076336,
2477
+ "learning_rate": 1.907799088604451e-07,
2478
+ "logits/chosen": -1.1944794654846191,
2479
+ "logits/rejected": -1.154435157775879,
2480
+ "logps/chosen": -86.31254577636719,
2481
+ "logps/rejected": -97.8081283569336,
2482
+ "loss": 0.6424,
2483
+ "rewards/accuracies": 0.6875,
2484
+ "rewards/chosen": 0.0629437267780304,
2485
+ "rewards/margins": 0.25958341360092163,
2486
+ "rewards/rejected": -0.19663970172405243,
2487
+ "step": 324
2488
+ },
2489
+ {
2490
+ "epoch": 0.3775606225117626,
2491
+ "grad_norm": 109.3152948358386,
2492
+ "learning_rate": 1.9061716500097862e-07,
2493
+ "logits/chosen": -1.3203986883163452,
2494
+ "logits/rejected": -1.3523664474487305,
2495
+ "logps/chosen": -152.81573486328125,
2496
+ "logps/rejected": -161.0247039794922,
2497
+ "loss": 0.6101,
2498
+ "rewards/accuracies": 0.53125,
2499
+ "rewards/chosen": -0.4936632812023163,
2500
+ "rewards/margins": 0.04809580743312836,
2501
+ "rewards/rejected": -0.5417591333389282,
2502
+ "step": 326
2503
+ },
2504
+ {
2505
+ "epoch": 0.37987694534925803,
2506
+ "grad_norm": 91.94400981611243,
2507
+ "learning_rate": 1.904530680455914e-07,
2508
+ "logits/chosen": -1.3758294582366943,
2509
+ "logits/rejected": -1.4080578088760376,
2510
+ "logps/chosen": -146.73672485351562,
2511
+ "logps/rejected": -145.2505645751953,
2512
+ "loss": 0.6278,
2513
+ "rewards/accuracies": 0.65625,
2514
+ "rewards/chosen": 0.012471210211515427,
2515
+ "rewards/margins": 0.4439522325992584,
2516
+ "rewards/rejected": -0.4314810335636139,
2517
+ "step": 328
2518
+ },
2519
+ {
2520
+ "epoch": 0.38219326818675353,
2521
+ "grad_norm": 105.04213501880093,
2522
+ "learning_rate": 1.9028762044457992e-07,
2523
+ "logits/chosen": -1.2461824417114258,
2524
+ "logits/rejected": -1.288218379020691,
2525
+ "logps/chosen": -126.72929382324219,
2526
+ "logps/rejected": -151.31341552734375,
2527
+ "loss": 0.6118,
2528
+ "rewards/accuracies": 0.6875,
2529
+ "rewards/chosen": 0.042198315262794495,
2530
+ "rewards/margins": 0.5073456764221191,
2531
+ "rewards/rejected": -0.46514737606048584,
2532
+ "step": 330
2533
+ },
2534
+ {
2535
+ "epoch": 0.38450959102424903,
2536
+ "grad_norm": 131.99962498687907,
2537
+ "learning_rate": 1.901208246684085e-07,
2538
+ "logits/chosen": -1.345144271850586,
2539
+ "logits/rejected": -1.3419792652130127,
2540
+ "logps/chosen": -138.4906768798828,
2541
+ "logps/rejected": -144.3926239013672,
2542
+ "loss": 0.6483,
2543
+ "rewards/accuracies": 0.625,
2544
+ "rewards/chosen": -0.3029904365539551,
2545
+ "rewards/margins": 0.3120897114276886,
2546
+ "rewards/rejected": -0.6150801181793213,
2547
+ "step": 332
2548
+ },
2549
+ {
2550
+ "epoch": 0.3868259138617445,
2551
+ "grad_norm": 192.23629969436513,
2552
+ "learning_rate": 1.8995268320767252e-07,
2553
+ "logits/chosen": -1.3834903240203857,
2554
+ "logits/rejected": -1.4057523012161255,
2555
+ "logps/chosen": -138.5772705078125,
2556
+ "logps/rejected": -148.1931915283203,
2557
+ "loss": 0.7789,
2558
+ "rewards/accuracies": 0.65625,
2559
+ "rewards/chosen": 0.0129515016451478,
2560
+ "rewards/margins": 0.49864012002944946,
2561
+ "rewards/rejected": -0.4856886565685272,
2562
+ "step": 334
2563
+ },
2564
+ {
2565
+ "epoch": 0.38914223669924,
2566
+ "grad_norm": 82.74631507246218,
2567
+ "learning_rate": 1.897831985730609e-07,
2568
+ "logits/chosen": -1.2497293949127197,
2569
+ "logits/rejected": -1.2685260772705078,
2570
+ "logps/chosen": -135.58956909179688,
2571
+ "logps/rejected": -166.16636657714844,
2572
+ "loss": 0.6435,
2573
+ "rewards/accuracies": 0.8125,
2574
+ "rewards/chosen": -0.062045883387327194,
2575
+ "rewards/margins": 0.9767952561378479,
2576
+ "rewards/rejected": -1.0388411283493042,
2577
+ "step": 336
2578
+ },
2579
+ {
2580
+ "epoch": 0.3914585595367354,
2581
+ "grad_norm": 97.75310342784691,
2582
+ "learning_rate": 1.896123732953191e-07,
2583
+ "logits/chosen": -1.2475745677947998,
2584
+ "logits/rejected": -1.2074342966079712,
2585
+ "logps/chosen": -108.48465728759766,
2586
+ "logps/rejected": -131.79908752441406,
2587
+ "loss": 0.6321,
2588
+ "rewards/accuracies": 0.71875,
2589
+ "rewards/chosen": -0.32877668738365173,
2590
+ "rewards/margins": 0.5046026110649109,
2591
+ "rewards/rejected": -0.8333792686462402,
2592
+ "step": 338
2593
+ },
2594
+ {
2595
+ "epoch": 0.3937748823742309,
2596
+ "grad_norm": 104.56753710703906,
2597
+ "learning_rate": 1.8944020992521088e-07,
2598
+ "logits/chosen": -1.331594467163086,
2599
+ "logits/rejected": -1.4218388795852661,
2600
+ "logps/chosen": -122.07364654541016,
2601
+ "logps/rejected": -144.00531005859375,
2602
+ "loss": 0.6138,
2603
+ "rewards/accuracies": 0.78125,
2604
+ "rewards/chosen": 0.15422941744327545,
2605
+ "rewards/margins": 0.4605112373828888,
2606
+ "rewards/rejected": -0.30628180503845215,
2607
+ "step": 340
2608
+ },
2609
+ {
2610
+ "epoch": 0.39609120521172636,
2611
+ "grad_norm": 104.94507394937493,
2612
+ "learning_rate": 1.8926671103348047e-07,
2613
+ "logits/chosen": -1.3103477954864502,
2614
+ "logits/rejected": -1.3303866386413574,
2615
+ "logps/chosen": -118.01762390136719,
2616
+ "logps/rejected": -128.77285766601562,
2617
+ "loss": 0.698,
2618
+ "rewards/accuracies": 0.53125,
2619
+ "rewards/chosen": -0.1733967363834381,
2620
+ "rewards/margins": 0.22825026512145996,
2621
+ "rewards/rejected": -0.40164700150489807,
2622
+ "step": 342
2623
+ },
2624
+ {
2625
+ "epoch": 0.39840752804922186,
2626
+ "grad_norm": 87.41594646239237,
2627
+ "learning_rate": 1.8909187921081416e-07,
2628
+ "logits/chosen": -1.2882866859436035,
2629
+ "logits/rejected": -1.266202449798584,
2630
+ "logps/chosen": -144.56747436523438,
2631
+ "logps/rejected": -142.6608123779297,
2632
+ "loss": 0.6561,
2633
+ "rewards/accuracies": 0.53125,
2634
+ "rewards/chosen": -0.08110320568084717,
2635
+ "rewards/margins": 0.10048308968544006,
2636
+ "rewards/rejected": -0.18158632516860962,
2637
+ "step": 344
2638
+ },
2639
+ {
2640
+ "epoch": 0.40072385088671736,
2641
+ "grad_norm": 166.0088927921291,
2642
+ "learning_rate": 1.8891571706780144e-07,
2643
+ "logits/chosen": -1.3238105773925781,
2644
+ "logits/rejected": -1.3814265727996826,
2645
+ "logps/chosen": -135.59217834472656,
2646
+ "logps/rejected": -158.6577911376953,
2647
+ "loss": 0.6647,
2648
+ "rewards/accuracies": 0.75,
2649
+ "rewards/chosen": -0.2648026645183563,
2650
+ "rewards/margins": 0.6691212058067322,
2651
+ "rewards/rejected": -0.9339239001274109,
2652
+ "step": 346
2653
+ },
2654
+ {
2655
+ "epoch": 0.4030401737242128,
2656
+ "grad_norm": 92.22069522105578,
2657
+ "learning_rate": 1.8873822723489633e-07,
2658
+ "logits/chosen": -1.3072634935379028,
2659
+ "logits/rejected": -1.3363394737243652,
2660
+ "logps/chosen": -179.68614196777344,
2661
+ "logps/rejected": -213.12120056152344,
2662
+ "loss": 0.6272,
2663
+ "rewards/accuracies": 0.75,
2664
+ "rewards/chosen": -0.018616102635860443,
2665
+ "rewards/margins": 0.4234482944011688,
2666
+ "rewards/rejected": -0.44206440448760986,
2667
+ "step": 348
2668
+ },
2669
+ {
2670
+ "epoch": 0.4053564965617083,
2671
+ "grad_norm": 70.97764990334171,
2672
+ "learning_rate": 1.8855941236237774e-07,
2673
+ "logits/chosen": -1.2639405727386475,
2674
+ "logits/rejected": -1.2773693799972534,
2675
+ "logps/chosen": -133.8863067626953,
2676
+ "logps/rejected": -170.3965606689453,
2677
+ "loss": 0.5784,
2678
+ "rewards/accuracies": 0.65625,
2679
+ "rewards/chosen": 0.36297571659088135,
2680
+ "rewards/margins": 0.6825499534606934,
2681
+ "rewards/rejected": -0.3195742070674896,
2682
+ "step": 350
2683
+ },
2684
+ {
2685
+ "epoch": 0.40767281939920375,
2686
+ "grad_norm": 87.3271520781356,
2687
+ "learning_rate": 1.883792751203102e-07,
2688
+ "logits/chosen": -1.2711012363433838,
2689
+ "logits/rejected": -1.2672007083892822,
2690
+ "logps/chosen": -169.25314331054688,
2691
+ "logps/rejected": -167.83010864257812,
2692
+ "loss": 0.608,
2693
+ "rewards/accuracies": 0.65625,
2694
+ "rewards/chosen": -0.06947077065706253,
2695
+ "rewards/margins": 0.39652663469314575,
2696
+ "rewards/rejected": -0.4659973978996277,
2697
+ "step": 352
2698
+ },
2699
+ {
2700
+ "epoch": 0.40998914223669924,
2701
+ "grad_norm": 82.42288813891042,
2702
+ "learning_rate": 1.8819781819850382e-07,
2703
+ "logits/chosen": -1.2538509368896484,
2704
+ "logits/rejected": -1.2403154373168945,
2705
+ "logps/chosen": -112.01508331298828,
2706
+ "logps/rejected": -122.62294006347656,
2707
+ "loss": 0.6,
2708
+ "rewards/accuracies": 0.78125,
2709
+ "rewards/chosen": 0.310922235250473,
2710
+ "rewards/margins": 0.5129318237304688,
2711
+ "rewards/rejected": -0.20200954377651215,
2712
+ "step": 354
2713
+ },
2714
+ {
2715
+ "epoch": 0.41230546507419474,
2716
+ "grad_norm": 92.66996742577295,
2717
+ "learning_rate": 1.880150443064742e-07,
2718
+ "logits/chosen": -1.1228657960891724,
2719
+ "logits/rejected": -1.1974968910217285,
2720
+ "logps/chosen": -129.4398193359375,
2721
+ "logps/rejected": -178.6856689453125,
2722
+ "loss": 0.6907,
2723
+ "rewards/accuracies": 0.71875,
2724
+ "rewards/chosen": 0.1207706406712532,
2725
+ "rewards/margins": 0.8560737371444702,
2726
+ "rewards/rejected": -0.7353031039237976,
2727
+ "step": 356
2728
+ },
2729
+ {
2730
+ "epoch": 0.4146217879116902,
2731
+ "grad_norm": 85.77942086621498,
2732
+ "learning_rate": 1.8783095617340192e-07,
2733
+ "logits/chosen": -1.3269970417022705,
2734
+ "logits/rejected": -1.3102359771728516,
2735
+ "logps/chosen": -138.91845703125,
2736
+ "logps/rejected": -150.00466918945312,
2737
+ "loss": 0.6704,
2738
+ "rewards/accuracies": 0.53125,
2739
+ "rewards/chosen": -0.25449270009994507,
2740
+ "rewards/margins": 0.06581351906061172,
2741
+ "rewards/rejected": -0.32030627131462097,
2742
+ "step": 358
2743
+ },
2744
+ {
2745
+ "epoch": 0.4169381107491857,
2746
+ "grad_norm": 66.74779859823646,
2747
+ "learning_rate": 1.876455565480918e-07,
2748
+ "logits/chosen": -1.395142912864685,
2749
+ "logits/rejected": -1.4558305740356445,
2750
+ "logps/chosen": -138.25567626953125,
2751
+ "logps/rejected": -142.72232055664062,
2752
+ "loss": 0.608,
2753
+ "rewards/accuracies": 0.65625,
2754
+ "rewards/chosen": 0.22209802269935608,
2755
+ "rewards/margins": 0.33867061138153076,
2756
+ "rewards/rejected": -0.11657258868217468,
2757
+ "step": 360
2758
+ },
2759
+ {
2760
+ "epoch": 0.41925443358668113,
2761
+ "grad_norm": 69.35743210486372,
2762
+ "learning_rate": 1.8745884819893192e-07,
2763
+ "logits/chosen": -1.3764009475708008,
2764
+ "logits/rejected": -1.4009249210357666,
2765
+ "logps/chosen": -125.95867919921875,
2766
+ "logps/rejected": -147.38038635253906,
2767
+ "loss": 0.5892,
2768
+ "rewards/accuracies": 0.46875,
2769
+ "rewards/chosen": 0.04307159036397934,
2770
+ "rewards/margins": 0.25595974922180176,
2771
+ "rewards/rejected": -0.21288815140724182,
2772
+ "step": 362
2773
+ },
2774
+ {
2775
+ "epoch": 0.42157075642417663,
2776
+ "grad_norm": 91.06098837601228,
2777
+ "learning_rate": 1.8727083391385219e-07,
2778
+ "logits/chosen": -1.3126693964004517,
2779
+ "logits/rejected": -1.359320044517517,
2780
+ "logps/chosen": -122.15340423583984,
2781
+ "logps/rejected": -152.7900390625,
2782
+ "loss": 0.6084,
2783
+ "rewards/accuracies": 0.65625,
2784
+ "rewards/chosen": 0.16955101490020752,
2785
+ "rewards/margins": 0.33028605580329895,
2786
+ "rewards/rejected": -0.16073507070541382,
2787
+ "step": 364
2788
+ },
2789
+ {
2790
+ "epoch": 0.4238870792616721,
2791
+ "grad_norm": 60.84745087172502,
2792
+ "learning_rate": 1.8708151650028278e-07,
2793
+ "logits/chosen": -1.3809125423431396,
2794
+ "logits/rejected": -1.403237223625183,
2795
+ "logps/chosen": -109.20733642578125,
2796
+ "logps/rejected": -135.84494018554688,
2797
+ "loss": 0.6428,
2798
+ "rewards/accuracies": 0.65625,
2799
+ "rewards/chosen": 0.13176926970481873,
2800
+ "rewards/margins": 0.44196146726608276,
2801
+ "rewards/rejected": -0.31019219756126404,
2802
+ "step": 366
2803
+ },
2804
+ {
2805
+ "epoch": 0.42620340209916757,
2806
+ "grad_norm": 83.0289812455712,
2807
+ "learning_rate": 1.8689089878511214e-07,
2808
+ "logits/chosen": -1.2712593078613281,
2809
+ "logits/rejected": -1.3146370649337769,
2810
+ "logps/chosen": -104.22183990478516,
2811
+ "logps/rejected": -117.42278289794922,
2812
+ "loss": 0.6601,
2813
+ "rewards/accuracies": 0.59375,
2814
+ "rewards/chosen": -0.04589027911424637,
2815
+ "rewards/margins": 0.24963931739330292,
2816
+ "rewards/rejected": -0.2955296039581299,
2817
+ "step": 368
2818
+ },
2819
+ {
2820
+ "epoch": 0.42851972493666307,
2821
+ "grad_norm": 80.18268668813586,
2822
+ "learning_rate": 1.866989836146449e-07,
2823
+ "logits/chosen": -1.367477536201477,
2824
+ "logits/rejected": -1.4047478437423706,
2825
+ "logps/chosen": -156.0530242919922,
2826
+ "logps/rejected": -166.14857482910156,
2827
+ "loss": 0.6033,
2828
+ "rewards/accuracies": 0.65625,
2829
+ "rewards/chosen": 0.03725311905145645,
2830
+ "rewards/margins": 0.22227245569229126,
2831
+ "rewards/rejected": -0.18501931428909302,
2832
+ "step": 370
2833
+ },
2834
+ {
2835
+ "epoch": 0.4308360477741585,
2836
+ "grad_norm": 85.02940823274966,
2837
+ "learning_rate": 1.8650577385455924e-07,
2838
+ "logits/chosen": -1.3402721881866455,
2839
+ "logits/rejected": -1.3483717441558838,
2840
+ "logps/chosen": -129.09817504882812,
2841
+ "logps/rejected": -133.04421997070312,
2842
+ "loss": 0.578,
2843
+ "rewards/accuracies": 0.71875,
2844
+ "rewards/chosen": 0.19856195151805878,
2845
+ "rewards/margins": 0.32389020919799805,
2846
+ "rewards/rejected": -0.12532827258110046,
2847
+ "step": 372
2848
+ },
2849
+ {
2850
+ "epoch": 0.433152370611654,
2851
+ "grad_norm": 84.18493319136046,
2852
+ "learning_rate": 1.8631127238986416e-07,
2853
+ "logits/chosen": -1.3070781230926514,
2854
+ "logits/rejected": -1.3111450672149658,
2855
+ "logps/chosen": -100.65834045410156,
2856
+ "logps/rejected": -119.19929504394531,
2857
+ "loss": 0.57,
2858
+ "rewards/accuracies": 0.59375,
2859
+ "rewards/chosen": 0.15119151771068573,
2860
+ "rewards/margins": 0.44069719314575195,
2861
+ "rewards/rejected": -0.28950563073158264,
2862
+ "step": 374
2863
+ },
2864
+ {
2865
+ "epoch": 0.43546869344914946,
2866
+ "grad_norm": 78.13851817895889,
2867
+ "learning_rate": 1.8611548212485647e-07,
2868
+ "logits/chosen": -1.3796460628509521,
2869
+ "logits/rejected": -1.4454896450042725,
2870
+ "logps/chosen": -137.24407958984375,
2871
+ "logps/rejected": -168.12208557128906,
2872
+ "loss": 0.6024,
2873
+ "rewards/accuracies": 0.6875,
2874
+ "rewards/chosen": 0.2298029363155365,
2875
+ "rewards/margins": 0.5796483159065247,
2876
+ "rewards/rejected": -0.34984540939331055,
2877
+ "step": 376
2878
+ },
2879
+ {
2880
+ "epoch": 0.43778501628664496,
2881
+ "grad_norm": 87.51427473434556,
2882
+ "learning_rate": 1.8591840598307724e-07,
2883
+ "logits/chosen": -1.3684715032577515,
2884
+ "logits/rejected": -1.41554856300354,
2885
+ "logps/chosen": -156.48861694335938,
2886
+ "logps/rejected": -166.43325805664062,
2887
+ "loss": 0.6099,
2888
+ "rewards/accuracies": 0.8125,
2889
+ "rewards/chosen": 0.05118772014975548,
2890
+ "rewards/margins": 0.6047709584236145,
2891
+ "rewards/rejected": -0.5535832047462463,
2892
+ "step": 378
2893
+ },
2894
+ {
2895
+ "epoch": 0.4401013391241404,
2896
+ "grad_norm": 97.94077875373094,
2897
+ "learning_rate": 1.8572004690726835e-07,
2898
+ "logits/chosen": -1.4304860830307007,
2899
+ "logits/rejected": -1.3829154968261719,
2900
+ "logps/chosen": -137.7032928466797,
2901
+ "logps/rejected": -159.42665100097656,
2902
+ "loss": 0.6256,
2903
+ "rewards/accuracies": 0.75,
2904
+ "rewards/chosen": 0.05912143737077713,
2905
+ "rewards/margins": 0.9181233644485474,
2906
+ "rewards/rejected": -0.8590019941329956,
2907
+ "step": 380
2908
+ },
2909
+ {
2910
+ "epoch": 0.4424176619616359,
2911
+ "grad_norm": 117.47560157505089,
2912
+ "learning_rate": 1.8552040785932843e-07,
2913
+ "logits/chosen": -1.2082271575927734,
2914
+ "logits/rejected": -1.32054603099823,
2915
+ "logps/chosen": -129.3510284423828,
2916
+ "logps/rejected": -139.3075714111328,
2917
+ "loss": 0.6776,
2918
+ "rewards/accuracies": 0.625,
2919
+ "rewards/chosen": 0.08690177649259567,
2920
+ "rewards/margins": 0.17026250064373016,
2921
+ "rewards/rejected": -0.08336074650287628,
2922
+ "step": 382
2923
+ },
2924
+ {
2925
+ "epoch": 0.4447339847991314,
2926
+ "grad_norm": 83.36856549076099,
2927
+ "learning_rate": 1.8531949182026864e-07,
2928
+ "logits/chosen": -1.213942289352417,
2929
+ "logits/rejected": -1.2501431703567505,
2930
+ "logps/chosen": -87.4649658203125,
2931
+ "logps/rejected": -105.31576538085938,
2932
+ "loss": 0.6473,
2933
+ "rewards/accuracies": 0.6875,
2934
+ "rewards/chosen": -0.07069863379001617,
2935
+ "rewards/margins": 0.28086185455322266,
2936
+ "rewards/rejected": -0.3515605032444,
2937
+ "step": 384
2938
+ },
2939
+ {
2940
+ "epoch": 0.44705030763662684,
2941
+ "grad_norm": 93.35125079656054,
2942
+ "learning_rate": 1.851173017901682e-07,
2943
+ "logits/chosen": -1.2774831056594849,
2944
+ "logits/rejected": -1.3458952903747559,
2945
+ "logps/chosen": -134.04624938964844,
2946
+ "logps/rejected": -148.02565002441406,
2947
+ "loss": 0.6516,
2948
+ "rewards/accuracies": 0.5,
2949
+ "rewards/chosen": -0.0875653326511383,
2950
+ "rewards/margins": 0.029335327446460724,
2951
+ "rewards/rejected": -0.11690068244934082,
2952
+ "step": 386
2953
+ },
2954
+ {
2955
+ "epoch": 0.44936663047412234,
2956
+ "grad_norm": 94.38591902404973,
2957
+ "learning_rate": 1.8491384078812957e-07,
2958
+ "logits/chosen": -1.3489183187484741,
2959
+ "logits/rejected": -1.3692617416381836,
2960
+ "logps/chosen": -158.86729431152344,
2961
+ "logps/rejected": -175.22946166992188,
2962
+ "loss": 0.6085,
2963
+ "rewards/accuracies": 0.75,
2964
+ "rewards/chosen": 0.01937798410654068,
2965
+ "rewards/margins": 0.4567859470844269,
2966
+ "rewards/rejected": -0.4374079406261444,
2967
+ "step": 388
2968
+ },
2969
+ {
2970
+ "epoch": 0.4516829533116178,
2971
+ "grad_norm": 82.79409553577226,
2972
+ "learning_rate": 1.847091118522333e-07,
2973
+ "logits/chosen": -1.2354220151901245,
2974
+ "logits/rejected": -1.1955327987670898,
2975
+ "logps/chosen": -100.98146057128906,
2976
+ "logps/rejected": -106.97394561767578,
2977
+ "loss": 0.6118,
2978
+ "rewards/accuracies": 0.75,
2979
+ "rewards/chosen": 0.024054907262325287,
2980
+ "rewards/margins": 0.4244306981563568,
2981
+ "rewards/rejected": -0.40037575364112854,
2982
+ "step": 390
2983
+ },
2984
+ {
2985
+ "epoch": 0.4539992761491133,
2986
+ "grad_norm": 68.23646218496863,
2987
+ "learning_rate": 1.8450311803949288e-07,
2988
+ "logits/chosen": -1.4198896884918213,
2989
+ "logits/rejected": -1.339991807937622,
2990
+ "logps/chosen": -96.33162689208984,
2991
+ "logps/rejected": -106.24251556396484,
2992
+ "loss": 0.626,
2993
+ "rewards/accuracies": 0.71875,
2994
+ "rewards/chosen": 0.14840683341026306,
2995
+ "rewards/margins": 0.3869977295398712,
2996
+ "rewards/rejected": -0.23859092593193054,
2997
+ "step": 392
2998
+ },
2999
+ {
3000
+ "epoch": 0.4563155989866088,
3001
+ "grad_norm": 90.98509885957323,
3002
+ "learning_rate": 1.842958624258088e-07,
3003
+ "logits/chosen": -1.4057539701461792,
3004
+ "logits/rejected": -1.4758132696151733,
3005
+ "logps/chosen": -122.16340637207031,
3006
+ "logps/rejected": -123.98712158203125,
3007
+ "loss": 0.6429,
3008
+ "rewards/accuracies": 0.5,
3009
+ "rewards/chosen": 0.053804248571395874,
3010
+ "rewards/margins": 0.13191546499729156,
3011
+ "rewards/rejected": -0.0781112089753151,
3012
+ "step": 394
3013
+ },
3014
+ {
3015
+ "epoch": 0.4586319218241042,
3016
+ "grad_norm": 90.51866810043896,
3017
+ "learning_rate": 1.8408734810592286e-07,
3018
+ "logits/chosen": -1.3948010206222534,
3019
+ "logits/rejected": -1.4117646217346191,
3020
+ "logps/chosen": -170.54193115234375,
3021
+ "logps/rejected": -179.2427978515625,
3022
+ "loss": 0.5834,
3023
+ "rewards/accuracies": 0.71875,
3024
+ "rewards/chosen": 0.059171393513679504,
3025
+ "rewards/margins": 0.42534855008125305,
3026
+ "rewards/rejected": -0.36617720127105713,
3027
+ "step": 396
3028
+ },
3029
+ {
3030
+ "epoch": 0.4609482446615997,
3031
+ "grad_norm": 93.16409936228983,
3032
+ "learning_rate": 1.838775781933718e-07,
3033
+ "logits/chosen": -1.2591919898986816,
3034
+ "logits/rejected": -1.278662085533142,
3035
+ "logps/chosen": -133.6868133544922,
3036
+ "logps/rejected": -160.47731018066406,
3037
+ "loss": 0.6789,
3038
+ "rewards/accuracies": 0.5625,
3039
+ "rewards/chosen": -0.12275616079568863,
3040
+ "rewards/margins": 0.09390115737915039,
3041
+ "rewards/rejected": -0.21665732562541962,
3042
+ "step": 398
3043
+ },
3044
+ {
3045
+ "epoch": 0.46326456749909517,
3046
+ "grad_norm": 85.0019450300031,
3047
+ "learning_rate": 1.8366655582044093e-07,
3048
+ "logits/chosen": -1.295358419418335,
3049
+ "logits/rejected": -1.3356658220291138,
3050
+ "logps/chosen": -82.7631607055664,
3051
+ "logps/rejected": -102.0246810913086,
3052
+ "loss": 0.6151,
3053
+ "rewards/accuracies": 0.65625,
3054
+ "rewards/chosen": -0.2104763686656952,
3055
+ "rewards/margins": 0.3289705812931061,
3056
+ "rewards/rejected": -0.5394470691680908,
3057
+ "step": 400
3058
+ },
3059
+ {
3060
+ "epoch": 0.46326456749909517,
3061
+ "eval_logits/chosen": -1.3069441318511963,
3062
+ "eval_logits/rejected": -1.3023654222488403,
3063
+ "eval_logps/chosen": -140.2086944580078,
3064
+ "eval_logps/rejected": -139.57632446289062,
3065
+ "eval_loss": 0.6798678040504456,
3066
+ "eval_rewards/accuracies": 0.6000000238418579,
3067
+ "eval_rewards/chosen": -0.4577521085739136,
3068
+ "eval_rewards/margins": 0.10452325642108917,
3069
+ "eval_rewards/rejected": -0.5622754096984863,
3070
+ "eval_runtime": 26.7292,
3071
+ "eval_samples_per_second": 3.741,
3072
+ "eval_steps_per_second": 0.935,
3073
+ "step": 400
3074
+ },
3075
+ {
3076
+ "epoch": 0.46558089033659067,
3077
+ "grad_norm": 107.62190686868198,
3078
+ "learning_rate": 1.834542841381173e-07,
3079
+ "logits/chosen": -1.4000458717346191,
3080
+ "logits/rejected": -1.4169011116027832,
3081
+ "logps/chosen": -187.33409118652344,
3082
+ "logps/rejected": -207.20140075683594,
3083
+ "loss": 0.5555,
3084
+ "rewards/accuracies": 0.53125,
3085
+ "rewards/chosen": -0.11752481758594513,
3086
+ "rewards/margins": 0.4881589412689209,
3087
+ "rewards/rejected": -0.6056837439537048,
3088
+ "step": 402
3089
+ },
3090
+ {
3091
+ "epoch": 0.4678972131740861,
3092
+ "grad_norm": 81.84841168291128,
3093
+ "learning_rate": 1.8324076631604262e-07,
3094
+ "logits/chosen": -1.2451642751693726,
3095
+ "logits/rejected": -1.2964147329330444,
3096
+ "logps/chosen": -136.41270446777344,
3097
+ "logps/rejected": -156.53018188476562,
3098
+ "loss": 0.6282,
3099
+ "rewards/accuracies": 0.625,
3100
+ "rewards/chosen": -0.1907982975244522,
3101
+ "rewards/margins": 0.6749911308288574,
3102
+ "rewards/rejected": -0.8657894134521484,
3103
+ "step": 404
3104
+ },
3105
+ {
3106
+ "epoch": 0.4702135360115816,
3107
+ "grad_norm": 70.51232634632699,
3108
+ "learning_rate": 1.8302600554246598e-07,
3109
+ "logits/chosen": -1.2217371463775635,
3110
+ "logits/rejected": -1.2302532196044922,
3111
+ "logps/chosen": -109.1505355834961,
3112
+ "logps/rejected": -124.399169921875,
3113
+ "loss": 0.5908,
3114
+ "rewards/accuracies": 0.71875,
3115
+ "rewards/chosen": -0.011890493333339691,
3116
+ "rewards/margins": 0.41306906938552856,
3117
+ "rewards/rejected": -0.42495957016944885,
3118
+ "step": 406
3119
+ },
3120
+ {
3121
+ "epoch": 0.4725298588490771,
3122
+ "grad_norm": 157.73563743497198,
3123
+ "learning_rate": 1.8281000502419624e-07,
3124
+ "logits/chosen": -1.316713809967041,
3125
+ "logits/rejected": -1.3389533758163452,
3126
+ "logps/chosen": -123.63529968261719,
3127
+ "logps/rejected": -128.41409301757812,
3128
+ "loss": 0.6549,
3129
+ "rewards/accuracies": 0.59375,
3130
+ "rewards/chosen": -0.4301506578922272,
3131
+ "rewards/margins": 0.16745811700820923,
3132
+ "rewards/rejected": -0.597608745098114,
3133
+ "step": 408
3134
+ },
3135
+ {
3136
+ "epoch": 0.47484618168657255,
3137
+ "grad_norm": 107.85099770446011,
3138
+ "learning_rate": 1.8259276798655412e-07,
3139
+ "logits/chosen": -1.3569673299789429,
3140
+ "logits/rejected": -1.3319692611694336,
3141
+ "logps/chosen": -149.51708984375,
3142
+ "logps/rejected": -185.8908233642578,
3143
+ "loss": 0.6863,
3144
+ "rewards/accuracies": 0.59375,
3145
+ "rewards/chosen": -0.11509159207344055,
3146
+ "rewards/margins": 0.3641398549079895,
3147
+ "rewards/rejected": -0.47923144698143005,
3148
+ "step": 410
3149
+ },
3150
+ {
3151
+ "epoch": 0.47716250452406805,
3152
+ "grad_norm": 109.31239844961944,
3153
+ "learning_rate": 1.8237429767332405e-07,
3154
+ "logits/chosen": -1.3673866987228394,
3155
+ "logits/rejected": -1.4460492134094238,
3156
+ "logps/chosen": -144.90838623046875,
3157
+ "logps/rejected": -157.9684295654297,
3158
+ "loss": 0.6105,
3159
+ "rewards/accuracies": 0.9375,
3160
+ "rewards/chosen": -0.08836071193218231,
3161
+ "rewards/margins": 0.6367740631103516,
3162
+ "rewards/rejected": -0.7251348495483398,
3163
+ "step": 412
3164
+ },
3165
+ {
3166
+ "epoch": 0.4794788273615635,
3167
+ "grad_norm": 79.38161196529609,
3168
+ "learning_rate": 1.8215459734670573e-07,
3169
+ "logits/chosen": -1.341538667678833,
3170
+ "logits/rejected": -1.371129035949707,
3171
+ "logps/chosen": -135.0418243408203,
3172
+ "logps/rejected": -181.38201904296875,
3173
+ "loss": 0.6121,
3174
+ "rewards/accuracies": 0.8125,
3175
+ "rewards/chosen": 0.04946846514940262,
3176
+ "rewards/margins": 0.8221450448036194,
3177
+ "rewards/rejected": -0.7726765871047974,
3178
+ "step": 414
3179
+ },
3180
+ {
3181
+ "epoch": 0.481795150199059,
3182
+ "grad_norm": 98.8037188643182,
3183
+ "learning_rate": 1.8193367028726547e-07,
3184
+ "logits/chosen": -1.1779212951660156,
3185
+ "logits/rejected": -1.2224653959274292,
3186
+ "logps/chosen": -91.48204040527344,
3187
+ "logps/rejected": -109.18719482421875,
3188
+ "loss": 0.6932,
3189
+ "rewards/accuracies": 0.625,
3190
+ "rewards/chosen": 0.05973606929183006,
3191
+ "rewards/margins": 0.11246003955602646,
3192
+ "rewards/rejected": -0.0527239665389061,
3193
+ "step": 416
3194
+ },
3195
+ {
3196
+ "epoch": 0.4841114730365545,
3197
+ "grad_norm": 75.72657378652657,
3198
+ "learning_rate": 1.8171151979388712e-07,
3199
+ "logits/chosen": -1.2831331491470337,
3200
+ "logits/rejected": -1.3463534116744995,
3201
+ "logps/chosen": -155.19076538085938,
3202
+ "logps/rejected": -191.88758850097656,
3203
+ "loss": 0.612,
3204
+ "rewards/accuracies": 0.625,
3205
+ "rewards/chosen": -0.32773423194885254,
3206
+ "rewards/margins": 0.40678921341896057,
3207
+ "rewards/rejected": -0.7345234751701355,
3208
+ "step": 418
3209
+ },
3210
+ {
3211
+ "epoch": 0.48642779587404994,
3212
+ "grad_norm": 88.72825200499656,
3213
+ "learning_rate": 1.8148814918372285e-07,
3214
+ "logits/chosen": -1.2322022914886475,
3215
+ "logits/rejected": -1.2740528583526611,
3216
+ "logps/chosen": -125.58689880371094,
3217
+ "logps/rejected": -145.04537963867188,
3218
+ "loss": 0.6336,
3219
+ "rewards/accuracies": 0.5625,
3220
+ "rewards/chosen": -0.02973347157239914,
3221
+ "rewards/margins": 0.28559258580207825,
3222
+ "rewards/rejected": -0.3153260350227356,
3223
+ "step": 420
3224
+ },
3225
+ {
3226
+ "epoch": 0.48874411871154544,
3227
+ "grad_norm": 84.14984078776182,
3228
+ "learning_rate": 1.8126356179214365e-07,
3229
+ "logits/chosen": -1.3616023063659668,
3230
+ "logits/rejected": -1.3728755712509155,
3231
+ "logps/chosen": -113.55232238769531,
3232
+ "logps/rejected": -120.91179656982422,
3233
+ "loss": 0.6093,
3234
+ "rewards/accuracies": 0.5625,
3235
+ "rewards/chosen": -0.14902538061141968,
3236
+ "rewards/margins": 0.14539653062820435,
3237
+ "rewards/rejected": -0.294421911239624,
3238
+ "step": 422
3239
+ },
3240
+ {
3241
+ "epoch": 0.4910604415490409,
3242
+ "grad_norm": 102.58844455062285,
3243
+ "learning_rate": 1.8103776097268942e-07,
3244
+ "logits/chosen": -1.3973523378372192,
3245
+ "logits/rejected": -1.4224525690078735,
3246
+ "logps/chosen": -146.35865783691406,
3247
+ "logps/rejected": -155.32872009277344,
3248
+ "loss": 0.5969,
3249
+ "rewards/accuracies": 0.6875,
3250
+ "rewards/chosen": -0.18615968525409698,
3251
+ "rewards/margins": 0.1702008694410324,
3252
+ "rewards/rejected": -0.3563604950904846,
3253
+ "step": 424
3254
+ },
3255
+ {
3256
+ "epoch": 0.4933767643865364,
3257
+ "grad_norm": 97.6281549014596,
3258
+ "learning_rate": 1.8081075009701908e-07,
3259
+ "logits/chosen": -1.3393031358718872,
3260
+ "logits/rejected": -1.3568938970565796,
3261
+ "logps/chosen": -156.75132751464844,
3262
+ "logps/rejected": -183.3557891845703,
3263
+ "loss": 0.5524,
3264
+ "rewards/accuracies": 0.6875,
3265
+ "rewards/chosen": 0.006147988140583038,
3266
+ "rewards/margins": 0.6164807677268982,
3267
+ "rewards/rejected": -0.6103328466415405,
3268
+ "step": 426
3269
+ },
3270
+ {
3271
+ "epoch": 0.4956930872240318,
3272
+ "grad_norm": 79.28532180545582,
3273
+ "learning_rate": 1.8058253255486004e-07,
3274
+ "logits/chosen": -1.479441523551941,
3275
+ "logits/rejected": -1.455161213874817,
3276
+ "logps/chosen": -149.18377685546875,
3277
+ "logps/rejected": -175.40121459960938,
3278
+ "loss": 0.6324,
3279
+ "rewards/accuracies": 0.5,
3280
+ "rewards/chosen": -0.16930466890335083,
3281
+ "rewards/margins": 0.3928312659263611,
3282
+ "rewards/rejected": -0.5621359348297119,
3283
+ "step": 428
3284
+ },
3285
+ {
3286
+ "epoch": 0.4980094100615273,
3287
+ "grad_norm": 75.50103825872334,
3288
+ "learning_rate": 1.8035311175395766e-07,
3289
+ "logits/chosen": -1.279894232749939,
3290
+ "logits/rejected": -1.366225004196167,
3291
+ "logps/chosen": -149.7015838623047,
3292
+ "logps/rejected": -169.37600708007812,
3293
+ "loss": 0.634,
3294
+ "rewards/accuracies": 0.625,
3295
+ "rewards/chosen": 0.10414651781320572,
3296
+ "rewards/margins": 0.31277552247047424,
3297
+ "rewards/rejected": -0.20862898230552673,
3298
+ "step": 430
3299
+ },
3300
+ {
3301
+ "epoch": 0.5003257328990228,
3302
+ "grad_norm": 110.71149959510932,
3303
+ "learning_rate": 1.8012249112002445e-07,
3304
+ "logits/chosen": -1.3446143865585327,
3305
+ "logits/rejected": -1.346205234527588,
3306
+ "logps/chosen": -135.6072998046875,
3307
+ "logps/rejected": -148.6031951904297,
3308
+ "loss": 0.6534,
3309
+ "rewards/accuracies": 0.53125,
3310
+ "rewards/chosen": -0.04478984698653221,
3311
+ "rewards/margins": 0.20214848220348358,
3312
+ "rewards/rejected": -0.2469383329153061,
3313
+ "step": 432
3314
+ },
3315
+ {
3316
+ "epoch": 0.5026420557365183,
3317
+ "grad_norm": 99.01624284418935,
3318
+ "learning_rate": 1.7989067409668867e-07,
3319
+ "logits/chosen": -1.3353965282440186,
3320
+ "logits/rejected": -1.3816275596618652,
3321
+ "logps/chosen": -83.31758117675781,
3322
+ "logps/rejected": -101.72441101074219,
3323
+ "loss": 0.65,
3324
+ "rewards/accuracies": 0.75,
3325
+ "rewards/chosen": 0.16134825348854065,
3326
+ "rewards/margins": 0.24150311946868896,
3327
+ "rewards/rejected": -0.0801548883318901,
3328
+ "step": 434
3329
+ },
3330
+ {
3331
+ "epoch": 0.5049583785740137,
3332
+ "grad_norm": 85.01833595262721,
3333
+ "learning_rate": 1.7965766414544326e-07,
3334
+ "logits/chosen": -1.3208928108215332,
3335
+ "logits/rejected": -1.4323692321777344,
3336
+ "logps/chosen": -170.11387634277344,
3337
+ "logps/rejected": -190.21917724609375,
3338
+ "loss": 0.5937,
3339
+ "rewards/accuracies": 0.6875,
3340
+ "rewards/chosen": -0.06436862796545029,
3341
+ "rewards/margins": 0.4921523928642273,
3342
+ "rewards/rejected": -0.5565209984779358,
3343
+ "step": 436
3344
+ },
3345
+ {
3346
+ "epoch": 0.5072747014115092,
3347
+ "grad_norm": 78.22902080084621,
3348
+ "learning_rate": 1.794234647455938e-07,
3349
+ "logits/chosen": -1.5033388137817383,
3350
+ "logits/rejected": -1.4195587635040283,
3351
+ "logps/chosen": -167.2239227294922,
3352
+ "logps/rejected": -187.388427734375,
3353
+ "loss": 0.5993,
3354
+ "rewards/accuracies": 0.71875,
3355
+ "rewards/chosen": -0.00830845721065998,
3356
+ "rewards/margins": 0.36755993962287903,
3357
+ "rewards/rejected": -0.37586843967437744,
3358
+ "step": 438
3359
+ },
3360
+ {
3361
+ "epoch": 0.5095910242490047,
3362
+ "grad_norm": 93.03449604866357,
3363
+ "learning_rate": 1.7918807939420688e-07,
3364
+ "logits/chosen": -1.2785309553146362,
3365
+ "logits/rejected": -1.3855379819869995,
3366
+ "logps/chosen": -106.93773651123047,
3367
+ "logps/rejected": -136.5991668701172,
3368
+ "loss": 0.5876,
3369
+ "rewards/accuracies": 0.6875,
3370
+ "rewards/chosen": 0.07631123065948486,
3371
+ "rewards/margins": 0.45985180139541626,
3372
+ "rewards/rejected": -0.3835405707359314,
3373
+ "step": 440
3374
+ },
3375
+ {
3376
+ "epoch": 0.5119073470865002,
3377
+ "grad_norm": 98.9210182597883,
3378
+ "learning_rate": 1.7895151160605755e-07,
3379
+ "logits/chosen": -1.4166314601898193,
3380
+ "logits/rejected": -1.3835158348083496,
3381
+ "logps/chosen": -187.5051727294922,
3382
+ "logps/rejected": -196.1830596923828,
3383
+ "loss": 0.5841,
3384
+ "rewards/accuracies": 0.59375,
3385
+ "rewards/chosen": -0.13197794556617737,
3386
+ "rewards/margins": 0.36578553915023804,
3387
+ "rewards/rejected": -0.49776342511177063,
3388
+ "step": 442
3389
+ },
3390
+ {
3391
+ "epoch": 0.5142236699239957,
3392
+ "grad_norm": 83.90736102267026,
3393
+ "learning_rate": 1.7871376491357716e-07,
3394
+ "logits/chosen": -1.3803664445877075,
3395
+ "logits/rejected": -1.3876008987426758,
3396
+ "logps/chosen": -147.97230529785156,
3397
+ "logps/rejected": -158.1250762939453,
3398
+ "loss": 0.6266,
3399
+ "rewards/accuracies": 0.65625,
3400
+ "rewards/chosen": 0.05537159740924835,
3401
+ "rewards/margins": 0.3145188093185425,
3402
+ "rewards/rejected": -0.2591472268104553,
3403
+ "step": 444
3404
+ },
3405
+ {
3406
+ "epoch": 0.5165399927614911,
3407
+ "grad_norm": 83.18792426148275,
3408
+ "learning_rate": 1.7847484286680036e-07,
3409
+ "logits/chosen": -1.2037944793701172,
3410
+ "logits/rejected": -1.3015272617340088,
3411
+ "logps/chosen": -116.46647644042969,
3412
+ "logps/rejected": -133.59059143066406,
3413
+ "loss": 0.6422,
3414
+ "rewards/accuracies": 0.71875,
3415
+ "rewards/chosen": -0.2899719476699829,
3416
+ "rewards/margins": 0.47784021496772766,
3417
+ "rewards/rejected": -0.767812192440033,
3418
+ "step": 446
3419
+ },
3420
+ {
3421
+ "epoch": 0.5188563155989866,
3422
+ "grad_norm": 86.96480601224319,
3423
+ "learning_rate": 1.782347490333123e-07,
3424
+ "logits/chosen": -1.3997318744659424,
3425
+ "logits/rejected": -1.3888890743255615,
3426
+ "logps/chosen": -168.47235107421875,
3427
+ "logps/rejected": -173.81881713867188,
3428
+ "loss": 0.5883,
3429
+ "rewards/accuracies": 0.65625,
3430
+ "rewards/chosen": 0.2631508708000183,
3431
+ "rewards/margins": 0.40129777789115906,
3432
+ "rewards/rejected": -0.13814686238765717,
3433
+ "step": 448
3434
+ },
3435
+ {
3436
+ "epoch": 0.5211726384364821,
3437
+ "grad_norm": 68.31450376756777,
3438
+ "learning_rate": 1.7799348699819518e-07,
3439
+ "logits/chosen": -1.3524158000946045,
3440
+ "logits/rejected": -1.3299603462219238,
3441
+ "logps/chosen": -121.39910888671875,
3442
+ "logps/rejected": -131.10423278808594,
3443
+ "loss": 0.5874,
3444
+ "rewards/accuracies": 0.75,
3445
+ "rewards/chosen": 0.28284794092178345,
3446
+ "rewards/margins": 0.3859240412712097,
3447
+ "rewards/rejected": -0.10307610780000687,
3448
+ "step": 450
3449
+ },
3450
+ {
3451
+ "epoch": 0.5234889612739776,
3452
+ "grad_norm": 111.76852046416136,
3453
+ "learning_rate": 1.7775106036397474e-07,
3454
+ "logits/chosen": -1.2830047607421875,
3455
+ "logits/rejected": -1.3414244651794434,
3456
+ "logps/chosen": -130.662353515625,
3457
+ "logps/rejected": -179.69061279296875,
3458
+ "loss": 0.5992,
3459
+ "rewards/accuracies": 0.75,
3460
+ "rewards/chosen": 0.22120808064937592,
3461
+ "rewards/margins": 1.2555629014968872,
3462
+ "rewards/rejected": -1.0343549251556396,
3463
+ "step": 452
3464
+ },
3465
+ {
3466
+ "epoch": 0.525805284111473,
3467
+ "grad_norm": 67.76003255019495,
3468
+ "learning_rate": 1.775074727505667e-07,
3469
+ "logits/chosen": -1.1533057689666748,
3470
+ "logits/rejected": -1.294029951095581,
3471
+ "logps/chosen": -149.02322387695312,
3472
+ "logps/rejected": -172.8708953857422,
3473
+ "loss": 0.5608,
3474
+ "rewards/accuracies": 0.5,
3475
+ "rewards/chosen": -0.04835113137960434,
3476
+ "rewards/margins": 0.28162479400634766,
3477
+ "rewards/rejected": -0.3299759328365326,
3478
+ "step": 454
3479
+ },
3480
+ {
3481
+ "epoch": 0.5281216069489685,
3482
+ "grad_norm": 85.70884472679678,
3483
+ "learning_rate": 1.7726272779522228e-07,
3484
+ "logits/chosen": -1.2949302196502686,
3485
+ "logits/rejected": -1.387807846069336,
3486
+ "logps/chosen": -159.39170837402344,
3487
+ "logps/rejected": -189.28244018554688,
3488
+ "loss": 0.6753,
3489
+ "rewards/accuracies": 0.71875,
3490
+ "rewards/chosen": 0.12795251607894897,
3491
+ "rewards/margins": 0.3282526135444641,
3492
+ "rewards/rejected": -0.20030008256435394,
3493
+ "step": 456
3494
+ },
3495
+ {
3496
+ "epoch": 0.530437929786464,
3497
+ "grad_norm": 69.96049512457706,
3498
+ "learning_rate": 1.7701682915247437e-07,
3499
+ "logits/chosen": -1.1357134580612183,
3500
+ "logits/rejected": -1.2111129760742188,
3501
+ "logps/chosen": -168.00326538085938,
3502
+ "logps/rejected": -185.22506713867188,
3503
+ "loss": 0.6102,
3504
+ "rewards/accuracies": 0.71875,
3505
+ "rewards/chosen": -0.46182161569595337,
3506
+ "rewards/margins": 0.4477265477180481,
3507
+ "rewards/rejected": -0.9095481634140015,
3508
+ "step": 458
3509
+ },
3510
+ {
3511
+ "epoch": 0.5327542526239595,
3512
+ "grad_norm": 101.3186304412605,
3513
+ "learning_rate": 1.7676978049408259e-07,
3514
+ "logits/chosen": -1.3433293104171753,
3515
+ "logits/rejected": -1.3274402618408203,
3516
+ "logps/chosen": -129.25802612304688,
3517
+ "logps/rejected": -149.58999633789062,
3518
+ "loss": 0.6877,
3519
+ "rewards/accuracies": 0.65625,
3520
+ "rewards/chosen": -0.09710556268692017,
3521
+ "rewards/margins": 0.5576457977294922,
3522
+ "rewards/rejected": -0.6547513604164124,
3523
+ "step": 460
3524
+ },
3525
+ {
3526
+ "epoch": 0.535070575461455,
3527
+ "grad_norm": 101.53493027467981,
3528
+ "learning_rate": 1.7652158550897863e-07,
3529
+ "logits/chosen": -1.2119991779327393,
3530
+ "logits/rejected": -1.254407525062561,
3531
+ "logps/chosen": -124.32587432861328,
3532
+ "logps/rejected": -141.7906036376953,
3533
+ "loss": 0.6527,
3534
+ "rewards/accuracies": 0.625,
3535
+ "rewards/chosen": -0.07976742088794708,
3536
+ "rewards/margins": 0.11116158217191696,
3537
+ "rewards/rejected": -0.19092898070812225,
3538
+ "step": 462
3539
+ },
3540
+ {
3541
+ "epoch": 0.5373868982989504,
3542
+ "grad_norm": 72.67582060276438,
3543
+ "learning_rate": 1.7627224790321116e-07,
3544
+ "logits/chosen": -1.3650070428848267,
3545
+ "logits/rejected": -1.3934192657470703,
3546
+ "logps/chosen": -111.0053939819336,
3547
+ "logps/rejected": -128.06703186035156,
3548
+ "loss": 0.6384,
3549
+ "rewards/accuracies": 0.625,
3550
+ "rewards/chosen": 0.09877490997314453,
3551
+ "rewards/margins": 0.22652901709079742,
3552
+ "rewards/rejected": -0.1277541220188141,
3553
+ "step": 464
3554
+ },
3555
+ {
3556
+ "epoch": 0.5397032211364459,
3557
+ "grad_norm": 104.80291110492522,
3558
+ "learning_rate": 1.7602177139989042e-07,
3559
+ "logits/chosen": -1.2948188781738281,
3560
+ "logits/rejected": -1.3249576091766357,
3561
+ "logps/chosen": -113.75486755371094,
3562
+ "logps/rejected": -135.57427978515625,
3563
+ "loss": 0.6462,
3564
+ "rewards/accuracies": 0.59375,
3565
+ "rewards/chosen": 0.04134136065840721,
3566
+ "rewards/margins": 0.30063849687576294,
3567
+ "rewards/rejected": -0.25929710268974304,
3568
+ "step": 466
3569
+ },
3570
+ {
3571
+ "epoch": 0.5420195439739414,
3572
+ "grad_norm": 78.82420405990091,
3573
+ "learning_rate": 1.7577015973913274e-07,
3574
+ "logits/chosen": -1.2992827892303467,
3575
+ "logits/rejected": -1.3570318222045898,
3576
+ "logps/chosen": -131.05203247070312,
3577
+ "logps/rejected": -151.40420532226562,
3578
+ "loss": 0.6198,
3579
+ "rewards/accuracies": 0.71875,
3580
+ "rewards/chosen": 0.12259967625141144,
3581
+ "rewards/margins": 0.3709834814071655,
3582
+ "rewards/rejected": -0.2483838051557541,
3583
+ "step": 468
3584
+ },
3585
+ {
3586
+ "epoch": 0.5443358668114369,
3587
+ "grad_norm": 72.33642230267687,
3588
+ "learning_rate": 1.755174166780045e-07,
3589
+ "logits/chosen": -1.1955764293670654,
3590
+ "logits/rejected": -1.304951786994934,
3591
+ "logps/chosen": -132.34945678710938,
3592
+ "logps/rejected": -160.3063201904297,
3593
+ "loss": 0.5581,
3594
+ "rewards/accuracies": 0.78125,
3595
+ "rewards/chosen": 0.14929035305976868,
3596
+ "rewards/margins": 0.9690365791320801,
3597
+ "rewards/rejected": -0.8197463154792786,
3598
+ "step": 470
3599
+ },
3600
+ {
3601
+ "epoch": 0.5466521896489324,
3602
+ "grad_norm": 95.99345130843376,
3603
+ "learning_rate": 1.7526354599046632e-07,
3604
+ "logits/chosen": -1.3738641738891602,
3605
+ "logits/rejected": -1.4558396339416504,
3606
+ "logps/chosen": -124.96098327636719,
3607
+ "logps/rejected": -148.17123413085938,
3608
+ "loss": 0.6421,
3609
+ "rewards/accuracies": 0.6875,
3610
+ "rewards/chosen": 0.09812385588884354,
3611
+ "rewards/margins": 0.24952289462089539,
3612
+ "rewards/rejected": -0.15139903128147125,
3613
+ "step": 472
3614
+ },
3615
+ {
3616
+ "epoch": 0.5489685124864278,
3617
+ "grad_norm": 80.16504208727451,
3618
+ "learning_rate": 1.7500855146731648e-07,
3619
+ "logits/chosen": -1.2267169952392578,
3620
+ "logits/rejected": -1.2515380382537842,
3621
+ "logps/chosen": -148.4540557861328,
3622
+ "logps/rejected": -180.935791015625,
3623
+ "loss": 0.6187,
3624
+ "rewards/accuracies": 0.59375,
3625
+ "rewards/chosen": -0.06260286271572113,
3626
+ "rewards/margins": 1.7078866958618164,
3627
+ "rewards/rejected": -1.7704894542694092,
3628
+ "step": 474
3629
+ },
3630
+ {
3631
+ "epoch": 0.5512848353239233,
3632
+ "grad_norm": 113.11770155446688,
3633
+ "learning_rate": 1.747524369161343e-07,
3634
+ "logits/chosen": -1.3779189586639404,
3635
+ "logits/rejected": -1.3472117185592651,
3636
+ "logps/chosen": -137.42312622070312,
3637
+ "logps/rejected": -141.66329956054688,
3638
+ "loss": 0.6569,
3639
+ "rewards/accuracies": 0.59375,
3640
+ "rewards/chosen": -0.24386143684387207,
3641
+ "rewards/margins": 0.2617953419685364,
3642
+ "rewards/rejected": -0.5056568384170532,
3643
+ "step": 476
3644
+ },
3645
+ {
3646
+ "epoch": 0.5536011581614187,
3647
+ "grad_norm": 119.83970905986772,
3648
+ "learning_rate": 1.744952061612234e-07,
3649
+ "logits/chosen": -1.4478602409362793,
3650
+ "logits/rejected": -1.470253348350525,
3651
+ "logps/chosen": -162.07476806640625,
3652
+ "logps/rejected": -187.415283203125,
3653
+ "loss": 0.6087,
3654
+ "rewards/accuracies": 0.6875,
3655
+ "rewards/chosen": 0.03821418434381485,
3656
+ "rewards/margins": 0.7775447368621826,
3657
+ "rewards/rejected": -0.739330530166626,
3658
+ "step": 478
3659
+ },
3660
+ {
3661
+ "epoch": 0.5559174809989142,
3662
+ "grad_norm": 82.18148965783794,
3663
+ "learning_rate": 1.7423686304355468e-07,
3664
+ "logits/chosen": -1.4132378101348877,
3665
+ "logits/rejected": -1.4143118858337402,
3666
+ "logps/chosen": -135.87957763671875,
3667
+ "logps/rejected": -154.1642608642578,
3668
+ "loss": 0.604,
3669
+ "rewards/accuracies": 0.65625,
3670
+ "rewards/chosen": -0.36500078439712524,
3671
+ "rewards/margins": 0.42764222621917725,
3672
+ "rewards/rejected": -0.7926430106163025,
3673
+ "step": 480
3674
+ },
3675
+ {
3676
+ "epoch": 0.5582338038364097,
3677
+ "grad_norm": 80.80323897214724,
3678
+ "learning_rate": 1.7397741142070867e-07,
3679
+ "logits/chosen": -1.3779712915420532,
3680
+ "logits/rejected": -1.3945672512054443,
3681
+ "logps/chosen": -172.9818115234375,
3682
+ "logps/rejected": -181.16062927246094,
3683
+ "loss": 0.5964,
3684
+ "rewards/accuracies": 0.6875,
3685
+ "rewards/chosen": 0.04211435094475746,
3686
+ "rewards/margins": 0.4079417586326599,
3687
+ "rewards/rejected": -0.36582741141319275,
3688
+ "step": 482
3689
+ },
3690
+ {
3691
+ "epoch": 0.5605501266739051,
3692
+ "grad_norm": 76.61028661180849,
3693
+ "learning_rate": 1.737168551668182e-07,
3694
+ "logits/chosen": -1.190808653831482,
3695
+ "logits/rejected": -1.271024465560913,
3696
+ "logps/chosen": -131.51797485351562,
3697
+ "logps/rejected": -167.06590270996094,
3698
+ "loss": 0.5975,
3699
+ "rewards/accuracies": 0.75,
3700
+ "rewards/chosen": -0.07135076820850372,
3701
+ "rewards/margins": 0.8735796213150024,
3702
+ "rewards/rejected": -0.9449302554130554,
3703
+ "step": 484
3704
+ },
3705
+ {
3706
+ "epoch": 0.5628664495114006,
3707
+ "grad_norm": 80.91548302041826,
3708
+ "learning_rate": 1.7345519817251053e-07,
3709
+ "logits/chosen": -1.3176366090774536,
3710
+ "logits/rejected": -1.331200122833252,
3711
+ "logps/chosen": -145.1810760498047,
3712
+ "logps/rejected": -171.1893768310547,
3713
+ "loss": 0.6177,
3714
+ "rewards/accuracies": 0.625,
3715
+ "rewards/chosen": -0.01799055188894272,
3716
+ "rewards/margins": 0.4896019399166107,
3717
+ "rewards/rejected": -0.507592499256134,
3718
+ "step": 486
3719
+ },
3720
+ {
3721
+ "epoch": 0.5651827723488961,
3722
+ "grad_norm": 89.40658710689003,
3723
+ "learning_rate": 1.7319244434484895e-07,
3724
+ "logits/chosen": -1.2093366384506226,
3725
+ "logits/rejected": -1.1616159677505493,
3726
+ "logps/chosen": -140.53761291503906,
3727
+ "logps/rejected": -141.9064483642578,
3728
+ "loss": 0.6064,
3729
+ "rewards/accuracies": 0.6875,
3730
+ "rewards/chosen": -0.2011549472808838,
3731
+ "rewards/margins": 0.3880937099456787,
3732
+ "rewards/rejected": -0.5892486572265625,
3733
+ "step": 488
3734
+ },
3735
+ {
3736
+ "epoch": 0.5674990951863916,
3737
+ "grad_norm": 76.26303147749239,
3738
+ "learning_rate": 1.7292859760727492e-07,
3739
+ "logits/chosen": -1.2799924612045288,
3740
+ "logits/rejected": -1.296557903289795,
3741
+ "logps/chosen": -117.47547912597656,
3742
+ "logps/rejected": -129.87294006347656,
3743
+ "loss": 0.6132,
3744
+ "rewards/accuracies": 0.71875,
3745
+ "rewards/chosen": -0.10736295580863953,
3746
+ "rewards/margins": 0.3569309115409851,
3747
+ "rewards/rejected": -0.464293897151947,
3748
+ "step": 490
3749
+ },
3750
+ {
3751
+ "epoch": 0.5698154180238871,
3752
+ "grad_norm": 95.13972864679343,
3753
+ "learning_rate": 1.7266366189954905e-07,
3754
+ "logits/chosen": -1.348731517791748,
3755
+ "logits/rejected": -1.3340685367584229,
3756
+ "logps/chosen": -150.54696655273438,
3757
+ "logps/rejected": -185.81204223632812,
3758
+ "loss": 0.6421,
3759
+ "rewards/accuracies": 0.59375,
3760
+ "rewards/chosen": -0.1124522015452385,
3761
+ "rewards/margins": 0.7442688941955566,
3762
+ "rewards/rejected": -0.856721043586731,
3763
+ "step": 492
3764
+ },
3765
+ {
3766
+ "epoch": 0.5721317408613825,
3767
+ "grad_norm": 106.06131234014966,
3768
+ "learning_rate": 1.7239764117769258e-07,
3769
+ "logits/chosen": -1.3093186616897583,
3770
+ "logits/rejected": -1.3834538459777832,
3771
+ "logps/chosen": -193.04637145996094,
3772
+ "logps/rejected": -233.44293212890625,
3773
+ "loss": 0.6109,
3774
+ "rewards/accuracies": 0.78125,
3775
+ "rewards/chosen": -0.16808415949344635,
3776
+ "rewards/margins": 1.2206228971481323,
3777
+ "rewards/rejected": -1.388707160949707,
3778
+ "step": 494
3779
+ },
3780
+ {
3781
+ "epoch": 0.574448063698878,
3782
+ "grad_norm": 96.07655487217647,
3783
+ "learning_rate": 1.7213053941392816e-07,
3784
+ "logits/chosen": -1.330100417137146,
3785
+ "logits/rejected": -1.354781150817871,
3786
+ "logps/chosen": -157.2327880859375,
3787
+ "logps/rejected": -173.35081481933594,
3788
+ "loss": 0.572,
3789
+ "rewards/accuracies": 0.625,
3790
+ "rewards/chosen": 0.055093757808208466,
3791
+ "rewards/margins": 0.6396900415420532,
3792
+ "rewards/rejected": -0.5845962166786194,
3793
+ "step": 496
3794
+ },
3795
+ {
3796
+ "epoch": 0.5767643865363735,
3797
+ "grad_norm": 90.87905253835972,
3798
+ "learning_rate": 1.7186236059662046e-07,
3799
+ "logits/chosen": -1.4015512466430664,
3800
+ "logits/rejected": -1.4518334865570068,
3801
+ "logps/chosen": -132.65196228027344,
3802
+ "logps/rejected": -143.88650512695312,
3803
+ "loss": 0.6587,
3804
+ "rewards/accuracies": 0.59375,
3805
+ "rewards/chosen": -0.07620470970869064,
3806
+ "rewards/margins": 0.09850985556840897,
3807
+ "rewards/rejected": -0.1747145652770996,
3808
+ "step": 498
3809
+ },
3810
+ {
3811
+ "epoch": 0.579080709373869,
3812
+ "grad_norm": 58.33509354958709,
3813
+ "learning_rate": 1.7159310873021693e-07,
3814
+ "logits/chosen": -1.464751124382019,
3815
+ "logits/rejected": -1.4334102869033813,
3816
+ "logps/chosen": -111.387939453125,
3817
+ "logps/rejected": -117.49159240722656,
3818
+ "loss": 0.5577,
3819
+ "rewards/accuracies": 0.78125,
3820
+ "rewards/chosen": 0.29490286111831665,
3821
+ "rewards/margins": 0.8083434700965881,
3822
+ "rewards/rejected": -0.5134405493736267,
3823
+ "step": 500
3824
+ },
3825
+ {
3826
+ "epoch": 0.579080709373869,
3827
+ "eval_logits/chosen": -1.3100072145462036,
3828
+ "eval_logits/rejected": -1.304487943649292,
3829
+ "eval_logps/chosen": -139.44586181640625,
3830
+ "eval_logps/rejected": -139.48992919921875,
3831
+ "eval_loss": 0.654407262802124,
3832
+ "eval_rewards/accuracies": 0.6000000238418579,
3833
+ "eval_rewards/chosen": -0.38146913051605225,
3834
+ "eval_rewards/margins": 0.17216716706752777,
3835
+ "eval_rewards/rejected": -0.5536363124847412,
3836
+ "eval_runtime": 24.0833,
3837
+ "eval_samples_per_second": 4.152,
3838
+ "eval_steps_per_second": 1.038,
3839
+ "step": 500
3840
+ },
3841
+ {
3842
+ "epoch": 0.5813970322113644,
3843
+ "grad_norm": 85.71410602529986,
3844
+ "learning_rate": 1.7132278783518754e-07,
3845
+ "logits/chosen": -1.2767977714538574,
3846
+ "logits/rejected": -1.3142091035842896,
3847
+ "logps/chosen": -132.83477783203125,
3848
+ "logps/rejected": -152.29600524902344,
3849
+ "loss": 0.6423,
3850
+ "rewards/accuracies": 0.4375,
3851
+ "rewards/chosen": -0.3583824038505554,
3852
+ "rewards/margins": 0.2208695262670517,
3853
+ "rewards/rejected": -0.5792520046234131,
3854
+ "step": 502
3855
+ },
3856
+ {
3857
+ "epoch": 0.5837133550488599,
3858
+ "grad_norm": 74.14800888172827,
3859
+ "learning_rate": 1.7105140194796522e-07,
3860
+ "logits/chosen": -1.3712527751922607,
3861
+ "logits/rejected": -1.425230860710144,
3862
+ "logps/chosen": -175.75039672851562,
3863
+ "logps/rejected": -202.72731018066406,
3864
+ "loss": 0.5921,
3865
+ "rewards/accuracies": 0.71875,
3866
+ "rewards/chosen": -0.03632951155304909,
3867
+ "rewards/margins": 0.9486851692199707,
3868
+ "rewards/rejected": -0.9850146174430847,
3869
+ "step": 504
3870
+ },
3871
+ {
3872
+ "epoch": 0.5860296778863554,
3873
+ "grad_norm": 71.00059592227518,
3874
+ "learning_rate": 1.707789551208852e-07,
3875
+ "logits/chosen": -1.2654979228973389,
3876
+ "logits/rejected": -1.3367087841033936,
3877
+ "logps/chosen": -107.92752075195312,
3878
+ "logps/rejected": -137.77261352539062,
3879
+ "loss": 0.5964,
3880
+ "rewards/accuracies": 0.75,
3881
+ "rewards/chosen": 0.22204995155334473,
3882
+ "rewards/margins": 0.47908443212509155,
3883
+ "rewards/rejected": -0.2570344805717468,
3884
+ "step": 506
3885
+ },
3886
+ {
3887
+ "epoch": 0.5883460007238509,
3888
+ "grad_norm": 80.94401296109848,
3889
+ "learning_rate": 1.705054514221248e-07,
3890
+ "logits/chosen": -1.359083652496338,
3891
+ "logits/rejected": -1.262428879737854,
3892
+ "logps/chosen": -128.09751892089844,
3893
+ "logps/rejected": -112.98042297363281,
3894
+ "loss": 0.5995,
3895
+ "rewards/accuracies": 0.71875,
3896
+ "rewards/chosen": -0.007966872304677963,
3897
+ "rewards/margins": 0.3093283772468567,
3898
+ "rewards/rejected": -0.31729522347450256,
3899
+ "step": 508
3900
+ },
3901
+ {
3902
+ "epoch": 0.5906623235613464,
3903
+ "grad_norm": 117.75539638738908,
3904
+ "learning_rate": 1.7023089493564246e-07,
3905
+ "logits/chosen": -1.3026072978973389,
3906
+ "logits/rejected": -1.3078409433364868,
3907
+ "logps/chosen": -157.6989288330078,
3908
+ "logps/rejected": -171.07347106933594,
3909
+ "loss": 0.6652,
3910
+ "rewards/accuracies": 0.65625,
3911
+ "rewards/chosen": -0.17687593400478363,
3912
+ "rewards/margins": 0.22165895998477936,
3913
+ "rewards/rejected": -0.398534893989563,
3914
+ "step": 510
3915
+ },
3916
+ {
3917
+ "epoch": 0.5929786463988418,
3918
+ "grad_norm": 86.54731628654646,
3919
+ "learning_rate": 1.6995528976111692e-07,
3920
+ "logits/chosen": -1.3644428253173828,
3921
+ "logits/rejected": -1.359837532043457,
3922
+ "logps/chosen": -118.70327758789062,
3923
+ "logps/rejected": -129.3509979248047,
3924
+ "loss": 0.6307,
3925
+ "rewards/accuracies": 0.625,
3926
+ "rewards/chosen": 0.16805267333984375,
3927
+ "rewards/margins": 0.45584040880203247,
3928
+ "rewards/rejected": -0.2877877354621887,
3929
+ "step": 512
3930
+ },
3931
+ {
3932
+ "epoch": 0.5952949692363373,
3933
+ "grad_norm": 87.28044950941617,
3934
+ "learning_rate": 1.6967864001388587e-07,
3935
+ "logits/chosen": -1.383012294769287,
3936
+ "logits/rejected": -1.372816562652588,
3937
+ "logps/chosen": -112.56473541259766,
3938
+ "logps/rejected": -113.43563842773438,
3939
+ "loss": 0.5892,
3940
+ "rewards/accuracies": 0.6875,
3941
+ "rewards/chosen": 0.07795768231153488,
3942
+ "rewards/margins": 0.3844006359577179,
3943
+ "rewards/rejected": -0.30644291639328003,
3944
+ "step": 514
3945
+ },
3946
+ {
3947
+ "epoch": 0.5976112920738328,
3948
+ "grad_norm": 93.89694914049578,
3949
+ "learning_rate": 1.6940094982488465e-07,
3950
+ "logits/chosen": -1.3544152975082397,
3951
+ "logits/rejected": -1.4398796558380127,
3952
+ "logps/chosen": -174.69073486328125,
3953
+ "logps/rejected": -213.37953186035156,
3954
+ "loss": 0.6402,
3955
+ "rewards/accuracies": 0.71875,
3956
+ "rewards/chosen": 0.2562227249145508,
3957
+ "rewards/margins": 0.7131789922714233,
3958
+ "rewards/rejected": -0.45695626735687256,
3959
+ "step": 516
3960
+ },
3961
+ {
3962
+ "epoch": 0.5999276149113283,
3963
+ "grad_norm": 93.38206179293249,
3964
+ "learning_rate": 1.6912222334058434e-07,
3965
+ "logits/chosen": -1.3199559450149536,
3966
+ "logits/rejected": -1.3303453922271729,
3967
+ "logps/chosen": -113.59899139404297,
3968
+ "logps/rejected": -145.6167449951172,
3969
+ "loss": 0.5803,
3970
+ "rewards/accuracies": 0.6875,
3971
+ "rewards/chosen": 0.020612459629774094,
3972
+ "rewards/margins": 0.46917960047721863,
3973
+ "rewards/rejected": -0.4485671818256378,
3974
+ "step": 518
3975
+ },
3976
+ {
3977
+ "epoch": 0.6022439377488238,
3978
+ "grad_norm": 105.46320125024906,
3979
+ "learning_rate": 1.6884246472293017e-07,
3980
+ "logits/chosen": -1.2990922927856445,
3981
+ "logits/rejected": -1.32880437374115,
3982
+ "logps/chosen": -156.3465576171875,
3983
+ "logps/rejected": -181.81884765625,
3984
+ "loss": 0.5906,
3985
+ "rewards/accuracies": 0.65625,
3986
+ "rewards/chosen": 0.04846584051847458,
3987
+ "rewards/margins": 0.4274147152900696,
3988
+ "rewards/rejected": -0.3789488971233368,
3989
+ "step": 520
3990
+ },
3991
+ {
3992
+ "epoch": 0.6045602605863192,
3993
+ "grad_norm": 75.97476536999818,
3994
+ "learning_rate": 1.68561678149279e-07,
3995
+ "logits/chosen": -1.324131727218628,
3996
+ "logits/rejected": -1.3583768606185913,
3997
+ "logps/chosen": -158.01376342773438,
3998
+ "logps/rejected": -170.33180236816406,
3999
+ "loss": 0.62,
4000
+ "rewards/accuracies": 0.65625,
4001
+ "rewards/chosen": -0.15412873029708862,
4002
+ "rewards/margins": 0.3743273615837097,
4003
+ "rewards/rejected": -0.5284560322761536,
4004
+ "step": 522
4005
+ },
4006
+ {
4007
+ "epoch": 0.6068765834238147,
4008
+ "grad_norm": 104.26484808062503,
4009
+ "learning_rate": 1.6827986781233728e-07,
4010
+ "logits/chosen": -1.244482159614563,
4011
+ "logits/rejected": -1.3129115104675293,
4012
+ "logps/chosen": -168.09619140625,
4013
+ "logps/rejected": -183.01235961914062,
4014
+ "loss": 0.6265,
4015
+ "rewards/accuracies": 0.6875,
4016
+ "rewards/chosen": -0.21132177114486694,
4017
+ "rewards/margins": 0.3104555606842041,
4018
+ "rewards/rejected": -0.521777331829071,
4019
+ "step": 524
4020
+ },
4021
+ {
4022
+ "epoch": 0.6091929062613102,
4023
+ "grad_norm": 114.78689524134293,
4024
+ "learning_rate": 1.6799703792009824e-07,
4025
+ "logits/chosen": -1.5139933824539185,
4026
+ "logits/rejected": -1.4369456768035889,
4027
+ "logps/chosen": -179.37973022460938,
4028
+ "logps/rejected": -178.68380737304688,
4029
+ "loss": 0.6447,
4030
+ "rewards/accuracies": 0.75,
4031
+ "rewards/chosen": 0.1471785008907318,
4032
+ "rewards/margins": 0.5135056376457214,
4033
+ "rewards/rejected": -0.366327166557312,
4034
+ "step": 526
4035
+ },
4036
+ {
4037
+ "epoch": 0.6115092290988057,
4038
+ "grad_norm": 71.37258403318782,
4039
+ "learning_rate": 1.6771319269577914e-07,
4040
+ "logits/chosen": -1.246570348739624,
4041
+ "logits/rejected": -1.2840875387191772,
4042
+ "logps/chosen": -128.31802368164062,
4043
+ "logps/rejected": -164.46771240234375,
4044
+ "loss": 0.5578,
4045
+ "rewards/accuracies": 0.625,
4046
+ "rewards/chosen": -0.00241958349943161,
4047
+ "rewards/margins": 0.5187560319900513,
4048
+ "rewards/rejected": -0.5211755633354187,
4049
+ "step": 528
4050
+ },
4051
+ {
4052
+ "epoch": 0.6138255519363012,
4053
+ "grad_norm": 69.57805371439099,
4054
+ "learning_rate": 1.6742833637775812e-07,
4055
+ "logits/chosen": -1.323167085647583,
4056
+ "logits/rejected": -1.3477709293365479,
4057
+ "logps/chosen": -146.45350646972656,
4058
+ "logps/rejected": -181.66311645507812,
4059
+ "loss": 0.5717,
4060
+ "rewards/accuracies": 0.6875,
4061
+ "rewards/chosen": -0.19867736101150513,
4062
+ "rewards/margins": 0.9410180449485779,
4063
+ "rewards/rejected": -1.1396952867507935,
4064
+ "step": 530
4065
+ },
4066
+ {
4067
+ "epoch": 0.6161418747737966,
4068
+ "grad_norm": 171.030005968529,
4069
+ "learning_rate": 1.6714247321951105e-07,
4070
+ "logits/chosen": -1.380966067314148,
4071
+ "logits/rejected": -1.4481279850006104,
4072
+ "logps/chosen": -164.24951171875,
4073
+ "logps/rejected": -182.36082458496094,
4074
+ "loss": 0.6188,
4075
+ "rewards/accuracies": 0.75,
4076
+ "rewards/chosen": -0.18503104150295258,
4077
+ "rewards/margins": 0.3646969497203827,
4078
+ "rewards/rejected": -0.5497279167175293,
4079
+ "step": 532
4080
+ },
4081
+ {
4082
+ "epoch": 0.6184581976112921,
4083
+ "grad_norm": 92.73704263508813,
4084
+ "learning_rate": 1.668556074895479e-07,
4085
+ "logits/chosen": -1.3130195140838623,
4086
+ "logits/rejected": -1.3079559803009033,
4087
+ "logps/chosen": -163.1666717529297,
4088
+ "logps/rejected": -171.744873046875,
4089
+ "loss": 0.6301,
4090
+ "rewards/accuracies": 0.625,
4091
+ "rewards/chosen": -0.020428307354450226,
4092
+ "rewards/margins": 0.3073387145996094,
4093
+ "rewards/rejected": -0.327767014503479,
4094
+ "step": 534
4095
+ },
4096
+ {
4097
+ "epoch": 0.6207745204487876,
4098
+ "grad_norm": 80.77491644213549,
4099
+ "learning_rate": 1.6656774347134907e-07,
4100
+ "logits/chosen": -1.3122167587280273,
4101
+ "logits/rejected": -1.3465042114257812,
4102
+ "logps/chosen": -122.46326446533203,
4103
+ "logps/rejected": -140.5079803466797,
4104
+ "loss": 0.651,
4105
+ "rewards/accuracies": 0.59375,
4106
+ "rewards/chosen": -0.09150812029838562,
4107
+ "rewards/margins": 0.42649781703948975,
4108
+ "rewards/rejected": -0.518005907535553,
4109
+ "step": 536
4110
+ },
4111
+ {
4112
+ "epoch": 0.6230908432862831,
4113
+ "grad_norm": 84.46218412236821,
4114
+ "learning_rate": 1.6627888546330136e-07,
4115
+ "logits/chosen": -1.4094092845916748,
4116
+ "logits/rejected": -1.4629356861114502,
4117
+ "logps/chosen": -185.64651489257812,
4118
+ "logps/rejected": -204.06578063964844,
4119
+ "loss": 0.5885,
4120
+ "rewards/accuracies": 0.59375,
4121
+ "rewards/chosen": -0.06263245642185211,
4122
+ "rewards/margins": 0.5876613855361938,
4123
+ "rewards/rejected": -0.6502938270568848,
4124
+ "step": 538
4125
+ },
4126
+ {
4127
+ "epoch": 0.6254071661237784,
4128
+ "grad_norm": 81.10930034348203,
4129
+ "learning_rate": 1.659890377786339e-07,
4130
+ "logits/chosen": -1.3104676008224487,
4131
+ "logits/rejected": -1.2645026445388794,
4132
+ "logps/chosen": -160.73683166503906,
4133
+ "logps/rejected": -208.7481689453125,
4134
+ "loss": 0.5968,
4135
+ "rewards/accuracies": 0.71875,
4136
+ "rewards/chosen": -0.17388193309307098,
4137
+ "rewards/margins": 1.4726815223693848,
4138
+ "rewards/rejected": -1.6465635299682617,
4139
+ "step": 540
4140
+ },
4141
+ {
4142
+ "epoch": 0.627723488961274,
4143
+ "grad_norm": 70.09647822541486,
4144
+ "learning_rate": 1.656982047453536e-07,
4145
+ "logits/chosen": -1.3550140857696533,
4146
+ "logits/rejected": -1.3001039028167725,
4147
+ "logps/chosen": -152.96685791015625,
4148
+ "logps/rejected": -185.17835998535156,
4149
+ "loss": 0.5726,
4150
+ "rewards/accuracies": 0.65625,
4151
+ "rewards/chosen": -0.005441240966320038,
4152
+ "rewards/margins": 0.716077983379364,
4153
+ "rewards/rejected": -0.7215193510055542,
4154
+ "step": 542
4155
+ },
4156
+ {
4157
+ "epoch": 0.6300398117987694,
4158
+ "grad_norm": 114.90916867192477,
4159
+ "learning_rate": 1.6540639070618066e-07,
4160
+ "logits/chosen": -1.3001914024353027,
4161
+ "logits/rejected": -1.3512235879898071,
4162
+ "logps/chosen": -162.81076049804688,
4163
+ "logps/rejected": -186.64080810546875,
4164
+ "loss": 0.5977,
4165
+ "rewards/accuracies": 0.78125,
4166
+ "rewards/chosen": -0.11464500427246094,
4167
+ "rewards/margins": 0.6204842329025269,
4168
+ "rewards/rejected": -0.735129177570343,
4169
+ "step": 544
4170
+ },
4171
+ {
4172
+ "epoch": 0.6323561346362649,
4173
+ "grad_norm": 114.79194762522887,
4174
+ "learning_rate": 1.6511360001848367e-07,
4175
+ "logits/chosen": -1.1840189695358276,
4176
+ "logits/rejected": -1.2202097177505493,
4177
+ "logps/chosen": -133.49606323242188,
4178
+ "logps/rejected": -157.7266387939453,
4179
+ "loss": 0.6263,
4180
+ "rewards/accuracies": 0.90625,
4181
+ "rewards/chosen": 0.1880410611629486,
4182
+ "rewards/margins": 0.6589545607566833,
4183
+ "rewards/rejected": -0.47091349959373474,
4184
+ "step": 546
4185
+ },
4186
+ {
4187
+ "epoch": 0.6346724574737604,
4188
+ "grad_norm": 93.62652784226147,
4189
+ "learning_rate": 1.6481983705421448e-07,
4190
+ "logits/chosen": -1.306709885597229,
4191
+ "logits/rejected": -1.378722071647644,
4192
+ "logps/chosen": -105.61914825439453,
4193
+ "logps/rejected": -134.32098388671875,
4194
+ "loss": 0.6102,
4195
+ "rewards/accuracies": 0.71875,
4196
+ "rewards/chosen": 0.14075130224227905,
4197
+ "rewards/margins": 0.543403685092926,
4198
+ "rewards/rejected": -0.4026523530483246,
4199
+ "step": 548
4200
+ },
4201
+ {
4202
+ "epoch": 0.6369887803112558,
4203
+ "grad_norm": 73.81068535979944,
4204
+ "learning_rate": 1.6452510619984298e-07,
4205
+ "logits/chosen": -1.2993462085723877,
4206
+ "logits/rejected": -1.3260908126831055,
4207
+ "logps/chosen": -113.44615936279297,
4208
+ "logps/rejected": -114.76972961425781,
4209
+ "loss": 0.6459,
4210
+ "rewards/accuracies": 0.625,
4211
+ "rewards/chosen": -0.17589446902275085,
4212
+ "rewards/margins": 0.14515961706638336,
4213
+ "rewards/rejected": -0.3210541009902954,
4214
+ "step": 550
4215
+ },
4216
+ {
4217
+ "epoch": 0.6393051031487513,
4218
+ "grad_norm": 67.36004183948565,
4219
+ "learning_rate": 1.642294118562917e-07,
4220
+ "logits/chosen": -1.365562915802002,
4221
+ "logits/rejected": -1.3586573600769043,
4222
+ "logps/chosen": -124.21674346923828,
4223
+ "logps/rejected": -129.41188049316406,
4224
+ "loss": 0.6016,
4225
+ "rewards/accuracies": 0.5625,
4226
+ "rewards/chosen": -0.026413168758153915,
4227
+ "rewards/margins": 0.1695682406425476,
4228
+ "rewards/rejected": -0.19598142802715302,
4229
+ "step": 552
4230
+ },
4231
+ {
4232
+ "epoch": 0.6416214259862468,
4233
+ "grad_norm": 109.88457230810822,
4234
+ "learning_rate": 1.6393275843886988e-07,
4235
+ "logits/chosen": -1.2051353454589844,
4236
+ "logits/rejected": -1.1777970790863037,
4237
+ "logps/chosen": -169.16192626953125,
4238
+ "logps/rejected": -165.87405395507812,
4239
+ "loss": 0.7026,
4240
+ "rewards/accuracies": 0.5625,
4241
+ "rewards/chosen": -0.2536877989768982,
4242
+ "rewards/margins": -0.15165254473686218,
4243
+ "rewards/rejected": -0.10203523933887482,
4244
+ "step": 554
4245
+ },
4246
+ {
4247
+ "epoch": 0.6439377488237423,
4248
+ "grad_norm": 75.11287220032575,
4249
+ "learning_rate": 1.636351503772077e-07,
4250
+ "logits/chosen": -1.4200119972229004,
4251
+ "logits/rejected": -1.4631352424621582,
4252
+ "logps/chosen": -198.4412841796875,
4253
+ "logps/rejected": -217.67779541015625,
4254
+ "loss": 0.5673,
4255
+ "rewards/accuracies": 0.65625,
4256
+ "rewards/chosen": 0.11143307387828827,
4257
+ "rewards/margins": 0.5114270448684692,
4258
+ "rewards/rejected": -0.39999401569366455,
4259
+ "step": 556
4260
+ },
4261
+ {
4262
+ "epoch": 0.6462540716612378,
4263
+ "grad_norm": 76.6884503172935,
4264
+ "learning_rate": 1.6333659211519013e-07,
4265
+ "logits/chosen": -1.250978708267212,
4266
+ "logits/rejected": -1.3204269409179688,
4267
+ "logps/chosen": -122.6414794921875,
4268
+ "logps/rejected": -148.8828887939453,
4269
+ "loss": 0.577,
4270
+ "rewards/accuracies": 0.84375,
4271
+ "rewards/chosen": 0.0017823921516537666,
4272
+ "rewards/margins": 0.8125737905502319,
4273
+ "rewards/rejected": -0.8107913732528687,
4274
+ "step": 558
4275
+ },
4276
+ {
4277
+ "epoch": 0.6485703944987332,
4278
+ "grad_norm": 97.61750654608176,
4279
+ "learning_rate": 1.630370881108905e-07,
4280
+ "logits/chosen": -1.4659614562988281,
4281
+ "logits/rejected": -1.410871148109436,
4282
+ "logps/chosen": -158.7913055419922,
4283
+ "logps/rejected": -179.6865692138672,
4284
+ "loss": 0.6643,
4285
+ "rewards/accuracies": 0.59375,
4286
+ "rewards/chosen": 0.09925530850887299,
4287
+ "rewards/margins": 0.4364185631275177,
4288
+ "rewards/rejected": -0.33716320991516113,
4289
+ "step": 560
4290
+ },
4291
+ {
4292
+ "epoch": 0.6508867173362287,
4293
+ "grad_norm": 81.27814399323799,
4294
+ "learning_rate": 1.627366428365039e-07,
4295
+ "logits/chosen": -1.3590463399887085,
4296
+ "logits/rejected": -1.402485966682434,
4297
+ "logps/chosen": -154.32345581054688,
4298
+ "logps/rejected": -171.76239013671875,
4299
+ "loss": 0.6142,
4300
+ "rewards/accuracies": 0.6875,
4301
+ "rewards/chosen": -0.35029542446136475,
4302
+ "rewards/margins": 0.5618267059326172,
4303
+ "rewards/rejected": -0.9121222496032715,
4304
+ "step": 562
4305
+ },
4306
+ {
4307
+ "epoch": 0.6532030401737242,
4308
+ "grad_norm": 94.5819842213993,
4309
+ "learning_rate": 1.6243526077828058e-07,
4310
+ "logits/chosen": -1.368080973625183,
4311
+ "logits/rejected": -1.3609378337860107,
4312
+ "logps/chosen": -144.53123474121094,
4313
+ "logps/rejected": -149.87338256835938,
4314
+ "loss": 0.6336,
4315
+ "rewards/accuracies": 0.90625,
4316
+ "rewards/chosen": 0.22654207050800323,
4317
+ "rewards/margins": 0.999248206615448,
4318
+ "rewards/rejected": -0.7727060914039612,
4319
+ "step": 564
4320
+ },
4321
+ {
4322
+ "epoch": 0.6555193630112197,
4323
+ "grad_norm": 77.01269214216319,
4324
+ "learning_rate": 1.6213294643645882e-07,
4325
+ "logits/chosen": -1.2939796447753906,
4326
+ "logits/rejected": -1.3303455114364624,
4327
+ "logps/chosen": -136.7617645263672,
4328
+ "logps/rejected": -174.4918670654297,
4329
+ "loss": 0.5761,
4330
+ "rewards/accuracies": 0.59375,
4331
+ "rewards/chosen": -0.4167702794075012,
4332
+ "rewards/margins": 0.5882107615470886,
4333
+ "rewards/rejected": -1.0049810409545898,
4334
+ "step": 566
4335
+ },
4336
+ {
4337
+ "epoch": 0.6578356858487152,
4338
+ "grad_norm": 93.03042212849894,
4339
+ "learning_rate": 1.618297043251977e-07,
4340
+ "logits/chosen": -1.346666932106018,
4341
+ "logits/rejected": -1.4271106719970703,
4342
+ "logps/chosen": -123.94332885742188,
4343
+ "logps/rejected": -144.5997772216797,
4344
+ "loss": 0.5749,
4345
+ "rewards/accuracies": 0.625,
4346
+ "rewards/chosen": -0.06060848757624626,
4347
+ "rewards/margins": 0.27999287843704224,
4348
+ "rewards/rejected": -0.3406013548374176,
4349
+ "step": 568
4350
+ },
4351
+ {
4352
+ "epoch": 0.6601520086862106,
4353
+ "grad_norm": 78.42710600355083,
4354
+ "learning_rate": 1.6152553897250987e-07,
4355
+ "logits/chosen": -1.1860871315002441,
4356
+ "logits/rejected": -1.2453413009643555,
4357
+ "logps/chosen": -119.57302856445312,
4358
+ "logps/rejected": -140.7362518310547,
4359
+ "loss": 0.5865,
4360
+ "rewards/accuracies": 0.71875,
4361
+ "rewards/chosen": -0.14186443388462067,
4362
+ "rewards/margins": 0.509893000125885,
4363
+ "rewards/rejected": -0.6517573595046997,
4364
+ "step": 570
4365
+ },
4366
+ {
4367
+ "epoch": 0.6624683315237061,
4368
+ "grad_norm": 79.87983043947283,
4369
+ "learning_rate": 1.6122045492019374e-07,
4370
+ "logits/chosen": -1.2810924053192139,
4371
+ "logits/rejected": -1.3013286590576172,
4372
+ "logps/chosen": -124.72441101074219,
4373
+ "logps/rejected": -155.4320831298828,
4374
+ "loss": 0.6214,
4375
+ "rewards/accuracies": 0.71875,
4376
+ "rewards/chosen": -0.17904864251613617,
4377
+ "rewards/margins": 0.5865851640701294,
4378
+ "rewards/rejected": -0.7656337022781372,
4379
+ "step": 572
4380
+ },
4381
+ {
4382
+ "epoch": 0.6647846543612016,
4383
+ "grad_norm": 83.06538656604619,
4384
+ "learning_rate": 1.6091445672376577e-07,
4385
+ "logits/chosen": -1.2900563478469849,
4386
+ "logits/rejected": -1.3495041131973267,
4387
+ "logps/chosen": -132.9668426513672,
4388
+ "logps/rejected": -158.88734436035156,
4389
+ "loss": 0.72,
4390
+ "rewards/accuracies": 0.71875,
4391
+ "rewards/chosen": 0.11348069459199905,
4392
+ "rewards/margins": 0.5080840587615967,
4393
+ "rewards/rejected": -0.3946034610271454,
4394
+ "step": 574
4395
+ },
4396
+ {
4397
+ "epoch": 0.6671009771986971,
4398
+ "grad_norm": 81.11691381402433,
4399
+ "learning_rate": 1.6060754895239242e-07,
4400
+ "logits/chosen": -1.3639813661575317,
4401
+ "logits/rejected": -1.3099185228347778,
4402
+ "logps/chosen": -129.63088989257812,
4403
+ "logps/rejected": -138.73565673828125,
4404
+ "loss": 0.5229,
4405
+ "rewards/accuracies": 0.625,
4406
+ "rewards/chosen": -0.1598489135503769,
4407
+ "rewards/margins": 0.4970959722995758,
4408
+ "rewards/rejected": -0.6569448709487915,
4409
+ "step": 576
4410
+ },
4411
+ {
4412
+ "epoch": 0.6694173000361926,
4413
+ "grad_norm": 97.23417815050283,
4414
+ "learning_rate": 1.6029973618882188e-07,
4415
+ "logits/chosen": -1.4283655881881714,
4416
+ "logits/rejected": -1.4487836360931396,
4417
+ "logps/chosen": -133.85809326171875,
4418
+ "logps/rejected": -156.39669799804688,
4419
+ "loss": 0.5988,
4420
+ "rewards/accuracies": 0.59375,
4421
+ "rewards/chosen": 0.003942415118217468,
4422
+ "rewards/margins": 0.2732374668121338,
4423
+ "rewards/rejected": -0.26929500699043274,
4424
+ "step": 578
4425
+ },
4426
+ {
4427
+ "epoch": 0.671733622873688,
4428
+ "grad_norm": 66.172594585089,
4429
+ "learning_rate": 1.599910230293158e-07,
4430
+ "logits/chosen": -1.2562668323516846,
4431
+ "logits/rejected": -1.3157635927200317,
4432
+ "logps/chosen": -166.57229614257812,
4433
+ "logps/rejected": -168.74867248535156,
4434
+ "loss": 0.5781,
4435
+ "rewards/accuracies": 0.78125,
4436
+ "rewards/chosen": 0.08036249130964279,
4437
+ "rewards/margins": 0.8519478440284729,
4438
+ "rewards/rejected": -0.7715852856636047,
4439
+ "step": 580
4440
+ },
4441
+ {
4442
+ "epoch": 0.6740499457111835,
4443
+ "grad_norm": 74.76192509954373,
4444
+ "learning_rate": 1.596814140835805e-07,
4445
+ "logits/chosen": -1.256306767463684,
4446
+ "logits/rejected": -1.3627066612243652,
4447
+ "logps/chosen": -163.5234375,
4448
+ "logps/rejected": -197.15310668945312,
4449
+ "loss": 0.5397,
4450
+ "rewards/accuracies": 0.8125,
4451
+ "rewards/chosen": 0.26100030541419983,
4452
+ "rewards/margins": 0.756481409072876,
4453
+ "rewards/rejected": -0.495481014251709,
4454
+ "step": 582
4455
+ },
4456
+ {
4457
+ "epoch": 0.676366268548679,
4458
+ "grad_norm": 70.48378313519777,
4459
+ "learning_rate": 1.5937091397469813e-07,
4460
+ "logits/chosen": -1.395608901977539,
4461
+ "logits/rejected": -1.4762039184570312,
4462
+ "logps/chosen": -147.21681213378906,
4463
+ "logps/rejected": -167.70651245117188,
4464
+ "loss": 0.6367,
4465
+ "rewards/accuracies": 0.53125,
4466
+ "rewards/chosen": 0.1700185090303421,
4467
+ "rewards/margins": 0.24376149475574493,
4468
+ "rewards/rejected": -0.07374300062656403,
4469
+ "step": 584
4470
+ },
4471
+ {
4472
+ "epoch": 0.6786825913861745,
4473
+ "grad_norm": 71.83056408251632,
4474
+ "learning_rate": 1.5905952733905773e-07,
4475
+ "logits/chosen": -1.3281779289245605,
4476
+ "logits/rejected": -1.37840735912323,
4477
+ "logps/chosen": -150.58189392089844,
4478
+ "logps/rejected": -174.99461364746094,
4479
+ "loss": 0.6312,
4480
+ "rewards/accuracies": 0.65625,
4481
+ "rewards/chosen": 0.022576339542865753,
4482
+ "rewards/margins": 0.32831788063049316,
4483
+ "rewards/rejected": -0.3057415187358856,
4484
+ "step": 586
4485
+ },
4486
+ {
4487
+ "epoch": 0.6809989142236699,
4488
+ "grad_norm": 83.87757428866225,
4489
+ "learning_rate": 1.5874725882628598e-07,
4490
+ "logits/chosen": -1.2740365266799927,
4491
+ "logits/rejected": -1.3645150661468506,
4492
+ "logps/chosen": -119.89724731445312,
4493
+ "logps/rejected": -143.9542694091797,
4494
+ "loss": 0.6469,
4495
+ "rewards/accuracies": 0.625,
4496
+ "rewards/chosen": -0.08816975355148315,
4497
+ "rewards/margins": 0.3666497468948364,
4498
+ "rewards/rejected": -0.4548195004463196,
4499
+ "step": 588
4500
+ },
4501
+ {
4502
+ "epoch": 0.6833152370611654,
4503
+ "grad_norm": 95.15763805819658,
4504
+ "learning_rate": 1.5843411309917773e-07,
4505
+ "logits/chosen": -1.1707677841186523,
4506
+ "logits/rejected": -1.2354707717895508,
4507
+ "logps/chosen": -137.05491638183594,
4508
+ "logps/rejected": -164.23329162597656,
4509
+ "loss": 0.6325,
4510
+ "rewards/accuracies": 0.75,
4511
+ "rewards/chosen": -0.04570431262254715,
4512
+ "rewards/margins": 0.837788999080658,
4513
+ "rewards/rejected": -0.8834933638572693,
4514
+ "step": 590
4515
+ },
4516
+ {
4517
+ "epoch": 0.6856315598986609,
4518
+ "grad_norm": 89.31999210372004,
4519
+ "learning_rate": 1.5812009483362641e-07,
4520
+ "logits/chosen": -1.3425350189208984,
4521
+ "logits/rejected": -1.3389382362365723,
4522
+ "logps/chosen": -114.12051391601562,
4523
+ "logps/rejected": -130.07469177246094,
4524
+ "loss": 0.6058,
4525
+ "rewards/accuracies": 0.71875,
4526
+ "rewards/chosen": 0.10132614523172379,
4527
+ "rewards/margins": 0.26681679487228394,
4528
+ "rewards/rejected": -0.16549064218997955,
4529
+ "step": 592
4530
+ },
4531
+ {
4532
+ "epoch": 0.6879478827361564,
4533
+ "grad_norm": 86.55981599933118,
4534
+ "learning_rate": 1.5780520871855416e-07,
4535
+ "logits/chosen": -1.4263215065002441,
4536
+ "logits/rejected": -1.4661970138549805,
4537
+ "logps/chosen": -146.5399932861328,
4538
+ "logps/rejected": -162.22738647460938,
4539
+ "loss": 0.6227,
4540
+ "rewards/accuracies": 0.625,
4541
+ "rewards/chosen": 0.1263137012720108,
4542
+ "rewards/margins": 0.7135946750640869,
4543
+ "rewards/rejected": -0.5872809886932373,
4544
+ "step": 594
4545
+ },
4546
+ {
4547
+ "epoch": 0.6902642055736519,
4548
+ "grad_norm": 77.11466861321054,
4549
+ "learning_rate": 1.5748945945584194e-07,
4550
+ "logits/chosen": -1.1571879386901855,
4551
+ "logits/rejected": -1.2603471279144287,
4552
+ "logps/chosen": -138.8977813720703,
4553
+ "logps/rejected": -177.0740509033203,
4554
+ "loss": 0.6776,
4555
+ "rewards/accuracies": 0.625,
4556
+ "rewards/chosen": -0.02539961040019989,
4557
+ "rewards/margins": 0.39455336332321167,
4558
+ "rewards/rejected": -0.41995295882225037,
4559
+ "step": 596
4560
+ },
4561
+ {
4562
+ "epoch": 0.6925805284111473,
4563
+ "grad_norm": 81.2459965814331,
4564
+ "learning_rate": 1.5717285176025912e-07,
4565
+ "logits/chosen": -1.2991225719451904,
4566
+ "logits/rejected": -1.38021981716156,
4567
+ "logps/chosen": -151.61753845214844,
4568
+ "logps/rejected": -176.51548767089844,
4569
+ "loss": 0.5917,
4570
+ "rewards/accuracies": 0.78125,
4571
+ "rewards/chosen": 0.20256918668746948,
4572
+ "rewards/margins": 0.4604591131210327,
4573
+ "rewards/rejected": -0.25788992643356323,
4574
+ "step": 598
4575
+ },
4576
+ {
4577
+ "epoch": 0.6948968512486428,
4578
+ "grad_norm": 70.92510708156665,
4579
+ "learning_rate": 1.568553903593933e-07,
4580
+ "logits/chosen": -1.444725513458252,
4581
+ "logits/rejected": -1.3993281126022339,
4582
+ "logps/chosen": -111.9288330078125,
4583
+ "logps/rejected": -113.10049438476562,
4584
+ "loss": 0.6366,
4585
+ "rewards/accuracies": 0.53125,
4586
+ "rewards/chosen": -0.008920304477214813,
4587
+ "rewards/margins": 0.010683823376893997,
4588
+ "rewards/rejected": -0.019604135304689407,
4589
+ "step": 600
4590
+ },
4591
+ {
4592
+ "epoch": 0.6948968512486428,
4593
+ "eval_logits/chosen": -1.3430299758911133,
4594
+ "eval_logits/rejected": -1.3360421657562256,
4595
+ "eval_logps/chosen": -137.48741149902344,
4596
+ "eval_logps/rejected": -138.31024169921875,
4597
+ "eval_loss": 0.6260569095611572,
4598
+ "eval_rewards/accuracies": 0.6399999856948853,
4599
+ "eval_rewards/chosen": -0.185623899102211,
4600
+ "eval_rewards/margins": 0.25004515051841736,
4601
+ "eval_rewards/rejected": -0.43566906452178955,
4602
+ "eval_runtime": 24.9391,
4603
+ "eval_samples_per_second": 4.01,
4604
+ "eval_steps_per_second": 1.002,
4605
+ "step": 600
4606
  }
4607
  ],
4608
  "logging_steps": 2,