File size: 13,740 Bytes
0354e7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 478,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0020920502092050207,
      "grad_norm": 9.710838317871094,
      "learning_rate": 1.0416666666666666e-08,
      "logits/chosen": -3.161454916000366,
      "logits/rejected": -3.0680501461029053,
      "logps/chosen": -437.20782470703125,
      "logps/rejected": -343.8380432128906,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.05230125523012552,
      "grad_norm": 8.52570629119873,
      "learning_rate": 2.604166666666667e-07,
      "logits/chosen": -3.30265212059021,
      "logits/rejected": -3.2195777893066406,
      "logps/chosen": -432.7400207519531,
      "logps/rejected": -391.2707824707031,
      "loss": 0.6931,
      "rewards/accuracies": 0.5234375,
      "rewards/chosen": 0.0006945470231585205,
      "rewards/margins": 0.0007254942320287228,
      "rewards/rejected": -3.094731437158771e-05,
      "step": 25
    },
    {
      "epoch": 0.10460251046025104,
      "grad_norm": 9.973247528076172,
      "learning_rate": 4.999733114418725e-07,
      "logits/chosen": -3.299149751663208,
      "logits/rejected": -3.2438504695892334,
      "logps/chosen": -446.8968811035156,
      "logps/rejected": -404.587158203125,
      "loss": 0.6914,
      "rewards/accuracies": 0.5799999833106995,
      "rewards/chosen": 0.005268932785838842,
      "rewards/margins": 0.002896952675655484,
      "rewards/rejected": 0.002371980343014002,
      "step": 50
    },
    {
      "epoch": 0.15690376569037656,
      "grad_norm": 9.357086181640625,
      "learning_rate": 4.951516761176343e-07,
      "logits/chosen": -3.302896738052368,
      "logits/rejected": -3.225714683532715,
      "logps/chosen": -480.6836853027344,
      "logps/rejected": -436.4853515625,
      "loss": 0.6871,
      "rewards/accuracies": 0.5975000262260437,
      "rewards/chosen": 0.013074235059320927,
      "rewards/margins": 0.012715624645352364,
      "rewards/rejected": 0.0003586093371268362,
      "step": 75
    },
    {
      "epoch": 0.20920502092050208,
      "grad_norm": 8.75236988067627,
      "learning_rate": 4.821741763807186e-07,
      "logits/chosen": -3.2598507404327393,
      "logits/rejected": -3.2092175483703613,
      "logps/chosen": -426.1629638671875,
      "logps/rejected": -389.50421142578125,
      "loss": 0.6795,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.003723819274455309,
      "rewards/margins": 0.02804265171289444,
      "rewards/rejected": -0.02431883104145527,
      "step": 100
    },
    {
      "epoch": 0.20920502092050208,
      "eval_logits/chosen": -3.313567638397217,
      "eval_logits/rejected": -3.2565112113952637,
      "eval_logps/chosen": -423.0196533203125,
      "eval_logps/rejected": -407.8036804199219,
      "eval_loss": 0.6759119629859924,
      "eval_rewards/accuracies": 0.62890625,
      "eval_rewards/chosen": 0.0017230990342795849,
      "eval_rewards/margins": 0.0345395989716053,
      "eval_rewards/rejected": -0.03281649947166443,
      "eval_runtime": 8.2733,
      "eval_samples_per_second": 241.742,
      "eval_steps_per_second": 3.868,
      "step": 100
    },
    {
      "epoch": 0.2615062761506276,
      "grad_norm": 8.889654159545898,
      "learning_rate": 4.614725560802639e-07,
      "logits/chosen": -3.2867064476013184,
      "logits/rejected": -3.2023117542266846,
      "logps/chosen": -430.924560546875,
      "logps/rejected": -382.1640319824219,
      "loss": 0.6731,
      "rewards/accuracies": 0.6474999785423279,
      "rewards/chosen": -0.01004675030708313,
      "rewards/margins": 0.04363078624010086,
      "rewards/rejected": -0.05367753654718399,
      "step": 125
    },
    {
      "epoch": 0.3138075313807531,
      "grad_norm": 9.882610321044922,
      "learning_rate": 4.337355301007335e-07,
      "logits/chosen": -3.222916841506958,
      "logits/rejected": -3.1899585723876953,
      "logps/chosen": -443.3583679199219,
      "logps/rejected": -414.344970703125,
      "loss": 0.667,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.017106110230088234,
      "rewards/margins": 0.06052257865667343,
      "rewards/rejected": -0.07762870192527771,
      "step": 150
    },
    {
      "epoch": 0.36610878661087864,
      "grad_norm": 9.968969345092773,
      "learning_rate": 3.9988587174999306e-07,
      "logits/chosen": -3.2201168537139893,
      "logits/rejected": -3.131910800933838,
      "logps/chosen": -477.7035217285156,
      "logps/rejected": -405.7104797363281,
      "loss": 0.6574,
      "rewards/accuracies": 0.6399999856948853,
      "rewards/chosen": -0.03587143123149872,
      "rewards/margins": 0.09045815467834473,
      "rewards/rejected": -0.12632958590984344,
      "step": 175
    },
    {
      "epoch": 0.41841004184100417,
      "grad_norm": 9.230700492858887,
      "learning_rate": 3.610497133404795e-07,
      "logits/chosen": -3.23995304107666,
      "logits/rejected": -3.1550867557525635,
      "logps/chosen": -431.6617126464844,
      "logps/rejected": -396.62774658203125,
      "loss": 0.6584,
      "rewards/accuracies": 0.6349999904632568,
      "rewards/chosen": -0.06991340965032578,
      "rewards/margins": 0.08443903923034668,
      "rewards/rejected": -0.15435244143009186,
      "step": 200
    },
    {
      "epoch": 0.41841004184100417,
      "eval_logits/chosen": -3.2767982482910156,
      "eval_logits/rejected": -3.2240023612976074,
      "eval_logps/chosen": -429.85614013671875,
      "eval_logps/rejected": -420.6952209472656,
      "eval_loss": 0.6533502340316772,
      "eval_rewards/accuracies": 0.64453125,
      "eval_rewards/chosen": -0.06664139777421951,
      "eval_rewards/margins": 0.09509073942899704,
      "eval_rewards/rejected": -0.16173213720321655,
      "eval_runtime": 8.2763,
      "eval_samples_per_second": 241.653,
      "eval_steps_per_second": 3.866,
      "step": 200
    },
    {
      "epoch": 0.4707112970711297,
      "grad_norm": 10.79430103302002,
      "learning_rate": 3.185190812915646e-07,
      "logits/chosen": -3.1671783924102783,
      "logits/rejected": -3.118861436843872,
      "logps/chosen": -446.6968994140625,
      "logps/rejected": -410.1864013671875,
      "loss": 0.6518,
      "rewards/accuracies": 0.6449999809265137,
      "rewards/chosen": -0.08074235171079636,
      "rewards/margins": 0.10100732743740082,
      "rewards/rejected": -0.18174967169761658,
      "step": 225
    },
    {
      "epoch": 0.5230125523012552,
      "grad_norm": 10.086767196655273,
      "learning_rate": 2.7370891215954565e-07,
      "logits/chosen": -3.1980080604553223,
      "logits/rejected": -3.1623446941375732,
      "logps/chosen": -440.4261474609375,
      "logps/rejected": -439.9627685546875,
      "loss": 0.6476,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.07635506987571716,
      "rewards/margins": 0.11709018051624298,
      "rewards/rejected": -0.19344525039196014,
      "step": 250
    },
    {
      "epoch": 0.5753138075313807,
      "grad_norm": 9.709197044372559,
      "learning_rate": 2.2810997961375938e-07,
      "logits/chosen": -3.216128349304199,
      "logits/rejected": -3.1448960304260254,
      "logps/chosen": -425.5062561035156,
      "logps/rejected": -391.5084228515625,
      "loss": 0.6461,
      "rewards/accuracies": 0.6800000071525574,
      "rewards/chosen": -0.072402223944664,
      "rewards/margins": 0.1306913197040558,
      "rewards/rejected": -0.2030935436487198,
      "step": 275
    },
    {
      "epoch": 0.6276150627615062,
      "grad_norm": 10.980072975158691,
      "learning_rate": 1.8323929841460178e-07,
      "logits/chosen": -3.2006824016571045,
      "logits/rejected": -3.1383109092712402,
      "logps/chosen": -443.0799865722656,
      "logps/rejected": -436.2774658203125,
      "loss": 0.6494,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.1245601624250412,
      "rewards/margins": 0.10581608861684799,
      "rewards/rejected": -0.2303762584924698,
      "step": 300
    },
    {
      "epoch": 0.6276150627615062,
      "eval_logits/chosen": -3.2553329467773438,
      "eval_logits/rejected": -3.2049574851989746,
      "eval_logps/chosen": -433.9639892578125,
      "eval_logps/rejected": -428.6236572265625,
      "eval_loss": 0.6438009142875671,
      "eval_rewards/accuracies": 0.62109375,
      "eval_rewards/chosen": -0.10771973431110382,
      "eval_rewards/margins": 0.13329659402370453,
      "eval_rewards/rejected": -0.24101632833480835,
      "eval_runtime": 8.3559,
      "eval_samples_per_second": 239.351,
      "eval_steps_per_second": 3.83,
      "step": 300
    },
    {
      "epoch": 0.6799163179916318,
      "grad_norm": 9.95361614227295,
      "learning_rate": 1.4058965538597032e-07,
      "logits/chosen": -3.2326276302337646,
      "logits/rejected": -3.198971748352051,
      "logps/chosen": -443.37371826171875,
      "logps/rejected": -433.8561706542969,
      "loss": 0.6392,
      "rewards/accuracies": 0.6399999856948853,
      "rewards/chosen": -0.11981040984392166,
      "rewards/margins": 0.12150833755731583,
      "rewards/rejected": -0.2413187474012375,
      "step": 325
    },
    {
      "epoch": 0.7322175732217573,
      "grad_norm": 10.580938339233398,
      "learning_rate": 1.0157994641835734e-07,
      "logits/chosen": -3.1840951442718506,
      "logits/rejected": -3.0981223583221436,
      "logps/chosen": -430.7939453125,
      "logps/rejected": -403.5060119628906,
      "loss": 0.6447,
      "rewards/accuracies": 0.6225000023841858,
      "rewards/chosen": -0.15974169969558716,
      "rewards/margins": 0.09658970683813095,
      "rewards/rejected": -0.2563314139842987,
      "step": 350
    },
    {
      "epoch": 0.7845188284518828,
      "grad_norm": 8.326279640197754,
      "learning_rate": 6.75079717232744e-08,
      "logits/chosen": -3.2011187076568604,
      "logits/rejected": -3.0969460010528564,
      "logps/chosen": -465.22344970703125,
      "logps/rejected": -392.5793762207031,
      "loss": 0.6411,
      "rewards/accuracies": 0.6549999713897705,
      "rewards/chosen": -0.11683624982833862,
      "rewards/margins": 0.14286838471889496,
      "rewards/rejected": -0.2597046196460724,
      "step": 375
    },
    {
      "epoch": 0.8368200836820083,
      "grad_norm": 8.851872444152832,
      "learning_rate": 3.9507259776993954e-08,
      "logits/chosen": -3.221069097518921,
      "logits/rejected": -3.1744155883789062,
      "logps/chosen": -449.8016052246094,
      "logps/rejected": -409.0178527832031,
      "loss": 0.6428,
      "rewards/accuracies": 0.6524999737739563,
      "rewards/chosen": -0.08884063363075256,
      "rewards/margins": 0.13940051198005676,
      "rewards/rejected": -0.22824116051197052,
      "step": 400
    },
    {
      "epoch": 0.8368200836820083,
      "eval_logits/chosen": -3.254263401031494,
      "eval_logits/rejected": -3.204622507095337,
      "eval_logps/chosen": -433.20001220703125,
      "eval_logps/rejected": -428.88836669921875,
      "eval_loss": 0.6415477395057678,
      "eval_rewards/accuracies": 0.62109375,
      "eval_rewards/chosen": -0.10007989406585693,
      "eval_rewards/margins": 0.14358317852020264,
      "eval_rewards/rejected": -0.24366310238838196,
      "eval_runtime": 8.7287,
      "eval_samples_per_second": 229.13,
      "eval_steps_per_second": 3.666,
      "step": 400
    },
    {
      "epoch": 0.8891213389121339,
      "grad_norm": 10.138018608093262,
      "learning_rate": 1.850935636255496e-08,
      "logits/chosen": -3.1941397190093994,
      "logits/rejected": -3.1335413455963135,
      "logps/chosen": -463.3765869140625,
      "logps/rejected": -418.0171203613281,
      "loss": 0.6393,
      "rewards/accuracies": 0.6399999856948853,
      "rewards/chosen": -0.11999661475419998,
      "rewards/margins": 0.13007262349128723,
      "rewards/rejected": -0.2500692307949066,
      "step": 425
    },
    {
      "epoch": 0.9414225941422594,
      "grad_norm": 9.687002182006836,
      "learning_rate": 5.212833302556258e-09,
      "logits/chosen": -3.202920913696289,
      "logits/rejected": -3.14504337310791,
      "logps/chosen": -442.259765625,
      "logps/rejected": -406.7275085449219,
      "loss": 0.6419,
      "rewards/accuracies": 0.6050000190734863,
      "rewards/chosen": -0.13094988465309143,
      "rewards/margins": 0.1017264574766159,
      "rewards/rejected": -0.23267632722854614,
      "step": 450
    },
    {
      "epoch": 0.9937238493723849,
      "grad_norm": 8.862220764160156,
      "learning_rate": 6.004792024680294e-11,
      "logits/chosen": -3.1527392864227295,
      "logits/rejected": -3.1062843799591064,
      "logps/chosen": -445.8639831542969,
      "logps/rejected": -418.4268493652344,
      "loss": 0.6377,
      "rewards/accuracies": 0.6575000286102295,
      "rewards/chosen": -0.11145105212926865,
      "rewards/margins": 0.15232053399085999,
      "rewards/rejected": -0.26377159357070923,
      "step": 475
    },
    {
      "epoch": 1.0,
      "step": 478,
      "total_flos": 0.0,
      "train_loss": 0.6571792745689967,
      "train_runtime": 784.6622,
      "train_samples_per_second": 77.913,
      "train_steps_per_second": 0.609
    }
  ],
  "logging_steps": 25,
  "max_steps": 478,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}