File size: 10,750 Bytes
aff606b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 100,
"global_step": 195,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 2.5000000000000004e-07,
"logits/chosen": -2.3828954696655273,
"logits/rejected": -2.2103500366210938,
"logps/chosen": -351.30865478515625,
"logps/rejected": -310.087646484375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.05,
"learning_rate": 2.5e-06,
"logits/chosen": -2.15350341796875,
"logits/rejected": -2.057192087173462,
"logps/chosen": -291.661865234375,
"logps/rejected": -299.000244140625,
"loss": 0.6901,
"rewards/accuracies": 0.5208333134651184,
"rewards/chosen": -0.00531815318390727,
"rewards/margins": 0.006059659644961357,
"rewards/rejected": -0.01137781422585249,
"step": 10
},
{
"epoch": 0.1,
"learning_rate": 5e-06,
"logits/chosen": -1.7294094562530518,
"logits/rejected": -1.6358362436294556,
"logps/chosen": -349.6874084472656,
"logps/rejected": -371.9268798828125,
"loss": 0.6485,
"rewards/accuracies": 0.609375,
"rewards/chosen": -0.4694043695926666,
"rewards/margins": 0.10101622343063354,
"rewards/rejected": -0.5704206228256226,
"step": 20
},
{
"epoch": 0.15,
"learning_rate": 4.959823971496575e-06,
"logits/chosen": -1.3581154346466064,
"logits/rejected": -1.2683781385421753,
"logps/chosen": -328.9931945800781,
"logps/rejected": -367.46417236328125,
"loss": 0.6227,
"rewards/accuracies": 0.653124988079071,
"rewards/chosen": -0.3038724958896637,
"rewards/margins": 0.21112871170043945,
"rewards/rejected": -0.5150011777877808,
"step": 30
},
{
"epoch": 0.2,
"learning_rate": 4.8405871765993435e-06,
"logits/chosen": -1.0669946670532227,
"logits/rejected": -0.6835187673568726,
"logps/chosen": -354.5089416503906,
"logps/rejected": -383.76422119140625,
"loss": 0.5903,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": -0.4168413579463959,
"rewards/margins": 0.37733370065689087,
"rewards/rejected": -0.7941750884056091,
"step": 40
},
{
"epoch": 0.26,
"learning_rate": 4.646121984004666e-06,
"logits/chosen": -0.7538890838623047,
"logits/rejected": -0.34662288427352905,
"logps/chosen": -361.6393737792969,
"logps/rejected": -380.0272521972656,
"loss": 0.59,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.479973167181015,
"rewards/margins": 0.41160327196121216,
"rewards/rejected": -0.8915762901306152,
"step": 50
},
{
"epoch": 0.31,
"learning_rate": 4.382678665009028e-06,
"logits/chosen": -0.7476059198379517,
"logits/rejected": -0.4476490020751953,
"logps/chosen": -321.3587951660156,
"logps/rejected": -371.570556640625,
"loss": 0.5849,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.3474940359592438,
"rewards/margins": 0.410334974527359,
"rewards/rejected": -0.7578290104866028,
"step": 60
},
{
"epoch": 0.36,
"learning_rate": 4.058724504646834e-06,
"logits/chosen": -0.5304551124572754,
"logits/rejected": 0.0068548740819096565,
"logps/chosen": -372.43316650390625,
"logps/rejected": -403.68011474609375,
"loss": 0.5931,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.6598731875419617,
"rewards/margins": 0.44455790519714355,
"rewards/rejected": -1.10443115234375,
"step": 70
},
{
"epoch": 0.41,
"learning_rate": 3.684671656182497e-06,
"logits/chosen": -0.7597763538360596,
"logits/rejected": -0.30586355924606323,
"logps/chosen": -384.52679443359375,
"logps/rejected": -407.8342590332031,
"loss": 0.5901,
"rewards/accuracies": 0.6781250238418579,
"rewards/chosen": -0.5050551891326904,
"rewards/margins": 0.4280626177787781,
"rewards/rejected": -0.9331178665161133,
"step": 80
},
{
"epoch": 0.46,
"learning_rate": 3.272542485937369e-06,
"logits/chosen": -0.6929324865341187,
"logits/rejected": -0.2577061057090759,
"logps/chosen": -342.2850036621094,
"logps/rejected": -383.3541259765625,
"loss": 0.5777,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.5730774998664856,
"rewards/margins": 0.38491758704185486,
"rewards/rejected": -0.9579952359199524,
"step": 90
},
{
"epoch": 0.51,
"learning_rate": 2.835583164544139e-06,
"logits/chosen": -0.8866588473320007,
"logits/rejected": -0.39027491211891174,
"logps/chosen": -326.8385925292969,
"logps/rejected": -374.0113525390625,
"loss": 0.5616,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.4010140001773834,
"rewards/margins": 0.5182436108589172,
"rewards/rejected": -0.9192575216293335,
"step": 100
},
{
"epoch": 0.51,
"eval_logits/chosen": -0.6312460899353027,
"eval_logits/rejected": -0.05466047301888466,
"eval_logps/chosen": -351.8179016113281,
"eval_logps/rejected": -391.2296447753906,
"eval_loss": 0.5503215789794922,
"eval_rewards/accuracies": 0.7139999866485596,
"eval_rewards/chosen": -0.6370265483856201,
"eval_rewards/margins": 0.559273362159729,
"eval_rewards/rejected": -1.1962999105453491,
"eval_runtime": 396.9424,
"eval_samples_per_second": 5.039,
"eval_steps_per_second": 0.63,
"step": 100
},
{
"epoch": 0.56,
"learning_rate": 2.3878379241237136e-06,
"logits/chosen": -0.5016804933547974,
"logits/rejected": -0.17540986835956573,
"logps/chosen": -381.13250732421875,
"logps/rejected": -445.72344970703125,
"loss": 0.568,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.8623906970024109,
"rewards/margins": 0.4544965624809265,
"rewards/rejected": -1.3168871402740479,
"step": 110
},
{
"epoch": 0.61,
"learning_rate": 1.9436976651092143e-06,
"logits/chosen": -0.5300595164299011,
"logits/rejected": -0.16820363700389862,
"logps/chosen": -343.6223449707031,
"logps/rejected": -390.7423400878906,
"loss": 0.5643,
"rewards/accuracies": 0.671875,
"rewards/chosen": -0.5444029569625854,
"rewards/margins": 0.469682514667511,
"rewards/rejected": -1.0140855312347412,
"step": 120
},
{
"epoch": 0.67,
"learning_rate": 1.5174374208651913e-06,
"logits/chosen": -0.7855179309844971,
"logits/rejected": -0.3058822751045227,
"logps/chosen": -345.6529235839844,
"logps/rejected": -394.87310791015625,
"loss": 0.5708,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -0.4353795647621155,
"rewards/margins": 0.5461079478263855,
"rewards/rejected": -0.9814874529838562,
"step": 130
},
{
"epoch": 0.72,
"learning_rate": 1.122757546369744e-06,
"logits/chosen": -0.5981294512748718,
"logits/rejected": 0.1230069175362587,
"logps/chosen": -365.549560546875,
"logps/rejected": -422.6253356933594,
"loss": 0.553,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.6209978461265564,
"rewards/margins": 0.6843111515045166,
"rewards/rejected": -1.3053090572357178,
"step": 140
},
{
"epoch": 0.77,
"learning_rate": 7.723433775328385e-07,
"logits/chosen": -0.31050771474838257,
"logits/rejected": 0.17936445772647858,
"logps/chosen": -382.371826171875,
"logps/rejected": -423.1304626464844,
"loss": 0.567,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.7858445048332214,
"rewards/margins": 0.49087247252464294,
"rewards/rejected": -1.276716947555542,
"step": 150
},
{
"epoch": 0.82,
"learning_rate": 4.774575140626317e-07,
"logits/chosen": -0.3516121506690979,
"logits/rejected": 0.05455173924565315,
"logps/chosen": -383.17694091796875,
"logps/rejected": -414.36322021484375,
"loss": 0.5914,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": -0.7312983870506287,
"rewards/margins": 0.4293293058872223,
"rewards/rejected": -1.160627841949463,
"step": 160
},
{
"epoch": 0.87,
"learning_rate": 2.4757783024395244e-07,
"logits/chosen": -0.3427812159061432,
"logits/rejected": 0.1810428947210312,
"logps/chosen": -328.9323425292969,
"logps/rejected": -408.287353515625,
"loss": 0.5436,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.5952991247177124,
"rewards/margins": 0.7224096059799194,
"rewards/rejected": -1.3177087306976318,
"step": 170
},
{
"epoch": 0.92,
"learning_rate": 9.00928482603669e-08,
"logits/chosen": -0.4558378756046295,
"logits/rejected": 0.023540988564491272,
"logps/chosen": -353.60015869140625,
"logps/rejected": -400.02459716796875,
"loss": 0.5798,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.6851487159729004,
"rewards/margins": 0.5590900778770447,
"rewards/rejected": -1.2442388534545898,
"step": 180
},
{
"epoch": 0.97,
"learning_rate": 1.006426501190233e-08,
"logits/chosen": -0.32187455892562866,
"logits/rejected": 0.002347037196159363,
"logps/chosen": -363.7867126464844,
"logps/rejected": -415.94952392578125,
"loss": 0.5752,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -0.6913945078849792,
"rewards/margins": 0.4907089173793793,
"rewards/rejected": -1.1821033954620361,
"step": 190
},
{
"epoch": 1.0,
"step": 195,
"total_flos": 0.0,
"train_loss": 0.5863106256876236,
"train_runtime": 9239.1596,
"train_samples_per_second": 2.706,
"train_steps_per_second": 0.021
}
],
"logging_steps": 10,
"max_steps": 195,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|