{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9962825278810409,
"eval_steps": 100,
"global_step": 134,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 7.393402431483719,
"learning_rate": 3.571428571428571e-08,
"logits/chosen": -0.5970903635025024,
"logits/rejected": -0.02967279776930809,
"logps/chosen": -254.73361206054688,
"logps/rejected": -449.335693359375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.07,
"grad_norm": 6.97808175581924,
"learning_rate": 3.5714285714285716e-07,
"logits/chosen": -0.502315104007721,
"logits/rejected": -0.24948199093341827,
"logps/chosen": -339.6366271972656,
"logps/rejected": -657.9154663085938,
"loss": 0.6929,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.0001597129157744348,
"rewards/margins": 0.00033807966974563897,
"rewards/rejected": -0.0004977926146239042,
"step": 10
},
{
"epoch": 0.15,
"grad_norm": 7.671337358423795,
"learning_rate": 4.969220851487844e-07,
"logits/chosen": -0.5676344037055969,
"logits/rejected": -0.3287120759487152,
"logps/chosen": -378.62664794921875,
"logps/rejected": -670.591552734375,
"loss": 0.6827,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.0052469945512712,
"rewards/margins": 0.02313617616891861,
"rewards/rejected": -0.028383171185851097,
"step": 20
},
{
"epoch": 0.22,
"grad_norm": 6.338334841496394,
"learning_rate": 4.783863644106502e-07,
"logits/chosen": -0.5812798738479614,
"logits/rejected": -0.34431666135787964,
"logps/chosen": -353.8559875488281,
"logps/rejected": -717.6322021484375,
"loss": 0.6376,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.018838122487068176,
"rewards/margins": 0.11194054782390594,
"rewards/rejected": -0.13077868521213531,
"step": 30
},
{
"epoch": 0.3,
"grad_norm": 5.540070129895064,
"learning_rate": 4.442864903642427e-07,
"logits/chosen": -0.4757254719734192,
"logits/rejected": -0.3771602213382721,
"logps/chosen": -361.31365966796875,
"logps/rejected": -770.7361450195312,
"loss": 0.5666,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": -0.04972491413354874,
"rewards/margins": 0.309120774269104,
"rewards/rejected": -0.35884565114974976,
"step": 40
},
{
"epoch": 0.37,
"grad_norm": 6.028650135342188,
"learning_rate": 3.9694631307311825e-07,
"logits/chosen": -0.5247567892074585,
"logits/rejected": -0.45507222414016724,
"logps/chosen": -293.3636169433594,
"logps/rejected": -845.0416870117188,
"loss": 0.4561,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.1027413159608841,
"rewards/margins": 0.9158787727355957,
"rewards/rejected": -1.0186201333999634,
"step": 50
},
{
"epoch": 0.45,
"grad_norm": 5.965290480598111,
"learning_rate": 3.39591987386325e-07,
"logits/chosen": -0.5522093772888184,
"logits/rejected": -0.4290170669555664,
"logps/chosen": -363.2244873046875,
"logps/rejected": -951.5051879882812,
"loss": 0.3684,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.2073672115802765,
"rewards/margins": 2.0674309730529785,
"rewards/rejected": -2.2747981548309326,
"step": 60
},
{
"epoch": 0.52,
"grad_norm": 5.3960424532927345,
"learning_rate": 2.761321158169134e-07,
"logits/chosen": -0.47125476598739624,
"logits/rejected": -0.4486091136932373,
"logps/chosen": -341.56646728515625,
"logps/rejected": -1014.1658935546875,
"loss": 0.338,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.219242125749588,
"rewards/margins": 3.1757972240448,
"rewards/rejected": -3.3950393199920654,
"step": 70
},
{
"epoch": 0.59,
"grad_norm": 5.338829151255127,
"learning_rate": 2.1089138373994222e-07,
"logits/chosen": -0.5217522382736206,
"logits/rejected": -0.5397945642471313,
"logps/chosen": -321.6473083496094,
"logps/rejected": -1235.2142333984375,
"loss": 0.2868,
"rewards/accuracies": 0.90625,
"rewards/chosen": -0.31155842542648315,
"rewards/margins": 4.753512382507324,
"rewards/rejected": -5.065071105957031,
"step": 80
},
{
"epoch": 0.67,
"grad_norm": 4.274164409019951,
"learning_rate": 1.4831583923104998e-07,
"logits/chosen": -0.44177961349487305,
"logits/rejected": -0.528927743434906,
"logps/chosen": -327.2131042480469,
"logps/rejected": -1242.676513671875,
"loss": 0.2479,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": -0.2681874930858612,
"rewards/margins": 5.103245735168457,
"rewards/rejected": -5.371432781219482,
"step": 90
},
{
"epoch": 0.74,
"grad_norm": 3.847681923589658,
"learning_rate": 9.266990223754067e-08,
"logits/chosen": -0.3910934329032898,
"logits/rejected": -0.5766850709915161,
"logps/chosen": -417.525390625,
"logps/rejected": -1386.257080078125,
"loss": 0.2497,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.5247961282730103,
"rewards/margins": 5.741795539855957,
"rewards/rejected": -6.266592979431152,
"step": 100
},
{
"epoch": 0.74,
"eval_logits/chosen": -0.7826768159866333,
"eval_logits/rejected": -0.5636682510375977,
"eval_logps/chosen": -311.038330078125,
"eval_logps/rejected": -748.6944580078125,
"eval_loss": 0.3023545444011688,
"eval_rewards/accuracies": 0.949999988079071,
"eval_rewards/chosen": -0.08790449053049088,
"eval_rewards/margins": 1.8343137502670288,
"eval_rewards/rejected": -1.9222180843353271,
"eval_runtime": 15.884,
"eval_samples_per_second": 9.569,
"eval_steps_per_second": 0.315,
"step": 100
},
{
"epoch": 0.82,
"grad_norm": 4.194490591135143,
"learning_rate": 4.774575140626316e-08,
"logits/chosen": -0.42816129326820374,
"logits/rejected": -0.4562205374240875,
"logps/chosen": -361.47930908203125,
"logps/rejected": -1313.421630859375,
"loss": 0.2458,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.5410887002944946,
"rewards/margins": 5.888722896575928,
"rewards/rejected": -6.429811954498291,
"step": 110
},
{
"epoch": 0.89,
"grad_norm": 3.9693871459743573,
"learning_rate": 1.6604893375699592e-08,
"logits/chosen": -0.4562758803367615,
"logits/rejected": -0.5703433156013489,
"logps/chosen": -393.4559631347656,
"logps/rejected": -1514.4947509765625,
"loss": 0.2112,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.518619954586029,
"rewards/margins": 7.754377841949463,
"rewards/rejected": -8.272997856140137,
"step": 120
},
{
"epoch": 0.97,
"grad_norm": 4.192543496278868,
"learning_rate": 1.3695261579316775e-09,
"logits/chosen": -0.39360299706459045,
"logits/rejected": -0.4867175221443176,
"logps/chosen": -386.438232421875,
"logps/rejected": -1469.9609375,
"loss": 0.2181,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.6078141927719116,
"rewards/margins": 7.127106666564941,
"rewards/rejected": -7.734920501708984,
"step": 130
},
{
"epoch": 1.0,
"step": 134,
"total_flos": 0.0,
"train_loss": 0.3946254751575527,
"train_runtime": 1910.8543,
"train_samples_per_second": 4.489,
"train_steps_per_second": 0.07
}
],
"logging_steps": 10,
"max_steps": 134,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}