|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 142, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.818629616705012, |
|
"learning_rate": 3.3333333333333334e-08, |
|
"logits/chosen": 0.34349873661994934, |
|
"logits/rejected": -0.45936429500579834, |
|
"logps/chosen": -645.8182373046875, |
|
"logps/rejected": -960.7478637695312, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 3.7015477071804805, |
|
"learning_rate": 3.333333333333333e-07, |
|
"logits/chosen": 0.2855316400527954, |
|
"logits/rejected": -0.5074439644813538, |
|
"logps/chosen": -515.9026489257812, |
|
"logps/rejected": -943.4110107421875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.3888888955116272, |
|
"rewards/chosen": 9.945142664946616e-05, |
|
"rewards/margins": -0.0002705494989641011, |
|
"rewards/rejected": 0.00037000092561356723, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.8654438865386216, |
|
"learning_rate": 4.980901968537757e-07, |
|
"logits/chosen": 0.22748669981956482, |
|
"logits/rejected": -0.6011324524879456, |
|
"logps/chosen": -554.2417602539062, |
|
"logps/rejected": -920.9329833984375, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0033154767006635666, |
|
"rewards/margins": 0.003788186237215996, |
|
"rewards/rejected": -0.00047270936192944646, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 4.007801368289745, |
|
"learning_rate": 4.829863985848586e-07, |
|
"logits/chosen": 0.36973315477371216, |
|
"logits/rejected": -0.5559585094451904, |
|
"logps/chosen": -484.40032958984375, |
|
"logps/rejected": -927.1837158203125, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.012849527411162853, |
|
"rewards/margins": 0.01983761414885521, |
|
"rewards/rejected": -0.006988088134676218, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.8106369880633078, |
|
"learning_rate": 4.5369832536975604e-07, |
|
"logits/chosen": 0.1922769993543625, |
|
"logits/rejected": -0.5385528802871704, |
|
"logps/chosen": -541.2227783203125, |
|
"logps/rejected": -886.4552001953125, |
|
"loss": 0.6726, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.03025916777551174, |
|
"rewards/margins": 0.04895365983247757, |
|
"rewards/rejected": -0.018694492056965828, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 3.566281633809558, |
|
"learning_rate": 4.120090430254013e-07, |
|
"logits/chosen": 0.16315576434135437, |
|
"logits/rejected": -0.6133657097816467, |
|
"logps/chosen": -582.3228149414062, |
|
"logps/rejected": -962.0523681640625, |
|
"loss": 0.6498, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.038968753069639206, |
|
"rewards/margins": 0.06734822690486908, |
|
"rewards/rejected": -0.02837948501110077, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.941251849542362, |
|
"learning_rate": 3.6045660633384666e-07, |
|
"logits/chosen": 0.25063854455947876, |
|
"logits/rejected": -0.7097708582878113, |
|
"logps/chosen": -553.4964599609375, |
|
"logps/rejected": -958.4275512695312, |
|
"loss": 0.6382, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.054004330188035965, |
|
"rewards/margins": 0.16838756203651428, |
|
"rewards/rejected": -0.1143832579255104, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 3.6354849237348916, |
|
"learning_rate": 3.021795415808338e-07, |
|
"logits/chosen": 0.2733720541000366, |
|
"logits/rejected": -0.47119006514549255, |
|
"logps/chosen": -546.3372192382812, |
|
"logps/rejected": -853.4588012695312, |
|
"loss": 0.6301, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 0.05035785585641861, |
|
"rewards/margins": 0.14240628480911255, |
|
"rewards/rejected": -0.09204842895269394, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 4.178631940798131, |
|
"learning_rate": 2.407257722268487e-07, |
|
"logits/chosen": 0.17784562706947327, |
|
"logits/rejected": -0.545692503452301, |
|
"logps/chosen": -513.2195434570312, |
|
"logps/rejected": -911.9866943359375, |
|
"loss": 0.6075, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.056872475892305374, |
|
"rewards/margins": 0.31286248564720154, |
|
"rewards/rejected": -0.25598999857902527, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 3.917354076328557, |
|
"learning_rate": 1.798366203674768e-07, |
|
"logits/chosen": 0.3149748742580414, |
|
"logits/rejected": -0.6348077654838562, |
|
"logps/chosen": -531.9932861328125, |
|
"logps/rejected": -937.4884643554688, |
|
"loss": 0.6101, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 0.052259791642427444, |
|
"rewards/margins": 0.3956056237220764, |
|
"rewards/rejected": -0.3433458209037781, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 4.051764538146448, |
|
"learning_rate": 1.232190340325567e-07, |
|
"logits/chosen": 0.29592758417129517, |
|
"logits/rejected": -0.5284489989280701, |
|
"logps/chosen": -476.4107360839844, |
|
"logps/rejected": -907.6671142578125, |
|
"loss": 0.5991, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.07296852767467499, |
|
"rewards/margins": 0.3048384487628937, |
|
"rewards/rejected": -0.2318699061870575, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_logits/chosen": -0.001659675850532949, |
|
"eval_logits/rejected": -1.119870662689209, |
|
"eval_logps/chosen": -485.54742431640625, |
|
"eval_logps/rejected": -1095.5645751953125, |
|
"eval_loss": 0.6539692282676697, |
|
"eval_rewards/accuracies": 0.7777777910232544, |
|
"eval_rewards/chosen": 0.0512981191277504, |
|
"eval_rewards/margins": 0.10516992956399918, |
|
"eval_rewards/rejected": -0.05387180671095848, |
|
"eval_runtime": 191.4237, |
|
"eval_samples_per_second": 9.027, |
|
"eval_steps_per_second": 0.282, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 4.049764432150976, |
|
"learning_rate": 7.431990718775249e-08, |
|
"logits/chosen": 0.20402272045612335, |
|
"logits/rejected": -0.6543243527412415, |
|
"logps/chosen": -562.2398681640625, |
|
"logps/rejected": -969.9613037109375, |
|
"loss": 0.5843, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 0.03413739800453186, |
|
"rewards/margins": 0.4432193338871002, |
|
"rewards/rejected": -0.40908199548721313, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 4.618779944055989, |
|
"learning_rate": 3.6116231897871026e-08, |
|
"logits/chosen": 0.23138892650604248, |
|
"logits/rejected": -0.6778791546821594, |
|
"logps/chosen": -575.7042236328125, |
|
"logps/rejected": -981.9583129882812, |
|
"loss": 0.5762, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.045423753559589386, |
|
"rewards/margins": 0.4969947934150696, |
|
"rewards/rejected": -0.451570987701416, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 4.683249145311086, |
|
"learning_rate": 1.0933858244968147e-08, |
|
"logits/chosen": 0.1574718952178955, |
|
"logits/rejected": -0.5027952194213867, |
|
"logps/chosen": -536.2589111328125, |
|
"logps/rejected": -921.5875854492188, |
|
"loss": 0.5781, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 0.050459813326597214, |
|
"rewards/margins": 0.41949304938316345, |
|
"rewards/rejected": -0.3690332770347595, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 4.206754290452048, |
|
"learning_rate": 3.058959464346811e-10, |
|
"logits/chosen": 0.16254039108753204, |
|
"logits/rejected": -0.5900125503540039, |
|
"logps/chosen": -552.6573486328125, |
|
"logps/rejected": -974.9669799804688, |
|
"loss": 0.5848, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.05284979194402695, |
|
"rewards/margins": 0.2659812569618225, |
|
"rewards/rejected": -0.21313147246837616, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 142, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6277184326883772, |
|
"train_runtime": 2301.3874, |
|
"train_samples_per_second": 3.942, |
|
"train_steps_per_second": 0.062 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 142, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|