{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.968,
  "eval_steps": 100,
  "global_step": 248,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 2.0000000000000002e-07,
      "logits/chosen": 0.10287265479564667,
      "logits/rejected": 0.011988319456577301,
      "logps/chosen": -192.40402221679688,
      "logps/rejected": -100.80304718017578,
      "loss": 0.001,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.16,
      "learning_rate": 2.0000000000000003e-06,
      "logits/chosen": 0.16615836322307587,
      "logits/rejected": 0.08426308631896973,
      "logps/chosen": -182.21653747558594,
      "logps/rejected": -161.99514770507812,
      "loss": 0.001,
      "rewards/accuracies": 0.375,
      "rewards/chosen": -0.0011453586630523205,
      "rewards/margins": 0.0010826066136360168,
      "rewards/rejected": -0.0022279650438576937,
      "step": 10
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.000000000000001e-06,
      "logits/chosen": 0.22168031334877014,
      "logits/rejected": 0.2861151099205017,
      "logps/chosen": -176.96937561035156,
      "logps/rejected": -135.0897674560547,
      "loss": 0.0012,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": -0.0021877787075936794,
      "rewards/margins": -0.000618638179730624,
      "rewards/rejected": -0.0015691407024860382,
      "step": 20
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.993800445762451e-06,
      "logits/chosen": 0.2504645884037018,
      "logits/rejected": 0.2488332986831665,
      "logps/chosen": -169.92291259765625,
      "logps/rejected": -150.56483459472656,
      "loss": 0.0012,
      "rewards/accuracies": 0.39375001192092896,
      "rewards/chosen": -0.0013141350354999304,
      "rewards/margins": -0.00041974737541750073,
      "rewards/rejected": -0.0008943876018747687,
      "step": 30
    },
    {
      "epoch": 0.64,
      "learning_rate": 4.944388344834205e-06,
      "logits/chosen": 0.20393869280815125,
      "logits/rejected": 0.26991668343544006,
      "logps/chosen": -172.50244140625,
      "logps/rejected": -155.56478881835938,
      "loss": 0.0012,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": 0.0012658500345423818,
      "rewards/margins": -0.0002958738768938929,
      "rewards/rejected": 0.0015617238823324442,
      "step": 40
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.8465431931347904e-06,
      "logits/chosen": 0.23398549854755402,
      "logits/rejected": 0.17997679114341736,
      "logps/chosen": -198.8681182861328,
      "logps/rejected": -161.630615234375,
      "loss": 0.0011,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -0.0012974137207493186,
      "rewards/margins": 0.0008641968597657979,
      "rewards/rejected": -0.0021616104058921337,
      "step": 50
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.702203692102539e-06,
      "logits/chosen": 0.24316315352916718,
      "logits/rejected": 0.29181909561157227,
      "logps/chosen": -187.0422821044922,
      "logps/rejected": -162.91946411132812,
      "loss": 0.0012,
      "rewards/accuracies": 0.3687500059604645,
      "rewards/chosen": 3.171586286043748e-05,
      "rewards/margins": -0.0004161152464803308,
      "rewards/rejected": 0.00044783117482438684,
      "step": 60
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.514229781074239e-06,
      "logits/chosen": 0.23100349307060242,
      "logits/rejected": 0.16395993530750275,
      "logps/chosen": -195.23422241210938,
      "logps/rejected": -167.4615020751953,
      "loss": 0.001,
      "rewards/accuracies": 0.4000000059604645,
      "rewards/chosen": 4.4724576582666487e-05,
      "rewards/margins": 0.0011552829528227448,
      "rewards/rejected": -0.0011105581652373075,
      "step": 70
    },
    {
      "epoch": 1.28,
      "learning_rate": 4.286345970517195e-06,
      "logits/chosen": 0.1470017284154892,
      "logits/rejected": 0.1968422830104828,
      "logps/chosen": -202.30715942382812,
      "logps/rejected": -162.376953125,
      "loss": 0.0011,
      "rewards/accuracies": 0.375,
      "rewards/chosen": 0.0012948224321007729,
      "rewards/margins": 0.0006303158006630838,
      "rewards/rejected": 0.00066450668964535,
      "step": 80
    },
    {
      "epoch": 1.44,
      "learning_rate": 4.023067544670082e-06,
      "logits/chosen": 0.2470308095216751,
      "logits/rejected": 0.2212987244129181,
      "logps/chosen": -173.95779418945312,
      "logps/rejected": -155.6954803466797,
      "loss": 0.0012,
      "rewards/accuracies": 0.3812499940395355,
      "rewards/chosen": -0.0012559869792312384,
      "rewards/margins": -0.000755336950533092,
      "rewards/rejected": -0.0005006499122828245,
      "step": 90
    },
    {
      "epoch": 1.6,
      "learning_rate": 3.7296110958116845e-06,
      "logits/chosen": 0.19465205073356628,
      "logits/rejected": 0.17263346910476685,
      "logps/chosen": -177.3841552734375,
      "logps/rejected": -143.33177185058594,
      "loss": 0.0011,
      "rewards/accuracies": 0.3812499940395355,
      "rewards/chosen": -0.0003903825709130615,
      "rewards/margins": 8.392091694986448e-05,
      "rewards/rejected": -0.0004743034369312227,
      "step": 100
    },
    {
      "epoch": 1.6,
      "eval_logits/chosen": -0.007278905715793371,
      "eval_logits/rejected": 0.09087153524160385,
      "eval_logps/chosen": -306.3416748046875,
      "eval_logps/rejected": -278.65435791015625,
      "eval_loss": 0.0015715558547526598,
      "eval_rewards/accuracies": 0.4959999918937683,
      "eval_rewards/chosen": -0.0007231617928482592,
      "eval_rewards/margins": 0.00012325971329119056,
      "eval_rewards/rejected": -0.0008464214624837041,
      "eval_runtime": 933.3559,
      "eval_samples_per_second": 2.143,
      "eval_steps_per_second": 0.536,
      "step": 100
    },
    {
      "epoch": 1.76,
      "learning_rate": 3.4117911628292944e-06,
      "logits/chosen": 0.29057276248931885,
      "logits/rejected": 0.19747909903526306,
      "logps/chosen": -173.1153564453125,
      "logps/rejected": -151.88233947753906,
      "loss": 0.0011,
      "rewards/accuracies": 0.3187499940395355,
      "rewards/chosen": -0.00022493198048323393,
      "rewards/margins": -0.000448818871518597,
      "rewards/rejected": 0.00022388689103536308,
      "step": 110
    },
    {
      "epoch": 1.92,
      "learning_rate": 3.075905022087675e-06,
      "logits/chosen": 0.1814224272966385,
      "logits/rejected": 0.2010256052017212,
      "logps/chosen": -169.48214721679688,
      "logps/rejected": -147.04067993164062,
      "loss": 0.0011,
      "rewards/accuracies": 0.41874998807907104,
      "rewards/chosen": 0.0006150969420559704,
      "rewards/margins": 0.0005108726327307522,
      "rewards/rejected": 0.00010422446939628571,
      "step": 120
    },
    {
      "epoch": 2.08,
      "learning_rate": 2.728607913349464e-06,
      "logits/chosen": 0.24528518319129944,
      "logits/rejected": 0.2507859468460083,
      "logps/chosen": -177.7463836669922,
      "logps/rejected": -162.4386444091797,
      "loss": 0.0011,
      "rewards/accuracies": 0.38749998807907104,
      "rewards/chosen": -0.0017223177710548043,
      "rewards/margins": 0.0007541574887000024,
      "rewards/rejected": -0.0024764754343777895,
      "step": 130
    },
    {
      "epoch": 2.24,
      "learning_rate": 2.376781173017589e-06,
      "logits/chosen": 0.1746383160352707,
      "logits/rejected": 0.15819591283798218,
      "logps/chosen": -175.5349884033203,
      "logps/rejected": -150.61404418945312,
      "loss": 0.0012,
      "rewards/accuracies": 0.3812499940395355,
      "rewards/chosen": -0.0010503135854378343,
      "rewards/margins": -0.0006477275164797902,
      "rewards/rejected": -0.0004025862435810268,
      "step": 140
    },
    {
      "epoch": 2.4,
      "learning_rate": 2.0273958875043877e-06,
      "logits/chosen": 0.16554135084152222,
      "logits/rejected": 0.20675285160541534,
      "logps/chosen": -198.78173828125,
      "logps/rejected": -161.86558532714844,
      "loss": 0.0011,
      "rewards/accuracies": 0.3812499940395355,
      "rewards/chosen": -0.001666503376327455,
      "rewards/margins": 0.0006793343345634639,
      "rewards/rejected": -0.0023458378855139017,
      "step": 150
    },
    {
      "epoch": 2.56,
      "learning_rate": 1.6873747682962393e-06,
      "logits/chosen": 0.24000254273414612,
      "logits/rejected": 0.27995795011520386,
      "logps/chosen": -190.5437469482422,
      "logps/rejected": -164.19549560546875,
      "loss": 0.001,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": 0.002512627048417926,
      "rewards/margins": 0.00116717757191509,
      "rewards/rejected": 0.001345449360087514,
      "step": 160
    },
    {
      "epoch": 2.72,
      "learning_rate": 1.363454985517803e-06,
      "logits/chosen": 0.2393382042646408,
      "logits/rejected": 0.26565679907798767,
      "logps/chosen": -164.2212677001953,
      "logps/rejected": -140.7093963623047,
      "loss": 0.0011,
      "rewards/accuracies": 0.4312500059604645,
      "rewards/chosen": 0.0010435387957841158,
      "rewards/margins": 0.0007807637448422611,
      "rewards/rejected": 0.0002627749345265329,
      "step": 170
    },
    {
      "epoch": 2.88,
      "learning_rate": 1.062054677808238e-06,
      "logits/chosen": 0.20416012406349182,
      "logits/rejected": 0.1511019915342331,
      "logps/chosen": -183.35943603515625,
      "logps/rejected": -157.43435668945312,
      "loss": 0.001,
      "rewards/accuracies": 0.4124999940395355,
      "rewards/chosen": -7.58793466957286e-05,
      "rewards/margins": 0.0020282561890780926,
      "rewards/rejected": -0.002104135463014245,
      "step": 180
    },
    {
      "epoch": 3.04,
      "learning_rate": 7.891457834794711e-07,
      "logits/chosen": 0.19763195514678955,
      "logits/rejected": 0.2482624500989914,
      "logps/chosen": -169.32882690429688,
      "logps/rejected": -142.48541259765625,
      "loss": 0.001,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": 0.0012013750383630395,
      "rewards/margins": 0.0014427897986024618,
      "rewards/rejected": -0.00024141438188962638,
      "step": 190
    },
    {
      "epoch": 3.2,
      "learning_rate": 5.501357126768117e-07,
      "logits/chosen": 0.1878470778465271,
      "logits/rejected": 0.18784348666667938,
      "logps/chosen": -188.822998046875,
      "logps/rejected": -153.36880493164062,
      "loss": 0.0011,
      "rewards/accuracies": 0.45625001192092896,
      "rewards/chosen": 0.00035641956492327154,
      "rewards/margins": 0.0015287164133042097,
      "rewards/rejected": -0.001172296586446464,
      "step": 200
    },
    {
      "epoch": 3.2,
      "eval_logits/chosen": -0.004656919743865728,
      "eval_logits/rejected": 0.09210014343261719,
      "eval_logps/chosen": -306.283447265625,
      "eval_logps/rejected": -278.6177062988281,
      "eval_loss": 0.0015858013648539782,
      "eval_rewards/accuracies": 0.4925000071525574,
      "eval_rewards/chosen": -0.00014072553312871605,
      "eval_rewards/margins": 0.00033963273745030165,
      "eval_rewards/rejected": -0.0004803583142347634,
      "eval_runtime": 1595.4244,
      "eval_samples_per_second": 1.254,
      "eval_steps_per_second": 0.313,
      "step": 200
    },
    {
      "epoch": 3.36,
      "learning_rate": 3.4976020508682345e-07,
      "logits/chosen": 0.17026616632938385,
      "logits/rejected": 0.2095976322889328,
      "logps/chosen": -177.265625,
      "logps/rejected": -149.7288055419922,
      "loss": 0.001,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": 0.00014497071970254183,
      "rewards/margins": 0.0017732717096805573,
      "rewards/rejected": -0.0016283008735626936,
      "step": 210
    },
    {
      "epoch": 3.52,
      "learning_rate": 1.9198949610721273e-07,
      "logits/chosen": 0.19490325450897217,
      "logits/rejected": 0.2224169224500656,
      "logps/chosen": -175.52345275878906,
      "logps/rejected": -146.68728637695312,
      "loss": 0.001,
      "rewards/accuracies": 0.45625001192092896,
      "rewards/chosen": 0.002361242426559329,
      "rewards/margins": 0.0017604168970137835,
      "rewards/rejected": 0.0006008257623761892,
      "step": 220
    },
    {
      "epoch": 3.68,
      "learning_rate": 7.994965069994143e-08,
      "logits/chosen": 0.21312932670116425,
      "logits/rejected": 0.18425947427749634,
      "logps/chosen": -187.47511291503906,
      "logps/rejected": -163.0996856689453,
      "loss": 0.001,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": 0.0017060479149222374,
      "rewards/margins": 0.001837022602558136,
      "rewards/rejected": -0.00013097473129164428,
      "step": 230
    },
    {
      "epoch": 3.84,
      "learning_rate": 1.5860623616664183e-08,
      "logits/chosen": 0.1936773955821991,
      "logits/rejected": 0.12241797149181366,
      "logps/chosen": -176.46658325195312,
      "logps/rejected": -148.58331298828125,
      "loss": 0.0011,
      "rewards/accuracies": 0.39375001192092896,
      "rewards/chosen": -0.0003967673401348293,
      "rewards/margins": 0.00025122734950855374,
      "rewards/rejected": -0.0006479948060587049,
      "step": 240
    },
    {
      "epoch": 3.97,
      "step": 248,
      "total_flos": 0.0,
      "train_loss": 0.0010954971686230911,
      "train_runtime": 8408.6931,
      "train_samples_per_second": 0.476,
      "train_steps_per_second": 0.029
    }
  ],
  "logging_steps": 10,
  "max_steps": 248,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}