|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 500, |
|
"global_step": 156, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 16.144156476964564, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -1.517999529838562, |
|
"logits/rejected": -1.427964687347412, |
|
"logps/chosen": -138.13075256347656, |
|
"logps/rejected": -139.19334411621094, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 16.23747042290227, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -1.8612151145935059, |
|
"logits/rejected": -1.837838053703308, |
|
"logps/chosen": -165.77423095703125, |
|
"logps/rejected": -167.00115966796875, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4097222089767456, |
|
"rewards/chosen": 0.00027224430232308805, |
|
"rewards/margins": -0.0006895032711327076, |
|
"rewards/rejected": 0.0009617475443519652, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 16.626258757403523, |
|
"learning_rate": 4.989935734988097e-07, |
|
"logits/chosen": -1.765881896018982, |
|
"logits/rejected": -1.758798360824585, |
|
"logps/chosen": -146.74258422851562, |
|
"logps/rejected": -150.7355499267578, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": -0.013516152277588844, |
|
"rewards/margins": 0.0023722327314317226, |
|
"rewards/rejected": -0.015888383612036705, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 15.590271696523773, |
|
"learning_rate": 4.877641290737883e-07, |
|
"logits/chosen": -1.6886975765228271, |
|
"logits/rejected": -1.7255547046661377, |
|
"logps/chosen": -171.6064453125, |
|
"logps/rejected": -175.8733367919922, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.4781250059604645, |
|
"rewards/chosen": -0.10228633880615234, |
|
"rewards/margins": -0.003102194517850876, |
|
"rewards/rejected": -0.09918414056301117, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 15.611757303970759, |
|
"learning_rate": 4.646121984004665e-07, |
|
"logits/chosen": -1.6890376806259155, |
|
"logits/rejected": -1.6796882152557373, |
|
"logps/chosen": -175.96786499023438, |
|
"logps/rejected": -174.4598846435547, |
|
"loss": 0.6962, |
|
"rewards/accuracies": 0.44062501192092896, |
|
"rewards/chosen": -0.1981964409351349, |
|
"rewards/margins": -0.007378303911536932, |
|
"rewards/rejected": -0.19081811606884003, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 15.152656997490112, |
|
"learning_rate": 4.3069871595684787e-07, |
|
"logits/chosen": -1.8153579235076904, |
|
"logits/rejected": -1.864524483680725, |
|
"logps/chosen": -173.35693359375, |
|
"logps/rejected": -175.8848114013672, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.09905640780925751, |
|
"rewards/margins": 0.00832386501133442, |
|
"rewards/rejected": -0.10738028585910797, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 17.022534458266477, |
|
"learning_rate": 3.877242453630256e-07, |
|
"logits/chosen": -1.8550310134887695, |
|
"logits/rejected": -1.8582820892333984, |
|
"logps/chosen": -192.25155639648438, |
|
"logps/rejected": -197.19004821777344, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.2872371971607208, |
|
"rewards/margins": 0.02158377319574356, |
|
"rewards/rejected": -0.3088209331035614, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 16.571326277539317, |
|
"learning_rate": 3.378437060203357e-07, |
|
"logits/chosen": -1.8471654653549194, |
|
"logits/rejected": -1.8384788036346436, |
|
"logps/chosen": -192.4368896484375, |
|
"logps/rejected": -195.90115356445312, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.3420848548412323, |
|
"rewards/margins": 0.01330840028822422, |
|
"rewards/rejected": -0.35539326071739197, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 13.438756015994942, |
|
"learning_rate": 2.8355831645441387e-07, |
|
"logits/chosen": -1.8971712589263916, |
|
"logits/rejected": -1.9240859746932983, |
|
"logps/chosen": -172.50802612304688, |
|
"logps/rejected": -181.87484741210938, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": -0.19086754322052002, |
|
"rewards/margins": 0.0247647762298584, |
|
"rewards/rejected": -0.21563228964805603, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 15.835235204252495, |
|
"learning_rate": 2.2759017277414164e-07, |
|
"logits/chosen": -1.803915023803711, |
|
"logits/rejected": -1.7513706684112549, |
|
"logps/chosen": -196.07321166992188, |
|
"logps/rejected": -192.37071228027344, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": -0.3334119915962219, |
|
"rewards/margins": 0.01788436248898506, |
|
"rewards/rejected": -0.3512963652610779, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 20.64165185557883, |
|
"learning_rate": 1.7274575140626315e-07, |
|
"logits/chosen": -1.8089154958724976, |
|
"logits/rejected": -1.7900733947753906, |
|
"logps/chosen": -193.5570831298828, |
|
"logps/rejected": -195.0845489501953, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.3730206787586212, |
|
"rewards/margins": 0.006149230990558863, |
|
"rewards/rejected": -0.37916988134384155, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 14.009691318366311, |
|
"learning_rate": 1.2177518064852348e-07, |
|
"logits/chosen": -1.735630750656128, |
|
"logits/rejected": -1.7698205709457397, |
|
"logps/chosen": -179.77761840820312, |
|
"logps/rejected": -190.6461944580078, |
|
"loss": 0.6853, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -0.3420836329460144, |
|
"rewards/margins": 0.045021723955869675, |
|
"rewards/rejected": -0.38710540533065796, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 15.208350623463305, |
|
"learning_rate": 7.723433775328384e-08, |
|
"logits/chosen": -1.8086316585540771, |
|
"logits/rejected": -1.8518075942993164, |
|
"logps/chosen": -198.0193634033203, |
|
"logps/rejected": -197.88845825195312, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.36722415685653687, |
|
"rewards/margins": 0.019394848495721817, |
|
"rewards/rejected": -0.386618971824646, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 20.27285082098782, |
|
"learning_rate": 4.1356686569674335e-08, |
|
"logits/chosen": -1.6780946254730225, |
|
"logits/rejected": -1.6494309902191162, |
|
"logps/chosen": -181.09170532226562, |
|
"logps/rejected": -182.8488311767578, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.29602062702178955, |
|
"rewards/margins": 0.0265937689691782, |
|
"rewards/rejected": -0.3226144015789032, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 16.756793414757478, |
|
"learning_rate": 1.5941282340065697e-08, |
|
"logits/chosen": -1.8084728717803955, |
|
"logits/rejected": -1.767961859703064, |
|
"logps/chosen": -189.6092071533203, |
|
"logps/rejected": -193.9722900390625, |
|
"loss": 0.6818, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.3253946006298065, |
|
"rewards/margins": 0.02885589934885502, |
|
"rewards/rejected": -0.3542505204677582, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 15.87524427688872, |
|
"learning_rate": 2.2625595580163247e-09, |
|
"logits/chosen": -1.70889413356781, |
|
"logits/rejected": -1.748719573020935, |
|
"logps/chosen": -189.88803100585938, |
|
"logps/rejected": -193.5582733154297, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -0.38032156229019165, |
|
"rewards/margins": 0.030102457851171494, |
|
"rewards/rejected": -0.41042399406433105, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 156, |
|
"total_flos": 0.0, |
|
"train_loss": 0.688686496936358, |
|
"train_runtime": 18523.9468, |
|
"train_samples_per_second": 1.08, |
|
"train_steps_per_second": 0.008 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 156, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|