|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9962157048249763, |
|
"eval_steps": 500, |
|
"global_step": 162, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 44.790242854161896, |
|
"learning_rate": 2.941176470588235e-08, |
|
"logits/chosen": 0.4138435125350952, |
|
"logits/rejected": 0.3073309361934662, |
|
"logps/chosen": -238.74684143066406, |
|
"logps/rejected": -277.3367919921875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 32.66922851838542, |
|
"learning_rate": 2.941176470588235e-07, |
|
"logits/chosen": 0.19251321256160736, |
|
"logits/rejected": 0.15595921874046326, |
|
"logps/chosen": -266.190673828125, |
|
"logps/rejected": -288.5094299316406, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.5384615659713745, |
|
"rewards/chosen": -0.04362406209111214, |
|
"rewards/margins": 0.010117193683981895, |
|
"rewards/rejected": -0.05374125763773918, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 34.2665441677138, |
|
"learning_rate": 4.99472085783721e-07, |
|
"logits/chosen": 0.4772653877735138, |
|
"logits/rejected": 0.34988027811050415, |
|
"logps/chosen": -288.5440368652344, |
|
"logps/rejected": -317.422607421875, |
|
"loss": 0.6966, |
|
"rewards/accuracies": 0.48846152424812317, |
|
"rewards/chosen": -0.29237106442451477, |
|
"rewards/margins": 0.041345465928316116, |
|
"rewards/rejected": -0.3337165117263794, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 51.989784907303765, |
|
"learning_rate": 4.901488388458247e-07, |
|
"logits/chosen": 0.08469453454017639, |
|
"logits/rejected": 0.02361711673438549, |
|
"logps/chosen": -257.55908203125, |
|
"logps/rejected": -290.6418151855469, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.5153846144676208, |
|
"rewards/chosen": 0.0216812863945961, |
|
"rewards/margins": 0.06838896870613098, |
|
"rewards/rejected": -0.04670768231153488, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 48.07967092856285, |
|
"learning_rate": 4.695964991097616e-07, |
|
"logits/chosen": 0.5012978315353394, |
|
"logits/rejected": 0.3107348382472992, |
|
"logps/chosen": -293.4065856933594, |
|
"logps/rejected": -318.2868347167969, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5153846144676208, |
|
"rewards/chosen": -0.3082655370235443, |
|
"rewards/margins": 0.04468757286667824, |
|
"rewards/rejected": -0.35295310616493225, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 50.8642865419175, |
|
"learning_rate": 4.3877607113930516e-07, |
|
"logits/chosen": 0.5789575576782227, |
|
"logits/rejected": 0.747968852519989, |
|
"logps/chosen": -291.36279296875, |
|
"logps/rejected": -306.2597961425781, |
|
"loss": 0.696, |
|
"rewards/accuracies": 0.5269230604171753, |
|
"rewards/chosen": -0.29860785603523254, |
|
"rewards/margins": 0.016954706981778145, |
|
"rewards/rejected": -0.31556254625320435, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 42.68053454562829, |
|
"learning_rate": 3.991286838919086e-07, |
|
"logits/chosen": 0.5176121592521667, |
|
"logits/rejected": 0.48673737049102783, |
|
"logps/chosen": -285.60284423828125, |
|
"logps/rejected": -301.834228515625, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.4961538314819336, |
|
"rewards/chosen": -0.20814552903175354, |
|
"rewards/margins": 0.02328580990433693, |
|
"rewards/rejected": -0.23143133521080017, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 37.59954622299739, |
|
"learning_rate": 3.52508205130354e-07, |
|
"logits/chosen": 0.47016510367393494, |
|
"logits/rejected": 0.6350060105323792, |
|
"logps/chosen": -298.3149719238281, |
|
"logps/rejected": -311.8184509277344, |
|
"loss": 0.6954, |
|
"rewards/accuracies": 0.5423076748847961, |
|
"rewards/chosen": -0.33209383487701416, |
|
"rewards/margins": 0.01967799849808216, |
|
"rewards/rejected": -0.35177183151245117, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 40.19774755409481, |
|
"learning_rate": 3.010945566265912e-07, |
|
"logits/chosen": 0.8041943311691284, |
|
"logits/rejected": 0.9286781549453735, |
|
"logps/chosen": -320.7852783203125, |
|
"logps/rejected": -339.48193359375, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.557692289352417, |
|
"rewards/chosen": -0.5020374655723572, |
|
"rewards/margins": 0.013489325530827045, |
|
"rewards/rejected": -0.5155267715454102, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 40.3298911710128, |
|
"learning_rate": 2.4729178344249006e-07, |
|
"logits/chosen": 0.526244044303894, |
|
"logits/rejected": 0.5612362027168274, |
|
"logps/chosen": -289.9747009277344, |
|
"logps/rejected": -304.9601135253906, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5230769515037537, |
|
"rewards/chosen": -0.2381971925497055, |
|
"rewards/margins": 0.04978089779615402, |
|
"rewards/rejected": -0.2879781126976013, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 35.40524465285595, |
|
"learning_rate": 1.9361564345465145e-07, |
|
"logits/chosen": 0.3361697196960449, |
|
"logits/rejected": 0.5479218363761902, |
|
"logps/chosen": -272.99285888671875, |
|
"logps/rejected": -300.7643737792969, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.5153846144676208, |
|
"rewards/chosen": -0.1911364644765854, |
|
"rewards/margins": 0.03422596678137779, |
|
"rewards/rejected": -0.22536242008209229, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 38.98659700871813, |
|
"learning_rate": 1.4257597331216208e-07, |
|
"logits/chosen": 0.6074225902557373, |
|
"logits/rejected": 0.7285165786743164, |
|
"logps/chosen": -311.04827880859375, |
|
"logps/rejected": -335.56396484375, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.5423076748847961, |
|
"rewards/chosen": -0.4663804769515991, |
|
"rewards/margins": 0.05671105906367302, |
|
"rewards/rejected": -0.5230914950370789, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 50.7969521151244, |
|
"learning_rate": 9.655933126436563e-08, |
|
"logits/chosen": 0.5686596035957336, |
|
"logits/rejected": 0.6905936002731323, |
|
"logps/chosen": -276.1257629394531, |
|
"logps/rejected": -292.55120849609375, |
|
"loss": 0.7065, |
|
"rewards/accuracies": 0.5538461804389954, |
|
"rewards/chosen": -0.14772899448871613, |
|
"rewards/margins": 0.05048359930515289, |
|
"rewards/rejected": -0.19821257889270782, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 44.99508540990744, |
|
"learning_rate": 5.771740434959277e-08, |
|
"logits/chosen": 0.7891207337379456, |
|
"logits/rejected": 0.6963477730751038, |
|
"logps/chosen": -289.924072265625, |
|
"logps/rejected": -315.6805419921875, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.2571752965450287, |
|
"rewards/margins": 0.07165656983852386, |
|
"rewards/rejected": -0.32883188128471375, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 41.07192587696782, |
|
"learning_rate": 2.7866397900677185e-08, |
|
"logits/chosen": 0.728725016117096, |
|
"logits/rejected": 0.6798302531242371, |
|
"logps/chosen": -313.1536865234375, |
|
"logps/rejected": -325.9017639160156, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.5423076748847961, |
|
"rewards/chosen": -0.5450281500816345, |
|
"rewards/margins": 0.03127431869506836, |
|
"rewards/rejected": -0.5763024687767029, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 51.88495068918687, |
|
"learning_rate": 8.402111802159412e-09, |
|
"logits/chosen": 0.7722111344337463, |
|
"logits/rejected": 0.8273798227310181, |
|
"logps/chosen": -296.1044921875, |
|
"logps/rejected": -326.81695556640625, |
|
"loss": 0.6812, |
|
"rewards/accuracies": 0.607692301273346, |
|
"rewards/chosen": -0.4110731780529022, |
|
"rewards/margins": 0.07831522077322006, |
|
"rewards/rejected": -0.4893884062767029, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 43.53404843175348, |
|
"learning_rate": 2.3467443900582197e-10, |
|
"logits/chosen": 0.9644160866737366, |
|
"logits/rejected": 1.0424695014953613, |
|
"logps/chosen": -288.78082275390625, |
|
"logps/rejected": -310.8374328613281, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.5346153974533081, |
|
"rewards/chosen": -0.3611108064651489, |
|
"rewards/margins": 0.10043878108263016, |
|
"rewards/rejected": -0.4615496098995209, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 162, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6911558170377472, |
|
"train_runtime": 23474.6733, |
|
"train_samples_per_second": 0.9, |
|
"train_steps_per_second": 0.007 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 162, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|