{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9992429977289932,
  "eval_steps": 100,
  "global_step": 165,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 2.9411764705882356e-07,
      "logits/chosen": -2.737081289291382,
      "logits/rejected": -2.680964708328247,
      "logps/chosen": -126.38134765625,
      "logps/rejected": -136.25076293945312,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 2.9411764705882355e-06,
      "logits/chosen": -2.731968879699707,
      "logits/rejected": -2.708989143371582,
      "logps/chosen": -118.21139526367188,
      "logps/rejected": -111.94728088378906,
      "loss": 0.6932,
      "rewards/accuracies": 0.4583333432674408,
      "rewards/chosen": -1.655664891586639e-05,
      "rewards/margins": -7.096579793142155e-05,
      "rewards/rejected": 5.4409170843428e-05,
      "step": 10
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.994932636402032e-06,
      "logits/chosen": -2.744499683380127,
      "logits/rejected": -2.7339892387390137,
      "logps/chosen": -113.14430236816406,
      "logps/rejected": -123.68851470947266,
      "loss": 0.6929,
      "rewards/accuracies": 0.5218750238418579,
      "rewards/chosen": 0.007225497625768185,
      "rewards/margins": 0.0004294503596611321,
      "rewards/rejected": 0.0067960480228066444,
      "step": 20
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.905416503522124e-06,
      "logits/chosen": -2.772244930267334,
      "logits/rejected": -2.7886576652526855,
      "logps/chosen": -115.32562255859375,
      "logps/rejected": -122.47587585449219,
      "loss": 0.6925,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": 0.024915488436818123,
      "rewards/margins": 0.00023288575175683945,
      "rewards/rejected": 0.024682600051164627,
      "step": 30
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.707922373336524e-06,
      "logits/chosen": -2.740614175796509,
      "logits/rejected": -2.727870464324951,
      "logps/chosen": -111.22251892089844,
      "logps/rejected": -113.79164123535156,
      "loss": 0.691,
      "rewards/accuracies": 0.596875011920929,
      "rewards/chosen": 0.052265096455812454,
      "rewards/margins": 0.003552838694304228,
      "rewards/rejected": 0.04871225729584694,
      "step": 40
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.411315662967732e-06,
      "logits/chosen": -2.775801181793213,
      "logits/rejected": -2.7031962871551514,
      "logps/chosen": -109.81254577636719,
      "logps/rejected": -113.3335952758789,
      "loss": 0.6897,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.0849301889538765,
      "rewards/margins": 0.006556935608386993,
      "rewards/rejected": 0.0783732533454895,
      "step": 50
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.028910905897229e-06,
      "logits/chosen": -2.756106376647949,
      "logits/rejected": -2.7039153575897217,
      "logps/chosen": -110.66825103759766,
      "logps/rejected": -110.25054931640625,
      "loss": 0.689,
      "rewards/accuracies": 0.596875011920929,
      "rewards/chosen": 0.09365645796060562,
      "rewards/margins": 0.008471069857478142,
      "rewards/rejected": 0.08518538624048233,
      "step": 60
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.577874068920446e-06,
      "logits/chosen": -2.7151947021484375,
      "logits/rejected": -2.706727981567383,
      "logps/chosen": -109.93450927734375,
      "logps/rejected": -113.0154800415039,
      "loss": 0.6885,
      "rewards/accuracies": 0.546875,
      "rewards/chosen": 0.07744868099689484,
      "rewards/margins": 0.004513105843216181,
      "rewards/rejected": 0.0729355737566948,
      "step": 70
    },
    {
      "epoch": 0.48,
      "learning_rate": 3.0784519801008546e-06,
      "logits/chosen": -2.692235231399536,
      "logits/rejected": -2.6538617610931396,
      "logps/chosen": -111.87430572509766,
      "logps/rejected": -115.57984924316406,
      "loss": 0.6867,
      "rewards/accuracies": 0.596875011920929,
      "rewards/chosen": 0.07563529908657074,
      "rewards/margins": 0.015832457691431046,
      "rewards/rejected": 0.059802841395139694,
      "step": 80
    },
    {
      "epoch": 0.55,
      "learning_rate": 2.553063458334059e-06,
      "logits/chosen": -2.738804340362549,
      "logits/rejected": -2.690701723098755,
      "logps/chosen": -120.409423828125,
      "logps/rejected": -118.25407409667969,
      "loss": 0.6841,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.05362895876169205,
      "rewards/margins": 0.01961613819003105,
      "rewards/rejected": 0.034012824296951294,
      "step": 90
    },
    {
      "epoch": 0.61,
      "learning_rate": 2.025292943281429e-06,
      "logits/chosen": -2.718660593032837,
      "logits/rejected": -2.7189323902130127,
      "logps/chosen": -108.1891098022461,
      "logps/rejected": -112.934814453125,
      "loss": 0.6837,
      "rewards/accuracies": 0.590624988079071,
      "rewards/chosen": 0.06428461521863937,
      "rewards/margins": 0.0179769154638052,
      "rewards/rejected": 0.04630769044160843,
      "step": 100
    },
    {
      "epoch": 0.61,
      "eval_logits/chosen": -2.7115368843078613,
      "eval_logits/rejected": -2.6278061866760254,
      "eval_logps/chosen": -286.1498718261719,
      "eval_logps/rejected": -263.6365966796875,
      "eval_loss": 0.6740216016769409,
      "eval_rewards/accuracies": 0.6359999775886536,
      "eval_rewards/chosen": 0.008544988930225372,
      "eval_rewards/margins": 0.03750109300017357,
      "eval_rewards/rejected": -0.028956104069948196,
      "eval_runtime": 383.9881,
      "eval_samples_per_second": 5.208,
      "eval_steps_per_second": 0.651,
      "step": 100
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.5188318011445907e-06,
      "logits/chosen": -2.7399725914001465,
      "logits/rejected": -2.710850954055786,
      "logps/chosen": -123.49687194824219,
      "logps/rejected": -121.06239318847656,
      "loss": 0.6835,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.031930722296237946,
      "rewards/margins": 0.025511348620057106,
      "rewards/rejected": 0.00641937181353569,
      "step": 110
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.0564148305586296e-06,
      "logits/chosen": -2.733646869659424,
      "logits/rejected": -2.7130093574523926,
      "logps/chosen": -116.14371490478516,
      "logps/rejected": -121.31622314453125,
      "loss": 0.6835,
      "rewards/accuracies": 0.559374988079071,
      "rewards/chosen": 0.04128889739513397,
      "rewards/margins": 0.021482665091753006,
      "rewards/rejected": 0.019806232303380966,
      "step": 120
    },
    {
      "epoch": 0.79,
      "learning_rate": 6.587997083462197e-07,
      "logits/chosen": -2.706106424331665,
      "logits/rejected": -2.683042049407959,
      "logps/chosen": -121.27201080322266,
      "logps/rejected": -124.78038024902344,
      "loss": 0.6784,
      "rewards/accuracies": 0.621874988079071,
      "rewards/chosen": 0.032023753970861435,
      "rewards/margins": 0.03133785352110863,
      "rewards/rejected": 0.0006858977722004056,
      "step": 130
    },
    {
      "epoch": 0.85,
      "learning_rate": 3.438351873250492e-07,
      "logits/chosen": -2.6774215698242188,
      "logits/rejected": -2.66115140914917,
      "logps/chosen": -111.2270736694336,
      "logps/rejected": -116.47705078125,
      "loss": 0.6774,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": 0.032939545810222626,
      "rewards/margins": 0.04166686534881592,
      "rewards/rejected": -0.008727315813302994,
      "step": 140
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.2565987432367032e-07,
      "logits/chosen": -2.7032761573791504,
      "logits/rejected": -2.6723108291625977,
      "logps/chosen": -109.5132064819336,
      "logps/rejected": -114.572509765625,
      "loss": 0.6802,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": 0.037231095135211945,
      "rewards/margins": 0.029751187190413475,
      "rewards/rejected": 0.007479907013475895,
      "step": 150
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.4067554877743861e-08,
      "logits/chosen": -2.6688144207000732,
      "logits/rejected": -2.6238021850585938,
      "logps/chosen": -110.0870361328125,
      "logps/rejected": -108.6930160522461,
      "loss": 0.68,
      "rewards/accuracies": 0.5843750238418579,
      "rewards/chosen": 0.050410233438014984,
      "rewards/margins": 0.03611644357442856,
      "rewards/rejected": 0.014293788000941277,
      "step": 160
    },
    {
      "epoch": 1.0,
      "step": 165,
      "total_flos": 0.0,
      "train_loss": 0.685761218359976,
      "train_runtime": 6660.773,
      "train_samples_per_second": 3.173,
      "train_steps_per_second": 0.025
    }
  ],
  "logging_steps": 10,
  "max_steps": 165,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}