|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9962825278810409, |
|
"eval_steps": 100, |
|
"global_step": 134, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 7.393402431483719, |
|
"learning_rate": 3.571428571428571e-08, |
|
"logits/chosen": -0.5970903635025024, |
|
"logits/rejected": -0.02967279776930809, |
|
"logps/chosen": -254.73361206054688, |
|
"logps/rejected": -449.335693359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.97808175581924, |
|
"learning_rate": 3.5714285714285716e-07, |
|
"logits/chosen": -0.502315104007721, |
|
"logits/rejected": -0.24948199093341827, |
|
"logps/chosen": -339.6366271972656, |
|
"logps/rejected": -657.9154663085938, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0001597129157744348, |
|
"rewards/margins": 0.00033807966974563897, |
|
"rewards/rejected": -0.0004977926146239042, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 7.671337358423795, |
|
"learning_rate": 4.969220851487844e-07, |
|
"logits/chosen": -0.5676344037055969, |
|
"logits/rejected": -0.3287120759487152, |
|
"logps/chosen": -378.62664794921875, |
|
"logps/rejected": -670.591552734375, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.0052469945512712, |
|
"rewards/margins": 0.02313617616891861, |
|
"rewards/rejected": -0.028383171185851097, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 6.338334841496394, |
|
"learning_rate": 4.783863644106502e-07, |
|
"logits/chosen": -0.5812798738479614, |
|
"logits/rejected": -0.34431666135787964, |
|
"logps/chosen": -353.8559875488281, |
|
"logps/rejected": -717.6322021484375, |
|
"loss": 0.6376, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.018838122487068176, |
|
"rewards/margins": 0.11194054782390594, |
|
"rewards/rejected": -0.13077868521213531, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 5.540070129895064, |
|
"learning_rate": 4.442864903642427e-07, |
|
"logits/chosen": -0.4757254719734192, |
|
"logits/rejected": -0.3771602213382721, |
|
"logps/chosen": -361.31365966796875, |
|
"logps/rejected": -770.7361450195312, |
|
"loss": 0.5666, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.04972491413354874, |
|
"rewards/margins": 0.309120774269104, |
|
"rewards/rejected": -0.35884565114974976, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 6.028650135342188, |
|
"learning_rate": 3.9694631307311825e-07, |
|
"logits/chosen": -0.5247567892074585, |
|
"logits/rejected": -0.45507222414016724, |
|
"logps/chosen": -293.3636169433594, |
|
"logps/rejected": -845.0416870117188, |
|
"loss": 0.4561, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.1027413159608841, |
|
"rewards/margins": 0.9158787727355957, |
|
"rewards/rejected": -1.0186201333999634, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 5.965290480598111, |
|
"learning_rate": 3.39591987386325e-07, |
|
"logits/chosen": -0.5522093772888184, |
|
"logits/rejected": -0.4290170669555664, |
|
"logps/chosen": -363.2244873046875, |
|
"logps/rejected": -951.5051879882812, |
|
"loss": 0.3684, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.2073672115802765, |
|
"rewards/margins": 2.0674309730529785, |
|
"rewards/rejected": -2.2747981548309326, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 5.3960424532927345, |
|
"learning_rate": 2.761321158169134e-07, |
|
"logits/chosen": -0.47125476598739624, |
|
"logits/rejected": -0.4486091136932373, |
|
"logps/chosen": -341.56646728515625, |
|
"logps/rejected": -1014.1658935546875, |
|
"loss": 0.338, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.219242125749588, |
|
"rewards/margins": 3.1757972240448, |
|
"rewards/rejected": -3.3950393199920654, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 5.338829151255127, |
|
"learning_rate": 2.1089138373994222e-07, |
|
"logits/chosen": -0.5217522382736206, |
|
"logits/rejected": -0.5397945642471313, |
|
"logps/chosen": -321.6473083496094, |
|
"logps/rejected": -1235.2142333984375, |
|
"loss": 0.2868, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -0.31155842542648315, |
|
"rewards/margins": 4.753512382507324, |
|
"rewards/rejected": -5.065071105957031, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 4.274164409019951, |
|
"learning_rate": 1.4831583923104998e-07, |
|
"logits/chosen": -0.44177961349487305, |
|
"logits/rejected": -0.528927743434906, |
|
"logps/chosen": -327.2131042480469, |
|
"logps/rejected": -1242.676513671875, |
|
"loss": 0.2479, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.2681874930858612, |
|
"rewards/margins": 5.103245735168457, |
|
"rewards/rejected": -5.371432781219482, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 3.847681923589658, |
|
"learning_rate": 9.266990223754067e-08, |
|
"logits/chosen": -0.3910934329032898, |
|
"logits/rejected": -0.5766850709915161, |
|
"logps/chosen": -417.525390625, |
|
"logps/rejected": -1386.257080078125, |
|
"loss": 0.2497, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5247961282730103, |
|
"rewards/margins": 5.741795539855957, |
|
"rewards/rejected": -6.266592979431152, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_logits/chosen": -0.7826768159866333, |
|
"eval_logits/rejected": -0.5636682510375977, |
|
"eval_logps/chosen": -311.038330078125, |
|
"eval_logps/rejected": -748.6944580078125, |
|
"eval_loss": 0.3023545444011688, |
|
"eval_rewards/accuracies": 0.949999988079071, |
|
"eval_rewards/chosen": -0.08790449053049088, |
|
"eval_rewards/margins": 1.8343137502670288, |
|
"eval_rewards/rejected": -1.9222180843353271, |
|
"eval_runtime": 15.884, |
|
"eval_samples_per_second": 9.569, |
|
"eval_steps_per_second": 0.315, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 4.194490591135143, |
|
"learning_rate": 4.774575140626316e-08, |
|
"logits/chosen": -0.42816129326820374, |
|
"logits/rejected": -0.4562205374240875, |
|
"logps/chosen": -361.47930908203125, |
|
"logps/rejected": -1313.421630859375, |
|
"loss": 0.2458, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.5410887002944946, |
|
"rewards/margins": 5.888722896575928, |
|
"rewards/rejected": -6.429811954498291, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 3.9693871459743573, |
|
"learning_rate": 1.6604893375699592e-08, |
|
"logits/chosen": -0.4562758803367615, |
|
"logits/rejected": -0.5703433156013489, |
|
"logps/chosen": -393.4559631347656, |
|
"logps/rejected": -1514.4947509765625, |
|
"loss": 0.2112, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.518619954586029, |
|
"rewards/margins": 7.754377841949463, |
|
"rewards/rejected": -8.272997856140137, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 4.192543496278868, |
|
"learning_rate": 1.3695261579316775e-09, |
|
"logits/chosen": -0.39360299706459045, |
|
"logits/rejected": -0.4867175221443176, |
|
"logps/chosen": -386.438232421875, |
|
"logps/rejected": -1469.9609375, |
|
"loss": 0.2181, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.6078141927719116, |
|
"rewards/margins": 7.127106666564941, |
|
"rewards/rejected": -7.734920501708984, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 134, |
|
"total_flos": 0.0, |
|
"train_loss": 0.3946254751575527, |
|
"train_runtime": 1910.8543, |
|
"train_samples_per_second": 4.489, |
|
"train_steps_per_second": 0.07 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 134, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|