{
  "best_metric": 0.3698837161064148,
  "best_model_checkpoint": "./mixstral/05-04-24-Weni-WeniGPT-Agents-Mixstral-Instruct-2.0.1-KTO_KTO with Agents 1.2.0 dataset and Mixstral model, with tokenization zephyr chat template-3_max_steps-145_batch_16_2024-04-05_ppid_9/checkpoint-100",
  "epoch": 0.684931506849315,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14,
      "grad_norm": 4.052237033843994,
      "kl": 10.023146629333496,
      "learning_rate": 0.0001785714285714286,
      "logps/chosen": -232.72720336914062,
      "logps/rejected": -241.88575744628906,
      "loss": 0.4316,
      "rewards/chosen": 1.2224007844924927,
      "rewards/margins": 1.0828871726989746,
      "rewards/rejected": 0.279308944940567,
      "step": 20
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.734194278717041,
      "kl": 10.954643249511719,
      "learning_rate": 0.00015000000000000001,
      "logps/chosen": -228.13992309570312,
      "logps/rejected": -270.3298034667969,
      "loss": 0.4077,
      "rewards/chosen": 1.687286615371704,
      "rewards/margins": 1.9949144124984741,
      "rewards/rejected": -0.2780403196811676,
      "step": 40
    },
    {
      "epoch": 0.34,
      "eval_kl": 0.0,
      "eval_logps/chosen": -315.1575927734375,
      "eval_logps/rejected": -328.9866027832031,
      "eval_loss": 0.466037392616272,
      "eval_rewards/chosen": -7.166321754455566,
      "eval_rewards/margins": 2.8236684799194336,
      "eval_rewards/rejected": -10.133459091186523,
      "eval_runtime": 353.626,
      "eval_samples_per_second": 0.848,
      "eval_steps_per_second": 0.212,
      "step": 50
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9602097272872925,
      "kl": 1.5916956663131714,
      "learning_rate": 0.00012142857142857143,
      "logps/chosen": -268.18023681640625,
      "logps/rejected": -304.6438903808594,
      "loss": 0.4028,
      "rewards/chosen": -2.1896169185638428,
      "rewards/margins": 4.930688858032227,
      "rewards/rejected": -6.980587482452393,
      "step": 60
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9362258911132812,
      "kl": 3.6249618530273438,
      "learning_rate": 9.285714285714286e-05,
      "logps/chosen": -263.2818603515625,
      "logps/rejected": -308.0414733886719,
      "loss": 0.3873,
      "rewards/chosen": -0.1703629493713379,
      "rewards/margins": 5.487633228302002,
      "rewards/rejected": -5.359496116638184,
      "step": 80
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.322431802749634,
      "kl": 0.9517351388931274,
      "learning_rate": 6.428571428571429e-05,
      "logps/chosen": -240.21408081054688,
      "logps/rejected": -329.3759765625,
      "loss": 0.3351,
      "rewards/chosen": 0.027771174907684326,
      "rewards/margins": 7.602840423583984,
      "rewards/rejected": -7.7202911376953125,
      "step": 100
    },
    {
      "epoch": 0.68,
      "eval_kl": 0.48766180872917175,
      "eval_logps/chosen": -261.78826904296875,
      "eval_logps/rejected": -295.7093505859375,
      "eval_loss": 0.3698837161064148,
      "eval_rewards/chosen": -1.8293884992599487,
      "eval_rewards/margins": 4.808248043060303,
      "eval_rewards/rejected": -6.805734634399414,
      "eval_runtime": 352.0516,
      "eval_samples_per_second": 0.852,
      "eval_steps_per_second": 0.213,
      "step": 100
    }
  ],
  "logging_steps": 20,
  "max_steps": 145,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}