|
{ |
|
"best_metric": 0.5405722260475159, |
|
"best_model_checkpoint": "Model-Meme/Typhoon/Typhoon1.5-Offensive-Fold1/checkpoint-3824", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 19120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2615062761506276, |
|
"grad_norm": 1.978398084640503, |
|
"learning_rate": 8.710801393728223e-06, |
|
"loss": 0.9518, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5230125523012552, |
|
"grad_norm": 1.655224323272705, |
|
"learning_rate": 9.986987223454062e-06, |
|
"loss": 0.6078, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7845188284518828, |
|
"grad_norm": 1.4709585905075073, |
|
"learning_rate": 9.93861382775312e-06, |
|
"loss": 0.6103, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.5545303821563721, |
|
"eval_runtime": 106.2407, |
|
"eval_samples_per_second": 8.998, |
|
"eval_steps_per_second": 8.998, |
|
"step": 1912 |
|
}, |
|
{ |
|
"epoch": 1.0460251046025104, |
|
"grad_norm": 2.412283420562744, |
|
"learning_rate": 9.854833795480441e-06, |
|
"loss": 0.587, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.3075313807531381, |
|
"grad_norm": 1.6763008832931519, |
|
"learning_rate": 9.736519159090385e-06, |
|
"loss": 0.5516, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.5690376569037658, |
|
"grad_norm": 2.1787030696868896, |
|
"learning_rate": 9.584044234397988e-06, |
|
"loss": 0.5432, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.8305439330543933, |
|
"grad_norm": 2.15987229347229, |
|
"learning_rate": 9.39870470560879e-06, |
|
"loss": 0.5497, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.5405722260475159, |
|
"eval_runtime": 107.6233, |
|
"eval_samples_per_second": 8.883, |
|
"eval_steps_per_second": 8.883, |
|
"step": 3824 |
|
}, |
|
{ |
|
"epoch": 2.092050209205021, |
|
"grad_norm": 2.2572760581970215, |
|
"learning_rate": 9.181829336130319e-06, |
|
"loss": 0.5301, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.3535564853556483, |
|
"grad_norm": 3.1669037342071533, |
|
"learning_rate": 8.934972980757115e-06, |
|
"loss": 0.4753, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.6150627615062763, |
|
"grad_norm": 2.6985182762145996, |
|
"learning_rate": 8.660482449775261e-06, |
|
"loss": 0.4701, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.8765690376569037, |
|
"grad_norm": 3.5524895191192627, |
|
"learning_rate": 8.35985345532633e-06, |
|
"loss": 0.499, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.5466441512107849, |
|
"eval_runtime": 105.7924, |
|
"eval_samples_per_second": 9.037, |
|
"eval_steps_per_second": 9.037, |
|
"step": 5736 |
|
}, |
|
{ |
|
"epoch": 3.1380753138075312, |
|
"grad_norm": 3.800245523452759, |
|
"learning_rate": 8.034559598992307e-06, |
|
"loss": 0.4554, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.399581589958159, |
|
"grad_norm": 5.512406826019287, |
|
"learning_rate": 7.687509931691041e-06, |
|
"loss": 0.4241, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.6610878661087867, |
|
"grad_norm": 4.886234283447266, |
|
"learning_rate": 7.32119257293766e-06, |
|
"loss": 0.4183, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.922594142259414, |
|
"grad_norm": 4.367607593536377, |
|
"learning_rate": 6.939014497791281e-06, |
|
"loss": 0.4227, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.5681700110435486, |
|
"eval_runtime": 109.707, |
|
"eval_samples_per_second": 8.714, |
|
"eval_steps_per_second": 8.714, |
|
"step": 7648 |
|
}, |
|
{ |
|
"epoch": 4.184100418410042, |
|
"grad_norm": 5.268321514129639, |
|
"learning_rate": 6.5421848153025935e-06, |
|
"loss": 0.394, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.445606694560669, |
|
"grad_norm": 6.094832897186279, |
|
"learning_rate": 6.134298674628205e-06, |
|
"loss": 0.3551, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.707112970711297, |
|
"grad_norm": 4.6014723777771, |
|
"learning_rate": 5.718280353075316e-06, |
|
"loss": 0.3585, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.968619246861925, |
|
"grad_norm": 5.809424877166748, |
|
"learning_rate": 5.297112430378265e-06, |
|
"loss": 0.3608, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.5995907187461853, |
|
"eval_runtime": 104.4657, |
|
"eval_samples_per_second": 9.151, |
|
"eval_steps_per_second": 9.151, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 5.2301255230125525, |
|
"grad_norm": 5.566999435424805, |
|
"learning_rate": 4.873814405549272e-06, |
|
"loss": 0.3129, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.49163179916318, |
|
"grad_norm": 5.675551891326904, |
|
"learning_rate": 4.451421049042024e-06, |
|
"loss": 0.3044, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 5.7531380753138075, |
|
"grad_norm": 7.751038551330566, |
|
"learning_rate": 4.032960645429033e-06, |
|
"loss": 0.3276, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.6560445427894592, |
|
"eval_runtime": 109.4544, |
|
"eval_samples_per_second": 8.734, |
|
"eval_steps_per_second": 8.734, |
|
"step": 11472 |
|
}, |
|
{ |
|
"epoch": 6.014644351464435, |
|
"grad_norm": 8.045265197753906, |
|
"learning_rate": 3.6222474467023006e-06, |
|
"loss": 0.3008, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 6.2761506276150625, |
|
"grad_norm": 6.55618953704834, |
|
"learning_rate": 3.220580839399443e-06, |
|
"loss": 0.2541, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 6.53765690376569, |
|
"grad_norm": 6.615233421325684, |
|
"learning_rate": 2.8324347220558857e-06, |
|
"loss": 0.2642, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 6.799163179916318, |
|
"grad_norm": 4.452328681945801, |
|
"learning_rate": 2.4590370850090855e-06, |
|
"loss": 0.2655, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.687631368637085, |
|
"eval_runtime": 106.5678, |
|
"eval_samples_per_second": 8.971, |
|
"eval_steps_per_second": 8.971, |
|
"step": 13384 |
|
}, |
|
{ |
|
"epoch": 7.060669456066946, |
|
"grad_norm": 9.082950592041016, |
|
"learning_rate": 2.1038564929897326e-06, |
|
"loss": 0.2613, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 7.322175732217573, |
|
"grad_norm": 4.5683488845825195, |
|
"learning_rate": 1.7694393589342428e-06, |
|
"loss": 0.225, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 7.583682008368201, |
|
"grad_norm": 6.728757381439209, |
|
"learning_rate": 1.4581832353425335e-06, |
|
"loss": 0.2322, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 7.845188284518828, |
|
"grad_norm": 5.570503234863281, |
|
"learning_rate": 1.172864616852632e-06, |
|
"loss": 0.2316, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.7278637290000916, |
|
"eval_runtime": 103.7809, |
|
"eval_samples_per_second": 9.212, |
|
"eval_steps_per_second": 9.212, |
|
"step": 15296 |
|
}, |
|
{ |
|
"epoch": 8.106694560669457, |
|
"grad_norm": 8.535571098327637, |
|
"learning_rate": 9.143861751413785e-07, |
|
"loss": 0.2112, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 8.368200836820083, |
|
"grad_norm": 6.845192909240723, |
|
"learning_rate": 6.856269305226026e-07, |
|
"loss": 0.2052, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 8.629707112970712, |
|
"grad_norm": 8.833856582641602, |
|
"learning_rate": 4.873106159655255e-07, |
|
"loss": 0.2105, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 8.891213389121338, |
|
"grad_norm": 10.798727035522461, |
|
"learning_rate": 3.2134733781792273e-07, |
|
"loss": 0.2057, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.7465147376060486, |
|
"eval_runtime": 103.4261, |
|
"eval_samples_per_second": 9.243, |
|
"eval_steps_per_second": 9.243, |
|
"step": 17208 |
|
}, |
|
{ |
|
"epoch": 9.152719665271967, |
|
"grad_norm": 5.5902252197265625, |
|
"learning_rate": 1.8892694442157356e-07, |
|
"loss": 0.1954, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 9.414225941422593, |
|
"grad_norm": 5.076449871063232, |
|
"learning_rate": 9.099880353576951e-08, |
|
"loss": 0.1949, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 9.675732217573222, |
|
"grad_norm": 7.189806938171387, |
|
"learning_rate": 2.835499832552657e-08, |
|
"loss": 0.1941, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 9.93723849372385, |
|
"grad_norm": 5.884722709655762, |
|
"learning_rate": 1.1937170093084816e-09, |
|
"loss": 0.1904, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.7588898539543152, |
|
"eval_runtime": 103.3856, |
|
"eval_samples_per_second": 9.247, |
|
"eval_steps_per_second": 9.247, |
|
"step": 19120 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 19120, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.3672019664225894e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|