|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 49.121212121212125, |
|
"eval_steps": 500, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00029999537364671844, |
|
"loss": 3.1318, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 2.835486888885498, |
|
"eval_runtime": 2.1688, |
|
"eval_samples_per_second": 107.896, |
|
"eval_steps_per_second": 3.689, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 2.6363508701324463, |
|
"eval_runtime": 1.538, |
|
"eval_samples_per_second": 152.15, |
|
"eval_steps_per_second": 5.202, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_loss": 2.4945499897003174, |
|
"eval_runtime": 1.526, |
|
"eval_samples_per_second": 153.347, |
|
"eval_steps_per_second": 5.243, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_loss": 2.5338640213012695, |
|
"eval_runtime": 1.5296, |
|
"eval_samples_per_second": 152.982, |
|
"eval_steps_per_second": 5.23, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 0.00029988435543610843, |
|
"loss": 2.7386, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"eval_loss": 2.3351666927337646, |
|
"eval_runtime": 1.5386, |
|
"eval_samples_per_second": 152.088, |
|
"eval_steps_per_second": 5.2, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"eval_loss": 2.2136902809143066, |
|
"eval_runtime": 1.517, |
|
"eval_samples_per_second": 154.247, |
|
"eval_steps_per_second": 5.273, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"eval_loss": 2.164069652557373, |
|
"eval_runtime": 1.5148, |
|
"eval_samples_per_second": 154.475, |
|
"eval_steps_per_second": 5.281, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"eval_loss": 2.105088710784912, |
|
"eval_runtime": 1.5384, |
|
"eval_samples_per_second": 152.111, |
|
"eval_steps_per_second": 5.2, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"eval_loss": 2.0841920375823975, |
|
"eval_runtime": 1.6789, |
|
"eval_samples_per_second": 139.374, |
|
"eval_steps_per_second": 4.765, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 0.00029953760005996916, |
|
"loss": 2.269, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"eval_loss": 2.047882556915283, |
|
"eval_runtime": 1.5168, |
|
"eval_samples_per_second": 154.272, |
|
"eval_steps_per_second": 5.274, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 10.12, |
|
"eval_loss": 1.9553548097610474, |
|
"eval_runtime": 1.5263, |
|
"eval_samples_per_second": 153.317, |
|
"eval_steps_per_second": 5.242, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 11.12, |
|
"eval_loss": 1.8555233478546143, |
|
"eval_runtime": 1.527, |
|
"eval_samples_per_second": 153.245, |
|
"eval_steps_per_second": 5.239, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 12.12, |
|
"eval_loss": 1.7735551595687866, |
|
"eval_runtime": 1.5194, |
|
"eval_samples_per_second": 154.009, |
|
"eval_steps_per_second": 5.265, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 13.12, |
|
"eval_loss": 1.7906467914581299, |
|
"eval_runtime": 1.5217, |
|
"eval_samples_per_second": 153.779, |
|
"eval_steps_per_second": 5.257, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 14.12, |
|
"learning_rate": 0.00029896026854323894, |
|
"loss": 1.9451, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 14.12, |
|
"eval_loss": 1.7737478017807007, |
|
"eval_runtime": 1.5139, |
|
"eval_samples_per_second": 154.563, |
|
"eval_steps_per_second": 5.284, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"eval_loss": 1.6676586866378784, |
|
"eval_runtime": 1.5263, |
|
"eval_samples_per_second": 153.316, |
|
"eval_steps_per_second": 5.242, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 16.12, |
|
"eval_loss": 1.6410826444625854, |
|
"eval_runtime": 1.5192, |
|
"eval_samples_per_second": 154.025, |
|
"eval_steps_per_second": 5.266, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 17.12, |
|
"eval_loss": 1.5739473104476929, |
|
"eval_runtime": 1.5309, |
|
"eval_samples_per_second": 152.855, |
|
"eval_steps_per_second": 5.226, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 18.12, |
|
"eval_loss": 1.5334192514419556, |
|
"eval_runtime": 1.5271, |
|
"eval_samples_per_second": 153.235, |
|
"eval_steps_per_second": 5.239, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 19.12, |
|
"learning_rate": 0.00029815325108927063, |
|
"loss": 1.6568, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 19.12, |
|
"eval_loss": 1.47941255569458, |
|
"eval_runtime": 1.5335, |
|
"eval_samples_per_second": 152.593, |
|
"eval_steps_per_second": 5.217, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 20.12, |
|
"eval_loss": 1.4007827043533325, |
|
"eval_runtime": 1.5222, |
|
"eval_samples_per_second": 153.722, |
|
"eval_steps_per_second": 5.255, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 21.12, |
|
"eval_loss": 1.3624812364578247, |
|
"eval_runtime": 1.5197, |
|
"eval_samples_per_second": 153.982, |
|
"eval_steps_per_second": 5.264, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 22.12, |
|
"eval_loss": 1.2963740825653076, |
|
"eval_runtime": 1.5258, |
|
"eval_samples_per_second": 153.363, |
|
"eval_steps_per_second": 5.243, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 23.12, |
|
"eval_loss": 1.2041164636611938, |
|
"eval_runtime": 1.5251, |
|
"eval_samples_per_second": 153.434, |
|
"eval_steps_per_second": 5.246, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 24.12, |
|
"learning_rate": 0.00029711779206048454, |
|
"loss": 1.3674, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 24.12, |
|
"eval_loss": 1.1971029043197632, |
|
"eval_runtime": 1.535, |
|
"eval_samples_per_second": 152.446, |
|
"eval_steps_per_second": 5.212, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 25.12, |
|
"eval_loss": 1.1571109294891357, |
|
"eval_runtime": 1.5213, |
|
"eval_samples_per_second": 153.815, |
|
"eval_steps_per_second": 5.259, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 26.12, |
|
"eval_loss": 1.1079976558685303, |
|
"eval_runtime": 1.5286, |
|
"eval_samples_per_second": 153.079, |
|
"eval_steps_per_second": 5.233, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"eval_loss": 1.109868049621582, |
|
"eval_runtime": 1.5388, |
|
"eval_samples_per_second": 152.068, |
|
"eval_steps_per_second": 5.199, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 28.12, |
|
"eval_loss": 1.0929827690124512, |
|
"eval_runtime": 1.5243, |
|
"eval_samples_per_second": 153.513, |
|
"eval_steps_per_second": 5.248, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 29.12, |
|
"learning_rate": 0.0002958554880596515, |
|
"loss": 1.145, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 29.12, |
|
"eval_loss": 1.0333445072174072, |
|
"eval_runtime": 1.528, |
|
"eval_samples_per_second": 153.138, |
|
"eval_steps_per_second": 5.235, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 30.12, |
|
"eval_loss": 1.009576678276062, |
|
"eval_runtime": 1.5222, |
|
"eval_samples_per_second": 153.722, |
|
"eval_steps_per_second": 5.255, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 31.12, |
|
"eval_loss": 1.0011868476867676, |
|
"eval_runtime": 1.5185, |
|
"eval_samples_per_second": 154.104, |
|
"eval_steps_per_second": 5.269, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 32.12, |
|
"eval_loss": 0.9265638589859009, |
|
"eval_runtime": 1.5235, |
|
"eval_samples_per_second": 153.589, |
|
"eval_steps_per_second": 5.251, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 33.12, |
|
"eval_loss": 0.962448239326477, |
|
"eval_runtime": 1.5219, |
|
"eval_samples_per_second": 153.758, |
|
"eval_steps_per_second": 5.257, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 34.12, |
|
"learning_rate": 0.000294368285468047, |
|
"loss": 0.9987, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 34.12, |
|
"eval_loss": 0.9425073862075806, |
|
"eval_runtime": 1.5206, |
|
"eval_samples_per_second": 153.885, |
|
"eval_steps_per_second": 5.261, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 35.12, |
|
"eval_loss": 0.9353674650192261, |
|
"eval_runtime": 1.5211, |
|
"eval_samples_per_second": 153.831, |
|
"eval_steps_per_second": 5.259, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 36.12, |
|
"eval_loss": 0.9090538024902344, |
|
"eval_runtime": 1.5239, |
|
"eval_samples_per_second": 153.554, |
|
"eval_steps_per_second": 5.25, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 37.12, |
|
"eval_loss": 0.9006912708282471, |
|
"eval_runtime": 1.6666, |
|
"eval_samples_per_second": 140.404, |
|
"eval_steps_per_second": 4.8, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 38.12, |
|
"eval_loss": 0.9648869037628174, |
|
"eval_runtime": 1.5236, |
|
"eval_samples_per_second": 153.587, |
|
"eval_steps_per_second": 5.251, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 39.12, |
|
"learning_rate": 0.00029265847744427303, |
|
"loss": 0.9071, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 39.12, |
|
"eval_loss": 0.9199429154396057, |
|
"eval_runtime": 1.526, |
|
"eval_samples_per_second": 153.343, |
|
"eval_steps_per_second": 5.242, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 40.12, |
|
"eval_loss": 0.8650604486465454, |
|
"eval_runtime": 1.5281, |
|
"eval_samples_per_second": 153.127, |
|
"eval_steps_per_second": 5.235, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 41.12, |
|
"eval_loss": 0.8727077841758728, |
|
"eval_runtime": 1.5186, |
|
"eval_samples_per_second": 154.087, |
|
"eval_steps_per_second": 5.268, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 42.12, |
|
"eval_loss": 0.8558970093727112, |
|
"eval_runtime": 1.5297, |
|
"eval_samples_per_second": 152.968, |
|
"eval_steps_per_second": 5.23, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 43.12, |
|
"eval_loss": 0.8499311804771423, |
|
"eval_runtime": 1.5225, |
|
"eval_samples_per_second": 153.692, |
|
"eval_steps_per_second": 5.254, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 44.12, |
|
"learning_rate": 0.0002907287003883726, |
|
"loss": 0.8522, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 44.12, |
|
"eval_loss": 0.8547362089157104, |
|
"eval_runtime": 1.5331, |
|
"eval_samples_per_second": 152.637, |
|
"eval_steps_per_second": 5.218, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 45.12, |
|
"eval_loss": 0.8880292177200317, |
|
"eval_runtime": 1.5217, |
|
"eval_samples_per_second": 153.771, |
|
"eval_steps_per_second": 5.257, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 46.12, |
|
"eval_loss": 0.8677502870559692, |
|
"eval_runtime": 1.5273, |
|
"eval_samples_per_second": 153.214, |
|
"eval_steps_per_second": 5.238, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 47.12, |
|
"eval_loss": 0.8565409183502197, |
|
"eval_runtime": 1.5222, |
|
"eval_samples_per_second": 153.723, |
|
"eval_steps_per_second": 5.255, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 48.12, |
|
"eval_loss": 0.8197174072265625, |
|
"eval_runtime": 1.5188, |
|
"eval_samples_per_second": 154.07, |
|
"eval_steps_per_second": 5.267, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 49.12, |
|
"learning_rate": 0.000288581929876693, |
|
"loss": 0.8153, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 49.12, |
|
"eval_loss": 0.8439480662345886, |
|
"eval_runtime": 1.5245, |
|
"eval_samples_per_second": 153.497, |
|
"eval_steps_per_second": 5.248, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 49.12, |
|
"step": 50, |
|
"total_flos": 4527521789902848.0, |
|
"train_loss": 1.4773838996887207, |
|
"train_runtime": 2397.7837, |
|
"train_samples_per_second": 43.728, |
|
"train_steps_per_second": 0.167 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 400, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 4527521789902848.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|