justinwangx's picture
Model save
07b3e60
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 49.121212121212125,
"eval_steps": 500,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12,
"learning_rate": 0.00029999537364671844,
"loss": 3.1318,
"step": 1
},
{
"epoch": 0.12,
"eval_loss": 2.835486888885498,
"eval_runtime": 2.1688,
"eval_samples_per_second": 107.896,
"eval_steps_per_second": 3.689,
"step": 1
},
{
"epoch": 1.12,
"eval_loss": 2.6363508701324463,
"eval_runtime": 1.538,
"eval_samples_per_second": 152.15,
"eval_steps_per_second": 5.202,
"step": 2
},
{
"epoch": 2.12,
"eval_loss": 2.4945499897003174,
"eval_runtime": 1.526,
"eval_samples_per_second": 153.347,
"eval_steps_per_second": 5.243,
"step": 3
},
{
"epoch": 3.12,
"eval_loss": 2.5338640213012695,
"eval_runtime": 1.5296,
"eval_samples_per_second": 152.982,
"eval_steps_per_second": 5.23,
"step": 4
},
{
"epoch": 4.12,
"learning_rate": 0.00029988435543610843,
"loss": 2.7386,
"step": 5
},
{
"epoch": 4.12,
"eval_loss": 2.3351666927337646,
"eval_runtime": 1.5386,
"eval_samples_per_second": 152.088,
"eval_steps_per_second": 5.2,
"step": 5
},
{
"epoch": 5.12,
"eval_loss": 2.2136902809143066,
"eval_runtime": 1.517,
"eval_samples_per_second": 154.247,
"eval_steps_per_second": 5.273,
"step": 6
},
{
"epoch": 6.12,
"eval_loss": 2.164069652557373,
"eval_runtime": 1.5148,
"eval_samples_per_second": 154.475,
"eval_steps_per_second": 5.281,
"step": 7
},
{
"epoch": 7.12,
"eval_loss": 2.105088710784912,
"eval_runtime": 1.5384,
"eval_samples_per_second": 152.111,
"eval_steps_per_second": 5.2,
"step": 8
},
{
"epoch": 8.12,
"eval_loss": 2.0841920375823975,
"eval_runtime": 1.6789,
"eval_samples_per_second": 139.374,
"eval_steps_per_second": 4.765,
"step": 9
},
{
"epoch": 9.12,
"learning_rate": 0.00029953760005996916,
"loss": 2.269,
"step": 10
},
{
"epoch": 9.12,
"eval_loss": 2.047882556915283,
"eval_runtime": 1.5168,
"eval_samples_per_second": 154.272,
"eval_steps_per_second": 5.274,
"step": 10
},
{
"epoch": 10.12,
"eval_loss": 1.9553548097610474,
"eval_runtime": 1.5263,
"eval_samples_per_second": 153.317,
"eval_steps_per_second": 5.242,
"step": 11
},
{
"epoch": 11.12,
"eval_loss": 1.8555233478546143,
"eval_runtime": 1.527,
"eval_samples_per_second": 153.245,
"eval_steps_per_second": 5.239,
"step": 12
},
{
"epoch": 12.12,
"eval_loss": 1.7735551595687866,
"eval_runtime": 1.5194,
"eval_samples_per_second": 154.009,
"eval_steps_per_second": 5.265,
"step": 13
},
{
"epoch": 13.12,
"eval_loss": 1.7906467914581299,
"eval_runtime": 1.5217,
"eval_samples_per_second": 153.779,
"eval_steps_per_second": 5.257,
"step": 14
},
{
"epoch": 14.12,
"learning_rate": 0.00029896026854323894,
"loss": 1.9451,
"step": 15
},
{
"epoch": 14.12,
"eval_loss": 1.7737478017807007,
"eval_runtime": 1.5139,
"eval_samples_per_second": 154.563,
"eval_steps_per_second": 5.284,
"step": 15
},
{
"epoch": 15.12,
"eval_loss": 1.6676586866378784,
"eval_runtime": 1.5263,
"eval_samples_per_second": 153.316,
"eval_steps_per_second": 5.242,
"step": 16
},
{
"epoch": 16.12,
"eval_loss": 1.6410826444625854,
"eval_runtime": 1.5192,
"eval_samples_per_second": 154.025,
"eval_steps_per_second": 5.266,
"step": 17
},
{
"epoch": 17.12,
"eval_loss": 1.5739473104476929,
"eval_runtime": 1.5309,
"eval_samples_per_second": 152.855,
"eval_steps_per_second": 5.226,
"step": 18
},
{
"epoch": 18.12,
"eval_loss": 1.5334192514419556,
"eval_runtime": 1.5271,
"eval_samples_per_second": 153.235,
"eval_steps_per_second": 5.239,
"step": 19
},
{
"epoch": 19.12,
"learning_rate": 0.00029815325108927063,
"loss": 1.6568,
"step": 20
},
{
"epoch": 19.12,
"eval_loss": 1.47941255569458,
"eval_runtime": 1.5335,
"eval_samples_per_second": 152.593,
"eval_steps_per_second": 5.217,
"step": 20
},
{
"epoch": 20.12,
"eval_loss": 1.4007827043533325,
"eval_runtime": 1.5222,
"eval_samples_per_second": 153.722,
"eval_steps_per_second": 5.255,
"step": 21
},
{
"epoch": 21.12,
"eval_loss": 1.3624812364578247,
"eval_runtime": 1.5197,
"eval_samples_per_second": 153.982,
"eval_steps_per_second": 5.264,
"step": 22
},
{
"epoch": 22.12,
"eval_loss": 1.2963740825653076,
"eval_runtime": 1.5258,
"eval_samples_per_second": 153.363,
"eval_steps_per_second": 5.243,
"step": 23
},
{
"epoch": 23.12,
"eval_loss": 1.2041164636611938,
"eval_runtime": 1.5251,
"eval_samples_per_second": 153.434,
"eval_steps_per_second": 5.246,
"step": 24
},
{
"epoch": 24.12,
"learning_rate": 0.00029711779206048454,
"loss": 1.3674,
"step": 25
},
{
"epoch": 24.12,
"eval_loss": 1.1971029043197632,
"eval_runtime": 1.535,
"eval_samples_per_second": 152.446,
"eval_steps_per_second": 5.212,
"step": 25
},
{
"epoch": 25.12,
"eval_loss": 1.1571109294891357,
"eval_runtime": 1.5213,
"eval_samples_per_second": 153.815,
"eval_steps_per_second": 5.259,
"step": 26
},
{
"epoch": 26.12,
"eval_loss": 1.1079976558685303,
"eval_runtime": 1.5286,
"eval_samples_per_second": 153.079,
"eval_steps_per_second": 5.233,
"step": 27
},
{
"epoch": 27.12,
"eval_loss": 1.109868049621582,
"eval_runtime": 1.5388,
"eval_samples_per_second": 152.068,
"eval_steps_per_second": 5.199,
"step": 28
},
{
"epoch": 28.12,
"eval_loss": 1.0929827690124512,
"eval_runtime": 1.5243,
"eval_samples_per_second": 153.513,
"eval_steps_per_second": 5.248,
"step": 29
},
{
"epoch": 29.12,
"learning_rate": 0.0002958554880596515,
"loss": 1.145,
"step": 30
},
{
"epoch": 29.12,
"eval_loss": 1.0333445072174072,
"eval_runtime": 1.528,
"eval_samples_per_second": 153.138,
"eval_steps_per_second": 5.235,
"step": 30
},
{
"epoch": 30.12,
"eval_loss": 1.009576678276062,
"eval_runtime": 1.5222,
"eval_samples_per_second": 153.722,
"eval_steps_per_second": 5.255,
"step": 31
},
{
"epoch": 31.12,
"eval_loss": 1.0011868476867676,
"eval_runtime": 1.5185,
"eval_samples_per_second": 154.104,
"eval_steps_per_second": 5.269,
"step": 32
},
{
"epoch": 32.12,
"eval_loss": 0.9265638589859009,
"eval_runtime": 1.5235,
"eval_samples_per_second": 153.589,
"eval_steps_per_second": 5.251,
"step": 33
},
{
"epoch": 33.12,
"eval_loss": 0.962448239326477,
"eval_runtime": 1.5219,
"eval_samples_per_second": 153.758,
"eval_steps_per_second": 5.257,
"step": 34
},
{
"epoch": 34.12,
"learning_rate": 0.000294368285468047,
"loss": 0.9987,
"step": 35
},
{
"epoch": 34.12,
"eval_loss": 0.9425073862075806,
"eval_runtime": 1.5206,
"eval_samples_per_second": 153.885,
"eval_steps_per_second": 5.261,
"step": 35
},
{
"epoch": 35.12,
"eval_loss": 0.9353674650192261,
"eval_runtime": 1.5211,
"eval_samples_per_second": 153.831,
"eval_steps_per_second": 5.259,
"step": 36
},
{
"epoch": 36.12,
"eval_loss": 0.9090538024902344,
"eval_runtime": 1.5239,
"eval_samples_per_second": 153.554,
"eval_steps_per_second": 5.25,
"step": 37
},
{
"epoch": 37.12,
"eval_loss": 0.9006912708282471,
"eval_runtime": 1.6666,
"eval_samples_per_second": 140.404,
"eval_steps_per_second": 4.8,
"step": 38
},
{
"epoch": 38.12,
"eval_loss": 0.9648869037628174,
"eval_runtime": 1.5236,
"eval_samples_per_second": 153.587,
"eval_steps_per_second": 5.251,
"step": 39
},
{
"epoch": 39.12,
"learning_rate": 0.00029265847744427303,
"loss": 0.9071,
"step": 40
},
{
"epoch": 39.12,
"eval_loss": 0.9199429154396057,
"eval_runtime": 1.526,
"eval_samples_per_second": 153.343,
"eval_steps_per_second": 5.242,
"step": 40
},
{
"epoch": 40.12,
"eval_loss": 0.8650604486465454,
"eval_runtime": 1.5281,
"eval_samples_per_second": 153.127,
"eval_steps_per_second": 5.235,
"step": 41
},
{
"epoch": 41.12,
"eval_loss": 0.8727077841758728,
"eval_runtime": 1.5186,
"eval_samples_per_second": 154.087,
"eval_steps_per_second": 5.268,
"step": 42
},
{
"epoch": 42.12,
"eval_loss": 0.8558970093727112,
"eval_runtime": 1.5297,
"eval_samples_per_second": 152.968,
"eval_steps_per_second": 5.23,
"step": 43
},
{
"epoch": 43.12,
"eval_loss": 0.8499311804771423,
"eval_runtime": 1.5225,
"eval_samples_per_second": 153.692,
"eval_steps_per_second": 5.254,
"step": 44
},
{
"epoch": 44.12,
"learning_rate": 0.0002907287003883726,
"loss": 0.8522,
"step": 45
},
{
"epoch": 44.12,
"eval_loss": 0.8547362089157104,
"eval_runtime": 1.5331,
"eval_samples_per_second": 152.637,
"eval_steps_per_second": 5.218,
"step": 45
},
{
"epoch": 45.12,
"eval_loss": 0.8880292177200317,
"eval_runtime": 1.5217,
"eval_samples_per_second": 153.771,
"eval_steps_per_second": 5.257,
"step": 46
},
{
"epoch": 46.12,
"eval_loss": 0.8677502870559692,
"eval_runtime": 1.5273,
"eval_samples_per_second": 153.214,
"eval_steps_per_second": 5.238,
"step": 47
},
{
"epoch": 47.12,
"eval_loss": 0.8565409183502197,
"eval_runtime": 1.5222,
"eval_samples_per_second": 153.723,
"eval_steps_per_second": 5.255,
"step": 48
},
{
"epoch": 48.12,
"eval_loss": 0.8197174072265625,
"eval_runtime": 1.5188,
"eval_samples_per_second": 154.07,
"eval_steps_per_second": 5.267,
"step": 49
},
{
"epoch": 49.12,
"learning_rate": 0.000288581929876693,
"loss": 0.8153,
"step": 50
},
{
"epoch": 49.12,
"eval_loss": 0.8439480662345886,
"eval_runtime": 1.5245,
"eval_samples_per_second": 153.497,
"eval_steps_per_second": 5.248,
"step": 50
},
{
"epoch": 49.12,
"step": 50,
"total_flos": 4527521789902848.0,
"train_loss": 1.4773838996887207,
"train_runtime": 2397.7837,
"train_samples_per_second": 43.728,
"train_steps_per_second": 0.167
}
],
"logging_steps": 5,
"max_steps": 400,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 4527521789902848.0,
"trial_name": null,
"trial_params": null
}