{
  "best_metric": 0.9240393208221627,
  "best_model_checkpoint": "pokemon_models\\checkpoint-1750",
  "epoch": 28.0,
  "eval_steps": 500,
  "global_step": 1960,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.29,
      "learning_rate": 5e-06,
      "loss": 5.0145,
      "step": 20
    },
    {
      "epoch": 0.57,
      "learning_rate": 1e-05,
      "loss": 5.0039,
      "step": 40
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.5e-05,
      "loss": 4.9942,
      "step": 60
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.01876675603217158,
      "eval_loss": 4.973499298095703,
      "eval_runtime": 102.0829,
      "eval_samples_per_second": 10.962,
      "eval_steps_per_second": 0.686,
      "step": 70
    },
    {
      "epoch": 1.14,
      "learning_rate": 2e-05,
      "loss": 4.97,
      "step": 80
    },
    {
      "epoch": 1.43,
      "learning_rate": 2.5e-05,
      "loss": 4.9313,
      "step": 100
    },
    {
      "epoch": 1.71,
      "learning_rate": 3e-05,
      "loss": 4.893,
      "step": 120
    },
    {
      "epoch": 2.0,
      "learning_rate": 3.5e-05,
      "loss": 4.8374,
      "step": 140
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.20196604110813227,
      "eval_loss": 4.816006660461426,
      "eval_runtime": 124.2897,
      "eval_samples_per_second": 9.003,
      "eval_steps_per_second": 0.563,
      "step": 140
    },
    {
      "epoch": 2.29,
      "learning_rate": 4e-05,
      "loss": 4.7329,
      "step": 160
    },
    {
      "epoch": 2.57,
      "learning_rate": 4.5e-05,
      "loss": 4.6472,
      "step": 180
    },
    {
      "epoch": 2.86,
      "learning_rate": 5e-05,
      "loss": 4.541,
      "step": 200
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.5495978552278821,
      "eval_loss": 4.4448018074035645,
      "eval_runtime": 101.0357,
      "eval_samples_per_second": 11.075,
      "eval_steps_per_second": 0.693,
      "step": 210
    },
    {
      "epoch": 3.14,
      "learning_rate": 4.9444444444444446e-05,
      "loss": 4.4117,
      "step": 220
    },
    {
      "epoch": 3.43,
      "learning_rate": 4.888888888888889e-05,
      "loss": 4.2454,
      "step": 240
    },
    {
      "epoch": 3.71,
      "learning_rate": 4.8333333333333334e-05,
      "loss": 4.1227,
      "step": 260
    },
    {
      "epoch": 4.0,
      "learning_rate": 4.7777777777777784e-05,
      "loss": 4.0198,
      "step": 280
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7042001787310098,
      "eval_loss": 4.0061211585998535,
      "eval_runtime": 100.1956,
      "eval_samples_per_second": 11.168,
      "eval_steps_per_second": 0.699,
      "step": 280
    },
    {
      "epoch": 4.29,
      "learning_rate": 4.722222222222222e-05,
      "loss": 3.84,
      "step": 300
    },
    {
      "epoch": 4.57,
      "learning_rate": 4.666666666666667e-05,
      "loss": 3.757,
      "step": 320
    },
    {
      "epoch": 4.86,
      "learning_rate": 4.6111111111111115e-05,
      "loss": 3.6626,
      "step": 340
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7605004468275246,
      "eval_loss": 3.630556106567383,
      "eval_runtime": 100.0509,
      "eval_samples_per_second": 11.184,
      "eval_steps_per_second": 0.7,
      "step": 350
    },
    {
      "epoch": 5.14,
      "learning_rate": 4.555555555555556e-05,
      "loss": 3.5477,
      "step": 360
    },
    {
      "epoch": 5.43,
      "learning_rate": 4.5e-05,
      "loss": 3.3914,
      "step": 380
    },
    {
      "epoch": 5.71,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 3.3164,
      "step": 400
    },
    {
      "epoch": 6.0,
      "learning_rate": 4.388888888888889e-05,
      "loss": 3.2654,
      "step": 420
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7971403038427167,
      "eval_loss": 3.3061511516571045,
      "eval_runtime": 99.8013,
      "eval_samples_per_second": 11.212,
      "eval_steps_per_second": 0.701,
      "step": 420
    },
    {
      "epoch": 6.29,
      "learning_rate": 4.3333333333333334e-05,
      "loss": 3.1041,
      "step": 440
    },
    {
      "epoch": 6.57,
      "learning_rate": 4.277777777777778e-05,
      "loss": 3.0193,
      "step": 460
    },
    {
      "epoch": 6.86,
      "learning_rate": 4.222222222222222e-05,
      "loss": 2.9314,
      "step": 480
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.8310991957104558,
      "eval_loss": 2.994609832763672,
      "eval_runtime": 106.5638,
      "eval_samples_per_second": 10.501,
      "eval_steps_per_second": 0.657,
      "step": 490
    },
    {
      "epoch": 7.14,
      "learning_rate": 4.166666666666667e-05,
      "loss": 2.871,
      "step": 500
    },
    {
      "epoch": 7.43,
      "learning_rate": 4.111111111111111e-05,
      "loss": 2.7418,
      "step": 520
    },
    {
      "epoch": 7.71,
      "learning_rate": 4.055555555555556e-05,
      "loss": 2.6542,
      "step": 540
    },
    {
      "epoch": 8.0,
      "learning_rate": 4e-05,
      "loss": 2.5893,
      "step": 560
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8507596067917784,
      "eval_loss": 2.7318336963653564,
      "eval_runtime": 125.4233,
      "eval_samples_per_second": 8.922,
      "eval_steps_per_second": 0.558,
      "step": 560
    },
    {
      "epoch": 8.29,
      "learning_rate": 3.944444444444445e-05,
      "loss": 2.5106,
      "step": 580
    },
    {
      "epoch": 8.57,
      "learning_rate": 3.888888888888889e-05,
      "loss": 2.4358,
      "step": 600
    },
    {
      "epoch": 8.86,
      "learning_rate": 3.8333333333333334e-05,
      "loss": 2.3645,
      "step": 620
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.8579088471849866,
      "eval_loss": 2.4826338291168213,
      "eval_runtime": 121.4568,
      "eval_samples_per_second": 9.213,
      "eval_steps_per_second": 0.576,
      "step": 630
    },
    {
      "epoch": 9.14,
      "learning_rate": 3.777777777777778e-05,
      "loss": 2.2831,
      "step": 640
    },
    {
      "epoch": 9.43,
      "learning_rate": 3.722222222222222e-05,
      "loss": 2.2297,
      "step": 660
    },
    {
      "epoch": 9.71,
      "learning_rate": 3.6666666666666666e-05,
      "loss": 2.1367,
      "step": 680
    },
    {
      "epoch": 10.0,
      "learning_rate": 3.611111111111111e-05,
      "loss": 2.0793,
      "step": 700
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.871313672922252,
      "eval_loss": 2.245124578475952,
      "eval_runtime": 122.6079,
      "eval_samples_per_second": 9.127,
      "eval_steps_per_second": 0.571,
      "step": 700
    },
    {
      "epoch": 10.29,
      "learning_rate": 3.555555555555556e-05,
      "loss": 1.9796,
      "step": 720
    },
    {
      "epoch": 10.57,
      "learning_rate": 3.5e-05,
      "loss": 1.9471,
      "step": 740
    },
    {
      "epoch": 10.86,
      "learning_rate": 3.444444444444445e-05,
      "loss": 1.8754,
      "step": 760
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.871313672922252,
      "eval_loss": 2.060222625732422,
      "eval_runtime": 122.2722,
      "eval_samples_per_second": 9.152,
      "eval_steps_per_second": 0.572,
      "step": 770
    },
    {
      "epoch": 11.14,
      "learning_rate": 3.388888888888889e-05,
      "loss": 1.8259,
      "step": 780
    },
    {
      "epoch": 11.43,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 1.7872,
      "step": 800
    },
    {
      "epoch": 11.71,
      "learning_rate": 3.277777777777778e-05,
      "loss": 1.6884,
      "step": 820
    },
    {
      "epoch": 12.0,
      "learning_rate": 3.222222222222223e-05,
      "loss": 1.6703,
      "step": 840
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.8811438784629133,
      "eval_loss": 1.872039556503296,
      "eval_runtime": 98.0421,
      "eval_samples_per_second": 11.413,
      "eval_steps_per_second": 0.714,
      "step": 840
    },
    {
      "epoch": 12.29,
      "learning_rate": 3.1666666666666666e-05,
      "loss": 1.6003,
      "step": 860
    },
    {
      "epoch": 12.57,
      "learning_rate": 3.111111111111111e-05,
      "loss": 1.5433,
      "step": 880
    },
    {
      "epoch": 12.86,
      "learning_rate": 3.055555555555556e-05,
      "loss": 1.5198,
      "step": 900
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.8900804289544236,
      "eval_loss": 1.7361352443695068,
      "eval_runtime": 97.0673,
      "eval_samples_per_second": 11.528,
      "eval_steps_per_second": 0.721,
      "step": 910
    },
    {
      "epoch": 13.14,
      "learning_rate": 3e-05,
      "loss": 1.4742,
      "step": 920
    },
    {
      "epoch": 13.43,
      "learning_rate": 2.9444444444444448e-05,
      "loss": 1.3876,
      "step": 940
    },
    {
      "epoch": 13.71,
      "learning_rate": 2.8888888888888888e-05,
      "loss": 1.3603,
      "step": 960
    },
    {
      "epoch": 14.0,
      "learning_rate": 2.8333333333333335e-05,
      "loss": 1.329,
      "step": 980
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.900804289544236,
      "eval_loss": 1.563855528831482,
      "eval_runtime": 97.4399,
      "eval_samples_per_second": 11.484,
      "eval_steps_per_second": 0.718,
      "step": 980
    },
    {
      "epoch": 14.29,
      "learning_rate": 2.777777777777778e-05,
      "loss": 1.2523,
      "step": 1000
    },
    {
      "epoch": 14.57,
      "learning_rate": 2.7222222222222223e-05,
      "loss": 1.2747,
      "step": 1020
    },
    {
      "epoch": 14.86,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 1.203,
      "step": 1040
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.8927613941018767,
      "eval_loss": 1.4685680866241455,
      "eval_runtime": 96.9819,
      "eval_samples_per_second": 11.538,
      "eval_steps_per_second": 0.722,
      "step": 1050
    },
    {
      "epoch": 15.14,
      "learning_rate": 2.6111111111111114e-05,
      "loss": 1.1697,
      "step": 1060
    },
    {
      "epoch": 15.43,
      "learning_rate": 2.5555555555555554e-05,
      "loss": 1.0943,
      "step": 1080
    },
    {
      "epoch": 15.71,
      "learning_rate": 2.5e-05,
      "loss": 1.0947,
      "step": 1100
    },
    {
      "epoch": 16.0,
      "learning_rate": 2.4444444444444445e-05,
      "loss": 1.104,
      "step": 1120
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.8981233243967829,
      "eval_loss": 1.3596620559692383,
      "eval_runtime": 97.1177,
      "eval_samples_per_second": 11.522,
      "eval_steps_per_second": 0.721,
      "step": 1120
    },
    {
      "epoch": 16.29,
      "learning_rate": 2.3888888888888892e-05,
      "loss": 1.0113,
      "step": 1140
    },
    {
      "epoch": 16.57,
      "learning_rate": 2.3333333333333336e-05,
      "loss": 1.0285,
      "step": 1160
    },
    {
      "epoch": 16.86,
      "learning_rate": 2.277777777777778e-05,
      "loss": 0.9682,
      "step": 1180
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.8990169794459338,
      "eval_loss": 1.2199994325637817,
      "eval_runtime": 486.7671,
      "eval_samples_per_second": 2.299,
      "eval_steps_per_second": 0.144,
      "step": 1190
    },
    {
      "epoch": 17.14,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.9578,
      "step": 1200
    },
    {
      "epoch": 17.43,
      "learning_rate": 2.1666666666666667e-05,
      "loss": 0.9403,
      "step": 1220
    },
    {
      "epoch": 17.71,
      "learning_rate": 2.111111111111111e-05,
      "loss": 0.8924,
      "step": 1240
    },
    {
      "epoch": 18.0,
      "learning_rate": 2.0555555555555555e-05,
      "loss": 0.872,
      "step": 1260
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.903485254691689,
      "eval_loss": 1.1389293670654297,
      "eval_runtime": 110.8112,
      "eval_samples_per_second": 10.098,
      "eval_steps_per_second": 0.632,
      "step": 1260
    },
    {
      "epoch": 18.29,
      "learning_rate": 2e-05,
      "loss": 0.8312,
      "step": 1280
    },
    {
      "epoch": 18.57,
      "learning_rate": 1.9444444444444445e-05,
      "loss": 0.8201,
      "step": 1300
    },
    {
      "epoch": 18.86,
      "learning_rate": 1.888888888888889e-05,
      "loss": 0.844,
      "step": 1320
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.9124218051831993,
      "eval_loss": 1.0643764734268188,
      "eval_runtime": 109.2391,
      "eval_samples_per_second": 10.244,
      "eval_steps_per_second": 0.641,
      "step": 1330
    },
    {
      "epoch": 19.14,
      "learning_rate": 1.8333333333333333e-05,
      "loss": 0.8116,
      "step": 1340
    },
    {
      "epoch": 19.43,
      "learning_rate": 1.777777777777778e-05,
      "loss": 0.7649,
      "step": 1360
    },
    {
      "epoch": 19.71,
      "learning_rate": 1.7222222222222224e-05,
      "loss": 0.7402,
      "step": 1380
    },
    {
      "epoch": 20.0,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.7605,
      "step": 1400
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.9088471849865952,
      "eval_loss": 1.0364218950271606,
      "eval_runtime": 108.8495,
      "eval_samples_per_second": 10.28,
      "eval_steps_per_second": 0.643,
      "step": 1400
    },
    {
      "epoch": 20.29,
      "learning_rate": 1.6111111111111115e-05,
      "loss": 0.7156,
      "step": 1420
    },
    {
      "epoch": 20.57,
      "learning_rate": 1.5555555555555555e-05,
      "loss": 0.7109,
      "step": 1440
    },
    {
      "epoch": 20.86,
      "learning_rate": 1.5e-05,
      "loss": 0.7244,
      "step": 1460
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.902591599642538,
      "eval_loss": 0.9655722379684448,
      "eval_runtime": 106.989,
      "eval_samples_per_second": 10.459,
      "eval_steps_per_second": 0.654,
      "step": 1470
    },
    {
      "epoch": 21.14,
      "learning_rate": 1.4444444444444444e-05,
      "loss": 0.6925,
      "step": 1480
    },
    {
      "epoch": 21.43,
      "learning_rate": 1.388888888888889e-05,
      "loss": 0.6687,
      "step": 1500
    },
    {
      "epoch": 21.71,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 0.658,
      "step": 1520
    },
    {
      "epoch": 22.0,
      "learning_rate": 1.2777777777777777e-05,
      "loss": 0.6595,
      "step": 1540
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.9133154602323503,
      "eval_loss": 0.9125866889953613,
      "eval_runtime": 106.9609,
      "eval_samples_per_second": 10.462,
      "eval_steps_per_second": 0.654,
      "step": 1540
    },
    {
      "epoch": 22.29,
      "learning_rate": 1.2222222222222222e-05,
      "loss": 0.6489,
      "step": 1560
    },
    {
      "epoch": 22.57,
      "learning_rate": 1.1666666666666668e-05,
      "loss": 0.6666,
      "step": 1580
    },
    {
      "epoch": 22.86,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 0.6188,
      "step": 1600
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.9142091152815014,
      "eval_loss": 0.8716733455657959,
      "eval_runtime": 107.8489,
      "eval_samples_per_second": 10.376,
      "eval_steps_per_second": 0.649,
      "step": 1610
    },
    {
      "epoch": 23.14,
      "learning_rate": 1.0555555555555555e-05,
      "loss": 0.5869,
      "step": 1620
    },
    {
      "epoch": 23.43,
      "learning_rate": 1e-05,
      "loss": 0.5809,
      "step": 1640
    },
    {
      "epoch": 23.71,
      "learning_rate": 9.444444444444445e-06,
      "loss": 0.5972,
      "step": 1660
    },
    {
      "epoch": 24.0,
      "learning_rate": 8.88888888888889e-06,
      "loss": 0.5726,
      "step": 1680
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.9186773905272565,
      "eval_loss": 0.8351905345916748,
      "eval_runtime": 109.1081,
      "eval_samples_per_second": 10.256,
      "eval_steps_per_second": 0.642,
      "step": 1680
    },
    {
      "epoch": 24.29,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.5562,
      "step": 1700
    },
    {
      "epoch": 24.57,
      "learning_rate": 7.777777777777777e-06,
      "loss": 0.5687,
      "step": 1720
    },
    {
      "epoch": 24.86,
      "learning_rate": 7.222222222222222e-06,
      "loss": 0.5418,
      "step": 1740
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.9240393208221627,
      "eval_loss": 0.8053392767906189,
      "eval_runtime": 111.5634,
      "eval_samples_per_second": 10.03,
      "eval_steps_per_second": 0.627,
      "step": 1750
    },
    {
      "epoch": 25.14,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.5496,
      "step": 1760
    },
    {
      "epoch": 25.43,
      "learning_rate": 6.111111111111111e-06,
      "loss": 0.5354,
      "step": 1780
    },
    {
      "epoch": 25.71,
      "learning_rate": 5.555555555555556e-06,
      "loss": 0.5564,
      "step": 1800
    },
    {
      "epoch": 26.0,
      "learning_rate": 5e-06,
      "loss": 0.579,
      "step": 1820
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.9151027703306523,
      "eval_loss": 0.8136078119277954,
      "eval_runtime": 105.9237,
      "eval_samples_per_second": 10.564,
      "eval_steps_per_second": 0.661,
      "step": 1820
    },
    {
      "epoch": 26.29,
      "learning_rate": 4.444444444444445e-06,
      "loss": 0.5322,
      "step": 1840
    },
    {
      "epoch": 26.57,
      "learning_rate": 3.888888888888889e-06,
      "loss": 0.5529,
      "step": 1860
    },
    {
      "epoch": 26.86,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.5077,
      "step": 1880
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.9168900804289544,
      "eval_loss": 0.7921976447105408,
      "eval_runtime": 105.3647,
      "eval_samples_per_second": 10.62,
      "eval_steps_per_second": 0.664,
      "step": 1890
    },
    {
      "epoch": 27.14,
      "learning_rate": 2.777777777777778e-06,
      "loss": 0.5443,
      "step": 1900
    },
    {
      "epoch": 27.43,
      "learning_rate": 2.2222222222222225e-06,
      "loss": 0.5181,
      "step": 1920
    },
    {
      "epoch": 27.71,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.5318,
      "step": 1940
    },
    {
      "epoch": 28.0,
      "learning_rate": 1.1111111111111112e-06,
      "loss": 0.5138,
      "step": 1960
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.9133154602323503,
      "eval_loss": 0.7894989252090454,
      "eval_runtime": 107.1689,
      "eval_samples_per_second": 10.441,
      "eval_steps_per_second": 0.653,
      "step": 1960
    }
  ],
  "logging_steps": 20,
  "max_steps": 2000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 29,
  "save_steps": 500,
  "total_flos": 9.718280747932926e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}