llama3-sudo-sanity / trainer_state.json
Qin Liu
Model save
c5196f3 verified
raw
history blame
6.82 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9696969696969697,
"eval_steps": 500,
"global_step": 147,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.020202020202020204,
"grad_norm": 1.08310938404347,
"learning_rate": 1.3333333333333333e-05,
"loss": 2.5976,
"step": 1
},
{
"epoch": 0.10101010101010101,
"grad_norm": 1.2015655639458453,
"learning_rate": 6.666666666666667e-05,
"loss": 2.5737,
"step": 5
},
{
"epoch": 0.20202020202020202,
"grad_norm": 0.5529484610069302,
"learning_rate": 0.00013333333333333334,
"loss": 2.4242,
"step": 10
},
{
"epoch": 0.30303030303030304,
"grad_norm": 0.44235368767500005,
"learning_rate": 0.0002,
"loss": 2.2287,
"step": 15
},
{
"epoch": 0.40404040404040403,
"grad_norm": 0.34186780209717693,
"learning_rate": 0.00019929278846732884,
"loss": 2.1199,
"step": 20
},
{
"epoch": 0.5050505050505051,
"grad_norm": 0.2690393065112005,
"learning_rate": 0.00019718115683235417,
"loss": 1.963,
"step": 25
},
{
"epoch": 0.6060606060606061,
"grad_norm": 0.2895526654213361,
"learning_rate": 0.0001936949724999762,
"loss": 1.9819,
"step": 30
},
{
"epoch": 0.7070707070707071,
"grad_norm": 0.21284939456784369,
"learning_rate": 0.00018888354486549237,
"loss": 1.9367,
"step": 35
},
{
"epoch": 0.8080808080808081,
"grad_norm": 0.220661039014496,
"learning_rate": 0.00018281492787113708,
"loss": 1.9123,
"step": 40
},
{
"epoch": 0.9090909090909091,
"grad_norm": 0.25800435226254403,
"learning_rate": 0.00017557495743542585,
"loss": 1.8955,
"step": 45
},
{
"epoch": 0.98989898989899,
"eval_loss": 1.8462597131729126,
"eval_runtime": 164.2887,
"eval_samples_per_second": 38.475,
"eval_steps_per_second": 2.41,
"step": 49
},
{
"epoch": 1.0101010101010102,
"grad_norm": 0.21280470872825646,
"learning_rate": 0.00016726603737012529,
"loss": 1.9023,
"step": 50
},
{
"epoch": 1.1111111111111112,
"grad_norm": 0.2281964993767009,
"learning_rate": 0.00015800569095711982,
"loss": 1.8291,
"step": 55
},
{
"epoch": 1.2121212121212122,
"grad_norm": 0.2468620521645798,
"learning_rate": 0.0001479248986720057,
"loss": 1.8448,
"step": 60
},
{
"epoch": 1.3131313131313131,
"grad_norm": 0.24256005211247963,
"learning_rate": 0.00013716624556603274,
"loss": 1.8667,
"step": 65
},
{
"epoch": 1.4141414141414141,
"grad_norm": 0.22252607581763362,
"learning_rate": 0.00012588190451025207,
"loss": 1.8213,
"step": 70
},
{
"epoch": 1.5151515151515151,
"grad_norm": 0.2742840702900069,
"learning_rate": 0.00011423148382732853,
"loss": 1.8122,
"step": 75
},
{
"epoch": 1.6161616161616161,
"grad_norm": 0.25494953140215326,
"learning_rate": 0.00010237976975461075,
"loss": 1.8726,
"step": 80
},
{
"epoch": 1.7171717171717171,
"grad_norm": 0.2657201331656562,
"learning_rate": 9.049439566958175e-05,
"loss": 1.8425,
"step": 85
},
{
"epoch": 1.8181818181818183,
"grad_norm": 0.2700733549650734,
"learning_rate": 7.874347104470234e-05,
"loss": 1.8205,
"step": 90
},
{
"epoch": 1.9191919191919191,
"grad_norm": 0.28284460248100685,
"learning_rate": 6.729320366825784e-05,
"loss": 1.8698,
"step": 95
},
{
"epoch": 2.0,
"eval_loss": 1.7677603960037231,
"eval_runtime": 177.7383,
"eval_samples_per_second": 35.564,
"eval_steps_per_second": 2.228,
"step": 99
},
{
"epoch": 2.0202020202020203,
"grad_norm": 0.28052694585968674,
"learning_rate": 5.630554876306407e-05,
"loss": 1.8412,
"step": 100
},
{
"epoch": 2.121212121212121,
"grad_norm": 0.30253313940579424,
"learning_rate": 4.593591825444028e-05,
"loss": 1.7843,
"step": 105
},
{
"epoch": 2.2222222222222223,
"grad_norm": 0.2919901233198437,
"learning_rate": 3.6330982588091186e-05,
"loss": 1.8085,
"step": 110
},
{
"epoch": 2.323232323232323,
"grad_norm": 0.2985415087687047,
"learning_rate": 2.7626596189492983e-05,
"loss": 1.7548,
"step": 115
},
{
"epoch": 2.4242424242424243,
"grad_norm": 0.31809894547826195,
"learning_rate": 1.994587590756397e-05,
"loss": 1.758,
"step": 120
},
{
"epoch": 2.525252525252525,
"grad_norm": 0.31151542636398494,
"learning_rate": 1.339745962155613e-05,
"loss": 1.7844,
"step": 125
},
{
"epoch": 2.6262626262626263,
"grad_norm": 0.3034143797714973,
"learning_rate": 8.073969641833445e-06,
"loss": 1.7975,
"step": 130
},
{
"epoch": 2.7272727272727275,
"grad_norm": 0.28778283893396456,
"learning_rate": 4.050702638550275e-06,
"loss": 1.7661,
"step": 135
},
{
"epoch": 2.8282828282828283,
"grad_norm": 0.2907166952704868,
"learning_rate": 1.3845646281813507e-06,
"loss": 1.7619,
"step": 140
},
{
"epoch": 2.929292929292929,
"grad_norm": 0.31772634861484544,
"learning_rate": 1.1326608169920372e-07,
"loss": 1.8282,
"step": 145
},
{
"epoch": 2.9696969696969697,
"eval_loss": 1.7491472959518433,
"eval_runtime": 161.8737,
"eval_samples_per_second": 39.049,
"eval_steps_per_second": 2.446,
"step": 147
},
{
"epoch": 2.9696969696969697,
"step": 147,
"total_flos": 706816481427456.0,
"train_loss": 1.9096906704156578,
"train_runtime": 1619.2688,
"train_samples_per_second": 11.711,
"train_steps_per_second": 0.091
}
],
"logging_steps": 5,
"max_steps": 147,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 706816481427456.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}