amazon_vlm / trainer_state.json
smjain's picture
Upload model checkpoint
f4612a1
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.991097922848665,
"eval_steps": 500,
"global_step": 126,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11869436201780416,
"grad_norm": 0.6501234173774719,
"learning_rate": 0.0002,
"loss": 2.5793,
"step": 5
},
{
"epoch": 0.23738872403560832,
"grad_norm": 0.9226903915405273,
"learning_rate": 0.0002,
"loss": 2.2797,
"step": 10
},
{
"epoch": 0.3560830860534125,
"grad_norm": 1.3594255447387695,
"learning_rate": 0.0002,
"loss": 1.8775,
"step": 15
},
{
"epoch": 0.47477744807121663,
"grad_norm": 1.391525149345398,
"learning_rate": 0.0002,
"loss": 1.4896,
"step": 20
},
{
"epoch": 0.5934718100890207,
"grad_norm": 0.9402475357055664,
"learning_rate": 0.0002,
"loss": 1.2145,
"step": 25
},
{
"epoch": 0.712166172106825,
"grad_norm": 0.516862690448761,
"learning_rate": 0.0002,
"loss": 1.0708,
"step": 30
},
{
"epoch": 0.8308605341246291,
"grad_norm": 0.39975354075431824,
"learning_rate": 0.0002,
"loss": 0.9909,
"step": 35
},
{
"epoch": 0.9495548961424333,
"grad_norm": 0.4522175192832947,
"learning_rate": 0.0002,
"loss": 0.9651,
"step": 40
},
{
"epoch": 1.0682492581602374,
"grad_norm": 0.4957733452320099,
"learning_rate": 0.0002,
"loss": 0.9213,
"step": 45
},
{
"epoch": 1.1869436201780414,
"grad_norm": 0.45304545760154724,
"learning_rate": 0.0002,
"loss": 0.9047,
"step": 50
},
{
"epoch": 1.3056379821958457,
"grad_norm": 0.6747499108314514,
"learning_rate": 0.0002,
"loss": 0.8819,
"step": 55
},
{
"epoch": 1.4243323442136497,
"grad_norm": 0.7882275581359863,
"learning_rate": 0.0002,
"loss": 0.8359,
"step": 60
},
{
"epoch": 1.543026706231454,
"grad_norm": 0.42021647095680237,
"learning_rate": 0.0002,
"loss": 0.8254,
"step": 65
},
{
"epoch": 1.6617210682492582,
"grad_norm": 0.41371551156044006,
"learning_rate": 0.0002,
"loss": 0.7991,
"step": 70
},
{
"epoch": 1.7804154302670623,
"grad_norm": 0.45561087131500244,
"learning_rate": 0.0002,
"loss": 0.7887,
"step": 75
},
{
"epoch": 1.8991097922848663,
"grad_norm": 0.40611913800239563,
"learning_rate": 0.0002,
"loss": 0.7941,
"step": 80
},
{
"epoch": 2.0178041543026706,
"grad_norm": 0.5473902225494385,
"learning_rate": 0.0002,
"loss": 0.7779,
"step": 85
},
{
"epoch": 2.136498516320475,
"grad_norm": 0.4852384924888611,
"learning_rate": 0.0002,
"loss": 0.7517,
"step": 90
},
{
"epoch": 2.255192878338279,
"grad_norm": 0.4257807731628418,
"learning_rate": 0.0002,
"loss": 0.7545,
"step": 95
},
{
"epoch": 2.373887240356083,
"grad_norm": 0.4694693386554718,
"learning_rate": 0.0002,
"loss": 0.7389,
"step": 100
},
{
"epoch": 2.492581602373887,
"grad_norm": 0.46692556142807007,
"learning_rate": 0.0002,
"loss": 0.7348,
"step": 105
},
{
"epoch": 2.6112759643916914,
"grad_norm": 0.38663822412490845,
"learning_rate": 0.0002,
"loss": 0.7368,
"step": 110
},
{
"epoch": 2.7299703264094957,
"grad_norm": 0.4077226519584656,
"learning_rate": 0.0002,
"loss": 0.7335,
"step": 115
},
{
"epoch": 2.8486646884272995,
"grad_norm": 0.4740726351737976,
"learning_rate": 0.0002,
"loss": 0.7462,
"step": 120
},
{
"epoch": 2.9673590504451037,
"grad_norm": 0.40621665120124817,
"learning_rate": 0.0002,
"loss": 0.75,
"step": 125
}
],
"logging_steps": 5,
"max_steps": 126,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.310063237541069e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}