|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"eval_steps": 500, |
|
"global_step": 4770, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9968553459119497, |
|
"grad_norm": 0.007859878242015839, |
|
"learning_rate": 0.0007934093547399718, |
|
"loss": 0.5931, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.03225806451612903, |
|
"eval_loss": 0.5877403020858765, |
|
"eval_runtime": 5.3692, |
|
"eval_samples_per_second": 577.366, |
|
"eval_steps_per_second": 12.106, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.9937106918238994, |
|
"grad_norm": 0.013005654327571392, |
|
"learning_rate": 0.0007676597858899992, |
|
"loss": 0.5933, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.03225806451612903, |
|
"eval_loss": 0.5876849293708801, |
|
"eval_runtime": 5.3979, |
|
"eval_samples_per_second": 574.293, |
|
"eval_steps_per_second": 12.042, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 2.990566037735849, |
|
"grad_norm": 0.011686289682984352, |
|
"learning_rate": 0.0007259882616863973, |
|
"loss": 0.5936, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.03225806451612903, |
|
"eval_loss": 0.5876944661140442, |
|
"eval_runtime": 5.3376, |
|
"eval_samples_per_second": 580.786, |
|
"eval_steps_per_second": 12.178, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 3.9874213836477987, |
|
"grad_norm": 0.010250881314277649, |
|
"learning_rate": 0.0006702046329072582, |
|
"loss": 0.5932, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.03225806451612903, |
|
"eval_loss": 0.5876566171646118, |
|
"eval_runtime": 5.3623, |
|
"eval_samples_per_second": 578.11, |
|
"eval_steps_per_second": 12.122, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 4.984276729559748, |
|
"grad_norm": 0.013469184748828411, |
|
"learning_rate": 0.0006027316581600536, |
|
"loss": 0.594, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.03225806451612903, |
|
"eval_loss": 0.5876731276512146, |
|
"eval_runtime": 5.3565, |
|
"eval_samples_per_second": 578.738, |
|
"eval_steps_per_second": 12.135, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 5.981132075471698, |
|
"grad_norm": 0.015176467597484589, |
|
"learning_rate": 0.0005264997801914848, |
|
"loss": 0.5936, |
|
"step": 1902 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.03225806451612903, |
|
"eval_loss": 0.5876610279083252, |
|
"eval_runtime": 5.3967, |
|
"eval_samples_per_second": 574.42, |
|
"eval_steps_per_second": 12.044, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 6.977987421383648, |
|
"grad_norm": 0.01225706934928894, |
|
"learning_rate": 0.0004448198527870465, |
|
"loss": 0.593, |
|
"step": 2219 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.03225806451612903, |
|
"eval_loss": 0.5876200795173645, |
|
"eval_runtime": 5.386, |
|
"eval_samples_per_second": 575.564, |
|
"eval_steps_per_second": 12.068, |
|
"step": 2226 |
|
}, |
|
{ |
|
"epoch": 7.9748427672955975, |
|
"grad_norm": 0.014281037263572216, |
|
"learning_rate": 0.00036123934590356535, |
|
"loss": 0.5938, |
|
"step": 2536 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.03225806451612903, |
|
"eval_loss": 0.5876643061637878, |
|
"eval_runtime": 5.34, |
|
"eval_samples_per_second": 580.525, |
|
"eval_steps_per_second": 12.172, |
|
"step": 2544 |
|
}, |
|
{ |
|
"epoch": 8.971698113207546, |
|
"grad_norm": 0.020413335412740707, |
|
"learning_rate": 0.0002793882742407039, |
|
"loss": 0.5934, |
|
"step": 2853 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.03225806451612903, |
|
"eval_loss": 0.5876378417015076, |
|
"eval_runtime": 5.3343, |
|
"eval_samples_per_second": 581.141, |
|
"eval_steps_per_second": 12.185, |
|
"step": 2862 |
|
}, |
|
{ |
|
"epoch": 9.968553459119496, |
|
"grad_norm": 0.010601122863590717, |
|
"learning_rate": 0.00020282154078240177, |
|
"loss": 0.5935, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.03225806451612903, |
|
"eval_loss": 0.5876396298408508, |
|
"eval_runtime": 5.3546, |
|
"eval_samples_per_second": 578.942, |
|
"eval_steps_per_second": 12.139, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 10.965408805031446, |
|
"grad_norm": 0.015482204966247082, |
|
"learning_rate": 0.00013486454254193946, |
|
"loss": 0.5936, |
|
"step": 3487 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.03225806451612903, |
|
"eval_loss": 0.5875952839851379, |
|
"eval_runtime": 5.3966, |
|
"eval_samples_per_second": 574.436, |
|
"eval_steps_per_second": 12.045, |
|
"step": 3498 |
|
}, |
|
{ |
|
"epoch": 11.962264150943396, |
|
"grad_norm": 0.007544202264398336, |
|
"learning_rate": 7.846874406237966e-05, |
|
"loss": 0.5932, |
|
"step": 3804 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.03225806451612903, |
|
"eval_loss": 0.587660014629364, |
|
"eval_runtime": 5.388, |
|
"eval_samples_per_second": 575.35, |
|
"eval_steps_per_second": 12.064, |
|
"step": 3816 |
|
}, |
|
{ |
|
"epoch": 12.959119496855346, |
|
"grad_norm": 0.015037346631288528, |
|
"learning_rate": 3.608349131102299e-05, |
|
"loss": 0.594, |
|
"step": 4121 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.03225806451612903, |
|
"eval_loss": 0.5875839591026306, |
|
"eval_runtime": 5.3407, |
|
"eval_samples_per_second": 580.45, |
|
"eval_steps_per_second": 12.171, |
|
"step": 4134 |
|
}, |
|
{ |
|
"epoch": 13.955974842767295, |
|
"grad_norm": 0.013815987855196, |
|
"learning_rate": 9.549633264184268e-06, |
|
"loss": 0.593, |
|
"step": 4438 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.03225806451612903, |
|
"eval_loss": 0.5875993371009827, |
|
"eval_runtime": 5.3398, |
|
"eval_samples_per_second": 580.541, |
|
"eval_steps_per_second": 12.173, |
|
"step": 4452 |
|
}, |
|
{ |
|
"epoch": 14.952830188679245, |
|
"grad_norm": 0.010695732198655605, |
|
"learning_rate": 1.9571341049241364e-08, |
|
"loss": 0.5934, |
|
"step": 4755 |
|
} |
|
], |
|
"logging_steps": 317, |
|
"max_steps": 4770, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 1000000000.0, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1236646073993904.0, |
|
"train_batch_size": 48, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.5457565605433671, |
|
"learning_rate": 0.0008021186295599815, |
|
"lr_scheduler_type": "cosine", |
|
"num_train_epochs": 15, |
|
"temperature": 13.83793993486481, |
|
"weight_decay": 0.09136269626429569 |
|
} |
|
} |
|
|