output_qwen / trainer_state.json
anhng94's picture
Training in progress, step 10
2354f15 verified
raw
history blame
5.25 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9953917050691244,
"eval_steps": 500,
"global_step": 27,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03686635944700461,
"grad_norm": 0.2873728095047169,
"learning_rate": 0.0,
"loss": 1.4734,
"step": 1
},
{
"epoch": 0.07373271889400922,
"grad_norm": 0.27419596643661837,
"learning_rate": 0.0003,
"loss": 1.279,
"step": 2
},
{
"epoch": 0.11059907834101383,
"grad_norm": 0.3351799319426796,
"learning_rate": 0.0003,
"loss": 1.4582,
"step": 3
},
{
"epoch": 0.14746543778801843,
"grad_norm": 0.2789893810816952,
"learning_rate": 0.0003,
"loss": 1.1216,
"step": 4
},
{
"epoch": 0.18433179723502305,
"grad_norm": 0.14190144952821382,
"learning_rate": 0.0003,
"loss": 1.0121,
"step": 5
},
{
"epoch": 0.22119815668202766,
"grad_norm": 0.1542021738719797,
"learning_rate": 0.0003,
"loss": 1.0723,
"step": 6
},
{
"epoch": 0.25806451612903225,
"grad_norm": 0.17476255091645262,
"learning_rate": 0.0003,
"loss": 0.8962,
"step": 7
},
{
"epoch": 0.29493087557603687,
"grad_norm": 0.30183805297227384,
"learning_rate": 0.0003,
"loss": 0.9577,
"step": 8
},
{
"epoch": 0.3317972350230415,
"grad_norm": 0.2945209326852545,
"learning_rate": 0.0003,
"loss": 0.9143,
"step": 9
},
{
"epoch": 0.3686635944700461,
"grad_norm": 0.21480966699076806,
"learning_rate": 0.0003,
"loss": 0.7952,
"step": 10
},
{
"epoch": 0.4055299539170507,
"grad_norm": 0.18078986894945484,
"learning_rate": 0.0003,
"loss": 0.8234,
"step": 11
},
{
"epoch": 0.4423963133640553,
"grad_norm": 0.15453708977718567,
"learning_rate": 0.0003,
"loss": 0.7589,
"step": 12
},
{
"epoch": 0.4792626728110599,
"grad_norm": 0.1631172234239537,
"learning_rate": 0.0003,
"loss": 0.7419,
"step": 13
},
{
"epoch": 0.5161290322580645,
"grad_norm": 0.09781085387100458,
"learning_rate": 0.0003,
"loss": 0.7239,
"step": 14
},
{
"epoch": 0.5529953917050692,
"grad_norm": 0.09897379010199117,
"learning_rate": 0.0003,
"loss": 0.7673,
"step": 15
},
{
"epoch": 0.5898617511520737,
"grad_norm": 0.11558640849854486,
"learning_rate": 0.0003,
"loss": 0.7533,
"step": 16
},
{
"epoch": 0.6267281105990783,
"grad_norm": 0.11345769354838794,
"learning_rate": 0.0003,
"loss": 0.7581,
"step": 17
},
{
"epoch": 0.663594470046083,
"grad_norm": 0.1013501193678853,
"learning_rate": 0.0003,
"loss": 0.7224,
"step": 18
},
{
"epoch": 0.7004608294930875,
"grad_norm": 0.09930580785134363,
"learning_rate": 0.0003,
"loss": 0.7597,
"step": 19
},
{
"epoch": 0.7373271889400922,
"grad_norm": 0.10206714996240562,
"learning_rate": 0.0003,
"loss": 0.704,
"step": 20
},
{
"epoch": 0.7741935483870968,
"grad_norm": 0.10775281367207125,
"learning_rate": 0.0003,
"loss": 0.6639,
"step": 21
},
{
"epoch": 0.8110599078341014,
"grad_norm": 0.12015377273414085,
"learning_rate": 0.0003,
"loss": 0.7494,
"step": 22
},
{
"epoch": 0.847926267281106,
"grad_norm": 0.08770642908913276,
"learning_rate": 0.0003,
"loss": 0.7115,
"step": 23
},
{
"epoch": 0.8847926267281107,
"grad_norm": 0.135245894998221,
"learning_rate": 0.0003,
"loss": 0.7169,
"step": 24
},
{
"epoch": 0.9216589861751152,
"grad_norm": 0.0993611544536447,
"learning_rate": 0.0003,
"loss": 0.6667,
"step": 25
},
{
"epoch": 0.9585253456221198,
"grad_norm": 0.09795283307056235,
"learning_rate": 0.0003,
"loss": 0.6859,
"step": 26
},
{
"epoch": 0.9953917050691244,
"grad_norm": 0.10408097730031732,
"learning_rate": 0.0003,
"loss": 0.6844,
"step": 27
},
{
"epoch": 0.9953917050691244,
"step": 27,
"total_flos": 5728527974400.0,
"train_loss": 0.8656142420238919,
"train_runtime": 448.8389,
"train_samples_per_second": 1.934,
"train_steps_per_second": 0.06
}
],
"logging_steps": 1.0,
"max_steps": 27,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5728527974400.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}