{ "best_metric": 84.3335778331317, "best_model_checkpoint": "/root/turkic_qa/tr_kaz_models/tr_kaz_xlm_roberta_large_squad_model/checkpoint-2996", "epoch": 5.0, "eval_steps": 500, "global_step": 3745, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 749, "train_exact_match": 69.13086913086913, "train_f1": 85.05221652384066, "train_runtime": 34.8082, "train_samples_per_second": 43.668, "train_steps_per_second": 1.58 }, { "epoch": 1.0, "grad_norm": 26.24435043334961, "learning_rate": 1e-05, "loss": 1.0066, "step": 749 }, { "epoch": 1.0, "eval_exact_match": 66.0625, "eval_f1": 82.35823940468298, "eval_runtime": 105.1616, "eval_samples_per_second": 43.913, "eval_steps_per_second": 1.569, "step": 749 }, { "epoch": 2.0, "step": 1498, "train_exact_match": 76.42357642357642, "train_f1": 89.30189341154144, "train_runtime": 33.8507, "train_samples_per_second": 43.78, "train_steps_per_second": 1.566 }, { "epoch": 2.0, "grad_norm": 19.309417724609375, "learning_rate": 7.500000000000001e-06, "loss": 0.6623, "step": 1498 }, { "epoch": 2.0, "eval_exact_match": 67.0625, "eval_f1": 83.28485925766783, "eval_runtime": 105.3597, "eval_samples_per_second": 43.831, "eval_steps_per_second": 1.566, "step": 1498 }, { "epoch": 3.0, "step": 2247, "train_exact_match": 83.71628371628371, "train_f1": 93.99788828536703, "train_runtime": 34.2893, "train_samples_per_second": 43.658, "train_steps_per_second": 1.575 }, { "epoch": 3.0, "grad_norm": 30.92243766784668, "learning_rate": 5e-06, "loss": 0.4407, "step": 2247 }, { "epoch": 3.0, "eval_exact_match": 69.03125, "eval_f1": 84.12904259140414, "eval_runtime": 105.3447, "eval_samples_per_second": 43.837, "eval_steps_per_second": 1.566, "step": 2247 }, { "epoch": 4.0, "step": 2996, "train_exact_match": 88.51148851148851, "train_f1": 95.38995803272034, "train_runtime": 34.4321, "train_samples_per_second": 43.738, "train_steps_per_second": 1.568 }, { "epoch": 4.0, "grad_norm": 16.18670082092285, "learning_rate": 2.5e-06, "loss": 0.3048, "step": 2996 }, { "epoch": 4.0, "eval_exact_match": 69.4375, "eval_f1": 84.3335778331317, "eval_runtime": 105.3761, "eval_samples_per_second": 43.824, "eval_steps_per_second": 1.566, "step": 2996 }, { "epoch": 5.0, "step": 3745, "train_exact_match": 90.10989010989012, "train_f1": 96.46340834910971, "train_runtime": 34.2854, "train_samples_per_second": 43.634, "train_steps_per_second": 1.575 }, { "epoch": 5.0, "grad_norm": 10.52661418914795, "learning_rate": 0.0, "loss": 0.2169, "step": 3745 }, { "epoch": 5.0, "eval_exact_match": 69.78125, "eval_f1": 84.15544283092075, "eval_runtime": 105.3513, "eval_samples_per_second": 43.834, "eval_steps_per_second": 1.566, "step": 3745 }, { "epoch": 5.0, "step": 3745, "total_flos": 7.301726548858368e+16, "train_loss": 0.5262851322922751, "train_runtime": 6744.6122, "train_samples_per_second": 15.543, "train_steps_per_second": 0.555 } ], "logging_steps": 500, "max_steps": 3745, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 7.301726548858368e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }