{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.044989775051125, "eval_steps": 1000, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10224948875255624, "grad_norm": 0.22070330381393433, "learning_rate": 9.999694946400538e-05, "loss": 1.2022, "step": 50 }, { "epoch": 0.20449897750511248, "grad_norm": 0.32263442873954773, "learning_rate": 9.963133532962538e-05, "loss": 1.0028, "step": 100 }, { "epoch": 0.3067484662576687, "grad_norm": 0.2867143452167511, "learning_rate": 9.866072190997923e-05, "loss": 0.9375, "step": 150 }, { "epoch": 0.40899795501022496, "grad_norm": 0.2979172170162201, "learning_rate": 9.709694085177272e-05, "loss": 0.9275, "step": 200 }, { "epoch": 0.5112474437627812, "grad_norm": 0.3326389789581299, "learning_rate": 9.495905443524156e-05, "loss": 0.9066, "step": 250 }, { "epoch": 0.6134969325153374, "grad_norm": 0.3445422649383545, "learning_rate": 9.227312320752585e-05, "loss": 0.9226, "step": 300 }, { "epoch": 0.7157464212678937, "grad_norm": 0.3284899890422821, "learning_rate": 8.907188830811434e-05, "loss": 0.8938, "step": 350 }, { "epoch": 0.8179959100204499, "grad_norm": 0.3464341461658478, "learning_rate": 8.539437235876908e-05, "loss": 0.9039, "step": 400 }, { "epoch": 0.9202453987730062, "grad_norm": 0.35458436608314514, "learning_rate": 8.1285403783028e-05, "loss": 0.888, "step": 450 }, { "epoch": 1.0224948875255624, "grad_norm": 0.3531360626220703, "learning_rate": 7.679507035376672e-05, "loss": 0.8834, "step": 500 }, { "epoch": 1.1247443762781186, "grad_norm": 0.38273200392723083, "learning_rate": 7.197810863000116e-05, "loss": 0.8308, "step": 550 }, { "epoch": 1.2269938650306749, "grad_norm": 0.3767881691455841, "learning_rate": 6.689323672561398e-05, "loss": 0.8212, "step": 600 }, { "epoch": 1.329243353783231, "grad_norm": 0.4009556472301483, "learning_rate": 6.160243854346398e-05, "loss": 0.8223, "step": 650 }, { "epoch": 1.4314928425357873, "grad_norm": 0.430261492729187, "learning_rate": 5.617020819996831e-05, "loss": 0.8233, "step": 700 }, { "epoch": 1.5337423312883436, "grad_norm": 0.39679399132728577, "learning_rate": 5.0662763850519936e-05, "loss": 0.8224, "step": 750 }, { "epoch": 1.6359918200408998, "grad_norm": 0.4061990976333618, "learning_rate": 4.514724049910228e-05, "loss": 0.8018, "step": 800 }, { "epoch": 1.738241308793456, "grad_norm": 0.4112933874130249, "learning_rate": 3.969087163164348e-05, "loss": 0.8233, "step": 850 }, { "epoch": 1.8404907975460123, "grad_norm": 0.41034209728240967, "learning_rate": 3.436016964888865e-05, "loss": 0.8079, "step": 900 }, { "epoch": 1.9427402862985685, "grad_norm": 0.4243488907814026, "learning_rate": 2.922011508920362e-05, "loss": 0.8134, "step": 950 }, { "epoch": 2.044989775051125, "grad_norm": 0.432036817073822, "learning_rate": 2.433336452457431e-05, "loss": 0.763, "step": 1000 }, { "epoch": 2.044989775051125, "eval_loss": 0.8031564950942993, "eval_runtime": 238.4795, "eval_samples_per_second": 0.923, "eval_steps_per_second": 0.923, "step": 1000 } ], "logging_steps": 50, "max_steps": 1467, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.709436552019968e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }