{
  "best_metric": 11.079472541809082,
  "best_model_checkpoint": "miner_id_24/checkpoint-25",
  "epoch": 0.03266372693124286,
  "eval_steps": 25,
  "global_step": 25,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0013065490772497142,
      "grad_norm": 3.913083553314209,
      "learning_rate": 5.000000000000001e-07,
      "loss": 177.3398,
      "step": 1
    },
    {
      "epoch": 0.0013065490772497142,
      "eval_loss": 11.082952499389648,
      "eval_runtime": 9.6032,
      "eval_samples_per_second": 268.452,
      "eval_steps_per_second": 67.165,
      "step": 1
    },
    {
      "epoch": 0.0026130981544994283,
      "grad_norm": 3.7991042137145996,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 177.3366,
      "step": 2
    },
    {
      "epoch": 0.0039196472317491425,
      "grad_norm": 3.5443339347839355,
      "learning_rate": 1.5e-06,
      "loss": 177.3718,
      "step": 3
    },
    {
      "epoch": 0.005226196308998857,
      "grad_norm": 4.066579341888428,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 177.3382,
      "step": 4
    },
    {
      "epoch": 0.006532745386248571,
      "grad_norm": 3.708773374557495,
      "learning_rate": 2.5e-06,
      "loss": 177.3313,
      "step": 5
    },
    {
      "epoch": 0.007839294463498285,
      "grad_norm": 3.7606372833251953,
      "learning_rate": 3e-06,
      "loss": 177.3268,
      "step": 6
    },
    {
      "epoch": 0.009145843540748,
      "grad_norm": 3.9121954441070557,
      "learning_rate": 3.5000000000000004e-06,
      "loss": 177.331,
      "step": 7
    },
    {
      "epoch": 0.010452392617997713,
      "grad_norm": 3.798919916152954,
      "learning_rate": 4.000000000000001e-06,
      "loss": 177.3199,
      "step": 8
    },
    {
      "epoch": 0.011758941695247428,
      "grad_norm": 4.223837852478027,
      "learning_rate": 4.5e-06,
      "loss": 177.3199,
      "step": 9
    },
    {
      "epoch": 0.013065490772497142,
      "grad_norm": 4.020258903503418,
      "learning_rate": 5e-06,
      "loss": 177.308,
      "step": 10
    },
    {
      "epoch": 0.014372039849746856,
      "grad_norm": 3.514195203781128,
      "learning_rate": 5.500000000000001e-06,
      "loss": 177.3344,
      "step": 11
    },
    {
      "epoch": 0.01567858892699657,
      "grad_norm": 3.7526743412017822,
      "learning_rate": 6e-06,
      "loss": 177.3065,
      "step": 12
    },
    {
      "epoch": 0.016985138004246284,
      "grad_norm": 4.039890289306641,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 177.3449,
      "step": 13
    },
    {
      "epoch": 0.018291687081496,
      "grad_norm": 3.5606868267059326,
      "learning_rate": 7.000000000000001e-06,
      "loss": 177.2917,
      "step": 14
    },
    {
      "epoch": 0.019598236158745713,
      "grad_norm": 3.8957228660583496,
      "learning_rate": 7.5e-06,
      "loss": 177.3112,
      "step": 15
    },
    {
      "epoch": 0.020904785235995427,
      "grad_norm": 4.019400596618652,
      "learning_rate": 8.000000000000001e-06,
      "loss": 177.2863,
      "step": 16
    },
    {
      "epoch": 0.02221133431324514,
      "grad_norm": 4.092992305755615,
      "learning_rate": 8.500000000000002e-06,
      "loss": 177.2939,
      "step": 17
    },
    {
      "epoch": 0.023517883390494855,
      "grad_norm": 3.5349273681640625,
      "learning_rate": 9e-06,
      "loss": 177.3197,
      "step": 18
    },
    {
      "epoch": 0.02482443246774457,
      "grad_norm": 3.5666286945343018,
      "learning_rate": 9.5e-06,
      "loss": 177.2924,
      "step": 19
    },
    {
      "epoch": 0.026130981544994283,
      "grad_norm": 3.725250720977783,
      "learning_rate": 1e-05,
      "loss": 177.2928,
      "step": 20
    },
    {
      "epoch": 0.027437530622243998,
      "grad_norm": 3.561274766921997,
      "learning_rate": 1.05e-05,
      "loss": 177.3136,
      "step": 21
    },
    {
      "epoch": 0.02874407969949371,
      "grad_norm": 3.4899213314056396,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 177.2746,
      "step": 22
    },
    {
      "epoch": 0.030050628776743426,
      "grad_norm": 3.950650930404663,
      "learning_rate": 1.1500000000000002e-05,
      "loss": 177.2791,
      "step": 23
    },
    {
      "epoch": 0.03135717785399314,
      "grad_norm": 3.843275547027588,
      "learning_rate": 1.2e-05,
      "loss": 177.2599,
      "step": 24
    },
    {
      "epoch": 0.03266372693124286,
      "grad_norm": 3.8913445472717285,
      "learning_rate": 1.25e-05,
      "loss": 177.2308,
      "step": 25
    },
    {
      "epoch": 0.03266372693124286,
      "eval_loss": 11.079472541809082,
      "eval_runtime": 9.2405,
      "eval_samples_per_second": 278.988,
      "eval_steps_per_second": 69.801,
      "step": 25
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1654652928000.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}