File size: 2,328 Bytes
146bb75
 
 
09828d6
146bb75
09828d6
146bb75
 
 
 
 
 
 
 
 
 
 
 
 
 
a3497bd
 
 
146bb75
 
 
 
a3497bd
146bb75
 
 
 
 
 
a3497bd
146bb75
 
 
 
 
 
a3497bd
 
 
 
146bb75
09828d6
 
 
a3497bd
09828d6
a3497bd
09828d6
 
 
 
a3497bd
09828d6
a3497bd
09828d6
 
 
 
a3497bd
09828d6
a3497bd
09828d6
 
 
 
a3497bd
 
 
 
09828d6
146bb75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09828d6
146bb75
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.004397215097105167,
  "eval_steps": 3,
  "global_step": 6,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0007328691828508611,
      "grad_norm": null,
      "learning_rate": 0.0,
      "loss": 15.4792,
      "step": 1
    },
    {
      "epoch": 0.0007328691828508611,
      "eval_loss": 12.304828643798828,
      "eval_runtime": 57.0786,
      "eval_samples_per_second": 5.046,
      "eval_steps_per_second": 5.046,
      "step": 1
    },
    {
      "epoch": 0.0014657383657017222,
      "grad_norm": 39.946189880371094,
      "learning_rate": 2e-05,
      "loss": 10.6283,
      "step": 2
    },
    {
      "epoch": 0.0021986075485525836,
      "grad_norm": 36.35205078125,
      "learning_rate": 4e-05,
      "loss": 11.3833,
      "step": 3
    },
    {
      "epoch": 0.0021986075485525836,
      "eval_loss": 12.076598167419434,
      "eval_runtime": 56.5009,
      "eval_samples_per_second": 5.097,
      "eval_steps_per_second": 5.097,
      "step": 3
    },
    {
      "epoch": 0.0029314767314034445,
      "grad_norm": 43.041465759277344,
      "learning_rate": 6e-05,
      "loss": 9.7042,
      "step": 4
    },
    {
      "epoch": 0.003664345914254306,
      "grad_norm": 46.06086349487305,
      "learning_rate": 8e-05,
      "loss": 14.8458,
      "step": 5
    },
    {
      "epoch": 0.004397215097105167,
      "grad_norm": 31.989681243896484,
      "learning_rate": 0.0001,
      "loss": 7.5335,
      "step": 6
    },
    {
      "epoch": 0.004397215097105167,
      "eval_loss": 9.594720840454102,
      "eval_runtime": 56.5212,
      "eval_samples_per_second": 5.095,
      "eval_steps_per_second": 5.095,
      "step": 6
    }
  ],
  "logging_steps": 1,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 3,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1180530414452736.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}