File size: 2,429 Bytes
7bd64ab
74cc4ca
7bd64ab
74cc4ca
7bd64ab
 
 
 
 
 
 
74cc4ca
7bd64ab
74cc4ca
7bd64ab
 
 
74cc4ca
7bd64ab
74cc4ca
7bd64ab
 
 
74cc4ca
 
 
 
 
 
7bd64ab
 
 
74cc4ca
7bd64ab
74cc4ca
7bd64ab
 
 
74cc4ca
 
 
 
 
 
7bd64ab
 
 
74cc4ca
7bd64ab
74cc4ca
7bd64ab
 
 
74cc4ca
 
 
 
 
 
7bd64ab
 
 
74cc4ca
 
 
7bd64ab
 
 
74cc4ca
 
 
 
 
 
7bd64ab
 
 
74cc4ca
 
 
7bd64ab
 
 
74cc4ca
 
 
 
 
 
7bd64ab
 
 
 
 
74cc4ca
7bd64ab
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
{
  "best_metric": 1.1131775379180908,
  "best_model_checkpoint": "/kaggle/output/checkpoint-5000",
  "epoch": 0.20371577574967406,
  "eval_steps": 1000,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 2.7777777777777777e-11,
      "loss": 1.029,
      "step": 1
    },
    {
      "epoch": 0.04,
      "learning_rate": 2.7638888888888893e-08,
      "loss": 1.181,
      "step": 1000
    },
    {
      "epoch": 0.04,
      "eval_accuracy": 0.3273453093812375,
      "eval_loss": 1.1529844999313354,
      "eval_runtime": 54.2837,
      "eval_samples_per_second": 92.293,
      "eval_steps_per_second": 11.55,
      "step": 1000
    },
    {
      "epoch": 0.08,
      "learning_rate": 5.541666666666667e-08,
      "loss": 1.1527,
      "step": 2000
    },
    {
      "epoch": 0.08,
      "eval_accuracy": 0.33013972055888224,
      "eval_loss": 1.1351025104522705,
      "eval_runtime": 54.0518,
      "eval_samples_per_second": 92.689,
      "eval_steps_per_second": 11.6,
      "step": 2000
    },
    {
      "epoch": 0.12,
      "learning_rate": 8.316666666666666e-08,
      "loss": 1.142,
      "step": 3000
    },
    {
      "epoch": 0.12,
      "eval_accuracy": 0.3317365269461078,
      "eval_loss": 1.127414345741272,
      "eval_runtime": 54.0871,
      "eval_samples_per_second": 92.628,
      "eval_steps_per_second": 11.592,
      "step": 3000
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.1091666666666668e-07,
      "loss": 1.1371,
      "step": 4000
    },
    {
      "epoch": 0.16,
      "eval_accuracy": 0.331936127744511,
      "eval_loss": 1.1194497346878052,
      "eval_runtime": 54.3907,
      "eval_samples_per_second": 92.111,
      "eval_steps_per_second": 11.528,
      "step": 4000
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.3869444444444447e-07,
      "loss": 1.1246,
      "step": 5000
    },
    {
      "epoch": 0.2,
      "eval_accuracy": 0.3345309381237525,
      "eval_loss": 1.1131775379180908,
      "eval_runtime": 54.3492,
      "eval_samples_per_second": 92.182,
      "eval_steps_per_second": 11.537,
      "step": 5000
    }
  ],
  "logging_steps": 1000,
  "max_steps": 10000000,
  "num_train_epochs": 408,
  "save_steps": 1000,
  "total_flos": 1.045177565184e+16,
  "trial_name": null,
  "trial_params": null
}