File size: 2,063 Bytes
c109064
a7a7444
c109064
a7a7444
c109064
 
 
 
 
 
 
a7a7444
c109064
a7a7444
c109064
 
 
a7a7444
c109064
a7a7444
c109064
 
 
a7a7444
 
 
 
 
 
c109064
 
 
a7a7444
c109064
a7a7444
c109064
 
 
a7a7444
 
 
 
 
 
c109064
 
 
a7a7444
c109064
a7a7444
c109064
 
 
a7a7444
 
 
 
 
 
c109064
 
 
a7a7444
 
 
c109064
 
 
a7a7444
 
 
 
 
 
c109064
 
 
 
 
a7a7444
c109064
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
{
  "best_metric": 1.1194497346878052,
  "best_model_checkpoint": "/kaggle/output/checkpoint-4000",
  "epoch": 0.16297262059973924,
  "eval_steps": 1000,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 2.7777777777777777e-11,
      "loss": 1.029,
      "step": 1
    },
    {
      "epoch": 0.04,
      "learning_rate": 2.7638888888888893e-08,
      "loss": 1.181,
      "step": 1000
    },
    {
      "epoch": 0.04,
      "eval_accuracy": 0.3273453093812375,
      "eval_loss": 1.1529844999313354,
      "eval_runtime": 54.2837,
      "eval_samples_per_second": 92.293,
      "eval_steps_per_second": 11.55,
      "step": 1000
    },
    {
      "epoch": 0.08,
      "learning_rate": 5.541666666666667e-08,
      "loss": 1.1527,
      "step": 2000
    },
    {
      "epoch": 0.08,
      "eval_accuracy": 0.33013972055888224,
      "eval_loss": 1.1351025104522705,
      "eval_runtime": 54.0518,
      "eval_samples_per_second": 92.689,
      "eval_steps_per_second": 11.6,
      "step": 2000
    },
    {
      "epoch": 0.12,
      "learning_rate": 8.316666666666666e-08,
      "loss": 1.142,
      "step": 3000
    },
    {
      "epoch": 0.12,
      "eval_accuracy": 0.3317365269461078,
      "eval_loss": 1.127414345741272,
      "eval_runtime": 54.0871,
      "eval_samples_per_second": 92.628,
      "eval_steps_per_second": 11.592,
      "step": 3000
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.1091666666666668e-07,
      "loss": 1.1371,
      "step": 4000
    },
    {
      "epoch": 0.16,
      "eval_accuracy": 0.331936127744511,
      "eval_loss": 1.1194497346878052,
      "eval_runtime": 54.3907,
      "eval_samples_per_second": 92.111,
      "eval_steps_per_second": 11.528,
      "step": 4000
    }
  ],
  "logging_steps": 1000,
  "max_steps": 10000000,
  "num_train_epochs": 408,
  "save_steps": 1000,
  "total_flos": 8361420521472000.0,
  "trial_name": null,
  "trial_params": null
}