zabir735 committed on
Commit
94b56d3
1 Parent(s): c179f23

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +8 -0
  2. train_results.json +8 -0
  3. trainer_state.json +159 -0
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.848484848484849,
3
+ "total_flos": 5.991868094431887e+17,
4
+ "train_loss": 0.13874833025814345,
5
+ "train_runtime": 338.048,
6
+ "train_samples_per_second": 23.31,
7
+ "train_steps_per_second": 0.355
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.848484848484849,
3
+ "total_flos": 5.991868094431887e+17,
4
+ "train_loss": 0.13874833025814345,
5
+ "train_runtime": 338.048,
6
+ "train_samples_per_second": 23.31,
7
+ "train_steps_per_second": 0.355
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9886363636363636,
3
+ "best_model_checkpoint": "swin-base-patch4-window7-224-in22k-finetuned-batch8-22k/checkpoint-120",
4
+ "epoch": 4.848484848484849,
5
+ "eval_steps": 500,
6
+ "global_step": 120,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.40404040404040403,
13
+ "grad_norm": 11.940139770507812,
14
+ "learning_rate": 4.166666666666667e-05,
15
+ "loss": 0.74,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.8080808080808081,
20
+ "grad_norm": 8.186209678649902,
21
+ "learning_rate": 4.62962962962963e-05,
22
+ "loss": 0.3752,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.9696969696969697,
27
+ "eval_accuracy": 0.9659090909090909,
28
+ "eval_loss": 0.1119123175740242,
29
+ "eval_runtime": 2.7264,
30
+ "eval_samples_per_second": 64.555,
31
+ "eval_steps_per_second": 4.035,
32
+ "step": 24
33
+ },
34
+ {
35
+ "epoch": 1.2121212121212122,
36
+ "grad_norm": 13.843687057495117,
37
+ "learning_rate": 4.166666666666667e-05,
38
+ "loss": 0.1399,
39
+ "step": 30
40
+ },
41
+ {
42
+ "epoch": 1.6161616161616161,
43
+ "grad_norm": 9.353983879089355,
44
+ "learning_rate": 3.7037037037037037e-05,
45
+ "loss": 0.2189,
46
+ "step": 40
47
+ },
48
+ {
49
+ "epoch": 1.9797979797979797,
50
+ "eval_accuracy": 0.9375,
51
+ "eval_loss": 0.16898323595523834,
52
+ "eval_runtime": 2.9538,
53
+ "eval_samples_per_second": 59.584,
54
+ "eval_steps_per_second": 3.724,
55
+ "step": 49
56
+ },
57
+ {
58
+ "epoch": 2.0202020202020203,
59
+ "grad_norm": 9.988105773925781,
60
+ "learning_rate": 3.240740740740741e-05,
61
+ "loss": 0.0815,
62
+ "step": 50
63
+ },
64
+ {
65
+ "epoch": 2.4242424242424243,
66
+ "grad_norm": 5.987796306610107,
67
+ "learning_rate": 2.777777777777778e-05,
68
+ "loss": 0.0405,
69
+ "step": 60
70
+ },
71
+ {
72
+ "epoch": 2.8282828282828283,
73
+ "grad_norm": 2.5615344047546387,
74
+ "learning_rate": 2.314814814814815e-05,
75
+ "loss": 0.0343,
76
+ "step": 70
77
+ },
78
+ {
79
+ "epoch": 2.98989898989899,
80
+ "eval_accuracy": 0.9829545454545454,
81
+ "eval_loss": 0.08864129334688187,
82
+ "eval_runtime": 2.7177,
83
+ "eval_samples_per_second": 64.76,
84
+ "eval_steps_per_second": 4.048,
85
+ "step": 74
86
+ },
87
+ {
88
+ "epoch": 3.2323232323232323,
89
+ "grad_norm": 6.737287998199463,
90
+ "learning_rate": 1.8518518518518518e-05,
91
+ "loss": 0.0161,
92
+ "step": 80
93
+ },
94
+ {
95
+ "epoch": 3.6363636363636362,
96
+ "grad_norm": 8.469164848327637,
97
+ "learning_rate": 1.388888888888889e-05,
98
+ "loss": 0.009,
99
+ "step": 90
100
+ },
101
+ {
102
+ "epoch": 4.0,
103
+ "eval_accuracy": 0.9829545454545454,
104
+ "eval_loss": 0.09685815125703812,
105
+ "eval_runtime": 2.7931,
106
+ "eval_samples_per_second": 63.012,
107
+ "eval_steps_per_second": 3.938,
108
+ "step": 99
109
+ },
110
+ {
111
+ "epoch": 4.040404040404041,
112
+ "grad_norm": 3.5041699409484863,
113
+ "learning_rate": 9.259259259259259e-06,
114
+ "loss": 0.0054,
115
+ "step": 100
116
+ },
117
+ {
118
+ "epoch": 4.444444444444445,
119
+ "grad_norm": 0.15403775870800018,
120
+ "learning_rate": 4.6296296296296296e-06,
121
+ "loss": 0.0027,
122
+ "step": 110
123
+ },
124
+ {
125
+ "epoch": 4.848484848484849,
126
+ "grad_norm": 0.31535252928733826,
127
+ "learning_rate": 0.0,
128
+ "loss": 0.0015,
129
+ "step": 120
130
+ },
131
+ {
132
+ "epoch": 4.848484848484849,
133
+ "eval_accuracy": 0.9886363636363636,
134
+ "eval_loss": 0.0984315350651741,
135
+ "eval_runtime": 2.7608,
136
+ "eval_samples_per_second": 63.749,
137
+ "eval_steps_per_second": 3.984,
138
+ "step": 120
139
+ },
140
+ {
141
+ "epoch": 4.848484848484849,
142
+ "step": 120,
143
+ "total_flos": 5.991868094431887e+17,
144
+ "train_loss": 0.13874833025814345,
145
+ "train_runtime": 338.048,
146
+ "train_samples_per_second": 23.31,
147
+ "train_steps_per_second": 0.355
148
+ }
149
+ ],
150
+ "logging_steps": 10,
151
+ "max_steps": 120,
152
+ "num_input_tokens_seen": 0,
153
+ "num_train_epochs": 5,
154
+ "save_steps": 500,
155
+ "total_flos": 5.991868094431887e+17,
156
+ "train_batch_size": 16,
157
+ "trial_name": null,
158
+ "trial_params": null
159
+ }