Jatme26 commited on
Commit
e60dbe2
·
1 Parent(s): 514737d

Upload 7 files

Browse files
Files changed (7) hide show
  1. optimizer.pt +3 -0
  2. pytorch_model.bin +3 -0
  3. rng_state.pth +3 -0
  4. scaler.pt +3 -0
  5. scheduler.pt +3 -0
  6. trainer_state.json +186 -0
  7. training_args.bin +3 -0
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13ff9d7bd979f68a4fe58702525d04c0c16f3e9b44b02216fbf2102f130cffe1
3
+ size 33661637
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e46ffb1ef1b7d4061f9f8892541ab0cb47e557733e350b80ff472fb7f2f29faa
3
+ size 16822989
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41d8580f7635059089a007d64bc134302830a39e0142d36d179b3a04077aea76
3
+ size 14575
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f56e2d94fbf475c8ee7bb541a0c78739d0af9b9df1c40b0d2dbe8978e561b1ce
3
+ size 557
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44ea9321a9351156078558d5930b1e4d22d9fefcb1014aef75723c92be857ab9
3
+ size 627
trainer_state.json ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7586850523948669,
3
+ "best_model_checkpoint": "experiments/checkpoint-42",
4
+ "epoch": 78.0,
5
+ "global_step": 98,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 4.0,
12
+ "learning_rate": 4.9999999999999996e-05,
13
+ "loss": 0.3998,
14
+ "step": 5
15
+ },
16
+ {
17
+ "epoch": 8.0,
18
+ "learning_rate": 9.999999999999999e-05,
19
+ "loss": 0.3916,
20
+ "step": 10
21
+ },
22
+ {
23
+ "epoch": 11.0,
24
+ "eval_loss": 1.4091475009918213,
25
+ "eval_runtime": 4.4251,
26
+ "eval_samples_per_second": 2.26,
27
+ "eval_steps_per_second": 0.452,
28
+ "step": 14
29
+ },
30
+ {
31
+ "epoch": 12.0,
32
+ "learning_rate": 0.00015,
33
+ "loss": 0.3745,
34
+ "step": 15
35
+ },
36
+ {
37
+ "epoch": 16.0,
38
+ "learning_rate": 0.00019999999999999998,
39
+ "loss": 0.3387,
40
+ "step": 20
41
+ },
42
+ {
43
+ "epoch": 20.0,
44
+ "learning_rate": 0.00025,
45
+ "loss": 0.2894,
46
+ "step": 25
47
+ },
48
+ {
49
+ "epoch": 22.4,
50
+ "eval_loss": 0.8969688415527344,
51
+ "eval_runtime": 4.4836,
52
+ "eval_samples_per_second": 2.23,
53
+ "eval_steps_per_second": 0.446,
54
+ "step": 28
55
+ },
56
+ {
57
+ "epoch": 24.0,
58
+ "learning_rate": 0.0003,
59
+ "loss": 0.2322,
60
+ "step": 30
61
+ },
62
+ {
63
+ "epoch": 28.0,
64
+ "learning_rate": 0.00027857142857142854,
65
+ "loss": 0.1964,
66
+ "step": 35
67
+ },
68
+ {
69
+ "epoch": 32.0,
70
+ "learning_rate": 0.0002571428571428571,
71
+ "loss": 0.1739,
72
+ "step": 40
73
+ },
74
+ {
75
+ "epoch": 34.0,
76
+ "eval_loss": 0.7586850523948669,
77
+ "eval_runtime": 4.4178,
78
+ "eval_samples_per_second": 2.264,
79
+ "eval_steps_per_second": 0.453,
80
+ "step": 42
81
+ },
82
+ {
83
+ "epoch": 36.0,
84
+ "learning_rate": 0.00023571428571428569,
85
+ "loss": 0.157,
86
+ "step": 45
87
+ },
88
+ {
89
+ "epoch": 40.0,
90
+ "learning_rate": 0.00021428571428571427,
91
+ "loss": 0.1408,
92
+ "step": 50
93
+ },
94
+ {
95
+ "epoch": 44.0,
96
+ "learning_rate": 0.00019285714285714286,
97
+ "loss": 0.1264,
98
+ "step": 55
99
+ },
100
+ {
101
+ "epoch": 44.8,
102
+ "eval_loss": 0.7922688126564026,
103
+ "eval_runtime": 4.4244,
104
+ "eval_samples_per_second": 2.26,
105
+ "eval_steps_per_second": 0.452,
106
+ "step": 56
107
+ },
108
+ {
109
+ "epoch": 48.0,
110
+ "learning_rate": 0.0001714285714285714,
111
+ "loss": 0.1134,
112
+ "step": 60
113
+ },
114
+ {
115
+ "epoch": 52.0,
116
+ "learning_rate": 0.00015,
117
+ "loss": 0.1021,
118
+ "step": 65
119
+ },
120
+ {
121
+ "epoch": 56.0,
122
+ "learning_rate": 0.00012857142857142855,
123
+ "loss": 0.0893,
124
+ "step": 70
125
+ },
126
+ {
127
+ "epoch": 56.0,
128
+ "eval_loss": 0.8863828778266907,
129
+ "eval_runtime": 4.4142,
130
+ "eval_samples_per_second": 2.265,
131
+ "eval_steps_per_second": 0.453,
132
+ "step": 70
133
+ },
134
+ {
135
+ "epoch": 60.0,
136
+ "learning_rate": 0.00010714285714285714,
137
+ "loss": 0.0778,
138
+ "step": 75
139
+ },
140
+ {
141
+ "epoch": 64.0,
142
+ "learning_rate": 8.57142857142857e-05,
143
+ "loss": 0.0677,
144
+ "step": 80
145
+ },
146
+ {
147
+ "epoch": 67.2,
148
+ "eval_loss": 1.0279890298843384,
149
+ "eval_runtime": 4.4181,
150
+ "eval_samples_per_second": 2.263,
151
+ "eval_steps_per_second": 0.453,
152
+ "step": 84
153
+ },
154
+ {
155
+ "epoch": 68.0,
156
+ "learning_rate": 6.428571428571427e-05,
157
+ "loss": 0.0598,
158
+ "step": 85
159
+ },
160
+ {
161
+ "epoch": 72.0,
162
+ "learning_rate": 4.285714285714285e-05,
163
+ "loss": 0.0524,
164
+ "step": 90
165
+ },
166
+ {
167
+ "epoch": 76.0,
168
+ "learning_rate": 2.1428571428571425e-05,
169
+ "loss": 0.0474,
170
+ "step": 95
171
+ },
172
+ {
173
+ "epoch": 78.0,
174
+ "eval_loss": 1.1172549724578857,
175
+ "eval_runtime": 4.4004,
176
+ "eval_samples_per_second": 2.273,
177
+ "eval_steps_per_second": 0.455,
178
+ "step": 98
179
+ }
180
+ ],
181
+ "max_steps": 100,
182
+ "num_train_epochs": 100,
183
+ "total_flos": 6.335270841090048e+16,
184
+ "trial_name": null,
185
+ "trial_params": null
186
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a730d8bc8fa50a7ac7ab133876f167a354a5dcc0a044229b70f3d06a335eca50
3
+ size 3899