savage1221 committed
Commit 8d42482 · verified · 1 Parent(s): 6b1c2bf

Upload folder using huggingface_hub
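The commit message indicates the checkpoint folder was pushed with the huggingface_hub client. A minimal sketch of such an upload follows; the repo id, local folder path, and token setup are placeholders/assumptions, not taken from this commit:

from huggingface_hub import HfApi

api = HfApi()  # assumes a valid token is configured, e.g. via `huggingface-cli login`
api.upload_folder(
    folder_path="./checkpoint-300",       # hypothetical local folder holding the files shown in this diff
    repo_id="your-username/your-repo",    # hypothetical target model repository
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)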

adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a797f551087e010ee19d4f64a47f7e6b7e8cd3368b6d09b6005ff7e655a959eb
+oid sha256:7b87905b967818bb58a7d8a98924637fd8e529c17e65d81fa4ba48b259e6a0a1
 size 293635024
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7eb48560a5b5da168faee233fb742ce90f8dea785af52ba490234711ffc29199
-size 587360954
+oid sha256:2bc6c06b142e47618bb27401d10a3629248fd822f97d5681c50f323b9668abef
+size 587361210
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b8701b93e17a7f0e50887059a7c196dcbbaad17b98884e0b3fb9b08c6829c92a
+oid sha256:cd2dbb560b33d4689684d823717a94af32d66310a306a84655b09229b2cb70eb
 size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d9721d5ba1fb05d41f214a5bebb8206b88b746e805209d13e654bce65e0ffa08
+oid sha256:d77a12c6ddcb588d8eb1fffa03b3e3be8b5f25c15acdb9a618c73413f62484db
 size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.963855421686747,
+  "epoch": 2.891566265060241,
   "eval_steps": 500,
-  "global_step": 100,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -77,6 +77,146 @@
       "learning_rate": 0.0001,
       "loss": 0.2676,
       "step": 100
+    },
+    {
+      "epoch": 1.0602409638554218,
+      "grad_norm": 0.1597064584493637,
+      "learning_rate": 0.0001,
+      "loss": 0.2644,
+      "step": 110
+    },
+    {
+      "epoch": 1.1566265060240963,
+      "grad_norm": 0.16861063241958618,
+      "learning_rate": 0.0001,
+      "loss": 0.2635,
+      "step": 120
+    },
+    {
+      "epoch": 1.2530120481927711,
+      "grad_norm": 0.1666216403245926,
+      "learning_rate": 0.0001,
+      "loss": 0.256,
+      "step": 130
+    },
+    {
+      "epoch": 1.3493975903614457,
+      "grad_norm": 0.1696414053440094,
+      "learning_rate": 0.0001,
+      "loss": 0.2529,
+      "step": 140
+    },
+    {
+      "epoch": 1.4457831325301205,
+      "grad_norm": 0.16793620586395264,
+      "learning_rate": 0.0001,
+      "loss": 0.2466,
+      "step": 150
+    },
+    {
+      "epoch": 1.5421686746987953,
+      "grad_norm": 0.19180789589881897,
+      "learning_rate": 0.0001,
+      "loss": 0.2446,
+      "step": 160
+    },
+    {
+      "epoch": 1.6385542168674698,
+      "grad_norm": 0.18770650029182434,
+      "learning_rate": 0.0001,
+      "loss": 0.2446,
+      "step": 170
+    },
+    {
+      "epoch": 1.7349397590361446,
+      "grad_norm": 0.1509401947259903,
+      "learning_rate": 0.0001,
+      "loss": 0.2418,
+      "step": 180
+    },
+    {
+      "epoch": 1.8313253012048194,
+      "grad_norm": 0.1551649123430252,
+      "learning_rate": 0.0001,
+      "loss": 0.2391,
+      "step": 190
+    },
+    {
+      "epoch": 1.927710843373494,
+      "grad_norm": 0.17986978590488434,
+      "learning_rate": 0.0001,
+      "loss": 0.2344,
+      "step": 200
+    },
+    {
+      "epoch": 2.0240963855421685,
+      "grad_norm": 0.20075887441635132,
+      "learning_rate": 0.0001,
+      "loss": 0.2304,
+      "step": 210
+    },
+    {
+      "epoch": 2.1204819277108435,
+      "grad_norm": 0.14372113347053528,
+      "learning_rate": 0.0001,
+      "loss": 0.2313,
+      "step": 220
+    },
+    {
+      "epoch": 2.216867469879518,
+      "grad_norm": 0.17301425337791443,
+      "learning_rate": 0.0001,
+      "loss": 0.2294,
+      "step": 230
+    },
+    {
+      "epoch": 2.3132530120481927,
+      "grad_norm": 0.1603270024061203,
+      "learning_rate": 0.0001,
+      "loss": 0.2238,
+      "step": 240
+    },
+    {
+      "epoch": 2.4096385542168672,
+      "grad_norm": 0.1819937825202942,
+      "learning_rate": 0.0001,
+      "loss": 0.2236,
+      "step": 250
+    },
+    {
+      "epoch": 2.5060240963855422,
+      "grad_norm": 0.201055645942688,
+      "learning_rate": 0.0001,
+      "loss": 0.2184,
+      "step": 260
+    },
+    {
+      "epoch": 2.602409638554217,
+      "grad_norm": 0.17147445678710938,
+      "learning_rate": 0.0001,
+      "loss": 0.2205,
+      "step": 270
+    },
+    {
+      "epoch": 2.6987951807228914,
+      "grad_norm": 0.12793570756912231,
+      "learning_rate": 0.0001,
+      "loss": 0.2146,
+      "step": 280
+    },
+    {
+      "epoch": 2.7951807228915664,
+      "grad_norm": 0.18070928752422333,
+      "learning_rate": 0.0001,
+      "loss": 0.2163,
+      "step": 290
+    },
+    {
+      "epoch": 2.891566265060241,
+      "grad_norm": 0.1529516875743866,
+      "learning_rate": 0.0001,
+      "loss": 0.216,
+      "step": 300
     }
   ],
   "logging_steps": 10,
@@ -96,7 +236,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5158452580270080.0,
+  "total_flos": 1.547183001305088e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null