Training in progress, step 4421, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 83945296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fbc68c162e53d2aac26456683814d8d51e7523de276c1e5d970882f334de98f1
|
3 |
size 83945296
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 168155346
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:046e0344f94182f69dc2f01192aa3c2dbd83a21e77945252a81adee8593c8aa8
|
3 |
size 168155346
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e9cbf36cc778790142eadfdac22fd02d67467e63a64edf810a72e967757bb21
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -6167,6 +6167,34 @@
|
|
6167 |
"learning_rate": 2.913878093990796e-09,
|
6168 |
"loss": 0.6676,
|
6169 |
"step": 4400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6170 |
}
|
6171 |
],
|
6172 |
"logging_steps": 5,
|
@@ -6181,12 +6209,12 @@
|
|
6181 |
"should_evaluate": false,
|
6182 |
"should_log": false,
|
6183 |
"should_save": true,
|
6184 |
-
"should_training_stop":
|
6185 |
},
|
6186 |
"attributes": {}
|
6187 |
}
|
6188 |
},
|
6189 |
-
"total_flos": 4.
|
6190 |
"train_batch_size": 1,
|
6191 |
"trial_name": null,
|
6192 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9998492272898606,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 4421,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
6167 |
"learning_rate": 2.913878093990796e-09,
|
6168 |
"loss": 0.6676,
|
6169 |
"step": 4400
|
6170 |
+
},
|
6171 |
+
{
|
6172 |
+
"epoch": 0.9962306822465133,
|
6173 |
+
"grad_norm": 0.3391641080379486,
|
6174 |
+
"learning_rate": 1.6915167145525878e-09,
|
6175 |
+
"loss": 0.6981,
|
6176 |
+
"step": 4405
|
6177 |
+
},
|
6178 |
+
{
|
6179 |
+
"epoch": 0.9973614775725593,
|
6180 |
+
"grad_norm": 0.41276663541793823,
|
6181 |
+
"learning_rate": 7.995107016406378e-10,
|
6182 |
+
"loss": 0.6594,
|
6183 |
+
"step": 4410
|
6184 |
+
},
|
6185 |
+
{
|
6186 |
+
"epoch": 0.9984922728986053,
|
6187 |
+
"grad_norm": 0.4570743143558502,
|
6188 |
+
"learning_rate": 2.3787184321444335e-10,
|
6189 |
+
"loss": 0.7059,
|
6190 |
+
"step": 4415
|
6191 |
+
},
|
6192 |
+
{
|
6193 |
+
"epoch": 0.9996230682246513,
|
6194 |
+
"grad_norm": 0.3156117796897888,
|
6195 |
+
"learning_rate": 6.607561386928751e-12,
|
6196 |
+
"loss": 0.6465,
|
6197 |
+
"step": 4420
|
6198 |
}
|
6199 |
],
|
6200 |
"logging_steps": 5,
|
|
|
6209 |
"should_evaluate": false,
|
6210 |
"should_log": false,
|
6211 |
"should_save": true,
|
6212 |
+
"should_training_stop": true
|
6213 |
},
|
6214 |
"attributes": {}
|
6215 |
}
|
6216 |
},
|
6217 |
+
"total_flos": 4.746119130111803e+18,
|
6218 |
"train_batch_size": 1,
|
6219 |
"trial_name": null,
|
6220 |
"trial_params": null
|