Training in progress, step 789, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100198584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31b0837275cc5f11270651e4afa09e46d066ac962dab68f2dc813068f170d69a
|
3 |
size 100198584
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50675604
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07a002c9cb506fad059bebbe67c0737c9f684d39d8a96d697d7f947c812a25b3
|
3 |
size 50675604
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d054cb5b5973f365f853a3e5809ff93e1f3146a44706d0ea9814bf12ba99652
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -5488,6 +5488,48 @@
|
|
5488 |
"learning_rate": 3.1565967940813812e-06,
|
5489 |
"loss": 1.0938,
|
5490 |
"step": 783
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5491 |
}
|
5492 |
],
|
5493 |
"logging_steps": 1,
|
@@ -5507,7 +5549,7 @@
|
|
5507 |
"attributes": {}
|
5508 |
}
|
5509 |
},
|
5510 |
-
"total_flos": 8.
|
5511 |
"train_batch_size": 4,
|
5512 |
"trial_name": null,
|
5513 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.8658436213991769,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 789,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
5488 |
"learning_rate": 3.1565967940813812e-06,
|
5489 |
"loss": 1.0938,
|
5490 |
"step": 783
|
5491 |
+
},
|
5492 |
+
{
|
5493 |
+
"epoch": 0.8603566529492456,
|
5494 |
+
"grad_norm": 0.12630558013916016,
|
5495 |
+
"learning_rate": 3.1319358816276206e-06,
|
5496 |
+
"loss": 1.1672,
|
5497 |
+
"step": 784
|
5498 |
+
},
|
5499 |
+
{
|
5500 |
+
"epoch": 0.8614540466392319,
|
5501 |
+
"grad_norm": 0.14046333730220795,
|
5502 |
+
"learning_rate": 3.1072749691738595e-06,
|
5503 |
+
"loss": 1.1078,
|
5504 |
+
"step": 785
|
5505 |
+
},
|
5506 |
+
{
|
5507 |
+
"epoch": 0.8625514403292182,
|
5508 |
+
"grad_norm": 0.13367818295955658,
|
5509 |
+
"learning_rate": 3.0826140567200992e-06,
|
5510 |
+
"loss": 1.1097,
|
5511 |
+
"step": 786
|
5512 |
+
},
|
5513 |
+
{
|
5514 |
+
"epoch": 0.8636488340192043,
|
5515 |
+
"grad_norm": 0.14946097135543823,
|
5516 |
+
"learning_rate": 3.057953144266338e-06,
|
5517 |
+
"loss": 1.0131,
|
5518 |
+
"step": 787
|
5519 |
+
},
|
5520 |
+
{
|
5521 |
+
"epoch": 0.8647462277091906,
|
5522 |
+
"grad_norm": 0.1397130787372589,
|
5523 |
+
"learning_rate": 3.0332922318125775e-06,
|
5524 |
+
"loss": 1.1225,
|
5525 |
+
"step": 788
|
5526 |
+
},
|
5527 |
+
{
|
5528 |
+
"epoch": 0.8658436213991769,
|
5529 |
+
"grad_norm": 0.13838127255439758,
|
5530 |
+
"learning_rate": 3.0086313193588164e-06,
|
5531 |
+
"loss": 1.0598,
|
5532 |
+
"step": 789
|
5533 |
}
|
5534 |
],
|
5535 |
"logging_steps": 1,
|
|
|
5549 |
"attributes": {}
|
5550 |
}
|
5551 |
},
|
5552 |
+
"total_flos": 8.180270209825137e+17,
|
5553 |
"train_batch_size": 4,
|
5554 |
"trial_name": null,
|
5555 |
"trial_params": null
|