DuongTrongChi
commited on
Training in progress, step 676, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100198584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47c1e90dbecf9635856d092c6cddea8202536da475af28c5df57f9a15b232128
|
3 |
size 100198584
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50675604
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99bb6870709dc61c780604e0ba8b8967af8b3b68a8fe57d1de47cc64ae9e2f69
|
3 |
size 50675604
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9967ef256882f56c127a1407616df2fb585de0b861d9905ab72b987597cec7ec
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4669,6 +4669,76 @@
|
|
4669 |
"learning_rate": 6.041923551171394e-06,
|
4670 |
"loss": 1.2012,
|
4671 |
"step": 666
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4672 |
}
|
4673 |
],
|
4674 |
"logging_steps": 1,
|
@@ -4688,7 +4758,7 @@
|
|
4688 |
"attributes": {}
|
4689 |
}
|
4690 |
},
|
4691 |
-
"total_flos":
|
4692 |
"train_batch_size": 4,
|
4693 |
"trial_name": null,
|
4694 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.741838134430727,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 676,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4669 |
"learning_rate": 6.041923551171394e-06,
|
4670 |
"loss": 1.2012,
|
4671 |
"step": 666
|
4672 |
+
},
|
4673 |
+
{
|
4674 |
+
"epoch": 0.7319615912208505,
|
4675 |
+
"grad_norm": 0.14305201172828674,
|
4676 |
+
"learning_rate": 6.017262638717633e-06,
|
4677 |
+
"loss": 1.2075,
|
4678 |
+
"step": 667
|
4679 |
+
},
|
4680 |
+
{
|
4681 |
+
"epoch": 0.7330589849108368,
|
4682 |
+
"grad_norm": 0.1388700008392334,
|
4683 |
+
"learning_rate": 5.9926017262638725e-06,
|
4684 |
+
"loss": 1.1049,
|
4685 |
+
"step": 668
|
4686 |
+
},
|
4687 |
+
{
|
4688 |
+
"epoch": 0.7341563786008231,
|
4689 |
+
"grad_norm": 0.13110363483428955,
|
4690 |
+
"learning_rate": 5.967940813810111e-06,
|
4691 |
+
"loss": 1.1915,
|
4692 |
+
"step": 669
|
4693 |
+
},
|
4694 |
+
{
|
4695 |
+
"epoch": 0.7352537722908093,
|
4696 |
+
"grad_norm": 0.1336205154657364,
|
4697 |
+
"learning_rate": 5.94327990135635e-06,
|
4698 |
+
"loss": 1.1189,
|
4699 |
+
"step": 670
|
4700 |
+
},
|
4701 |
+
{
|
4702 |
+
"epoch": 0.7363511659807956,
|
4703 |
+
"grad_norm": 0.15483205020427704,
|
4704 |
+
"learning_rate": 5.91861898890259e-06,
|
4705 |
+
"loss": 1.0508,
|
4706 |
+
"step": 671
|
4707 |
+
},
|
4708 |
+
{
|
4709 |
+
"epoch": 0.7374485596707819,
|
4710 |
+
"grad_norm": 0.1405985802412033,
|
4711 |
+
"learning_rate": 5.893958076448829e-06,
|
4712 |
+
"loss": 1.1348,
|
4713 |
+
"step": 672
|
4714 |
+
},
|
4715 |
+
{
|
4716 |
+
"epoch": 0.7385459533607682,
|
4717 |
+
"grad_norm": 0.13037075102329254,
|
4718 |
+
"learning_rate": 5.869297163995068e-06,
|
4719 |
+
"loss": 1.1437,
|
4720 |
+
"step": 673
|
4721 |
+
},
|
4722 |
+
{
|
4723 |
+
"epoch": 0.7396433470507544,
|
4724 |
+
"grad_norm": 0.12945199012756348,
|
4725 |
+
"learning_rate": 5.844636251541308e-06,
|
4726 |
+
"loss": 1.1265,
|
4727 |
+
"step": 674
|
4728 |
+
},
|
4729 |
+
{
|
4730 |
+
"epoch": 0.7407407407407407,
|
4731 |
+
"grad_norm": 0.1295364648103714,
|
4732 |
+
"learning_rate": 5.8199753390875466e-06,
|
4733 |
+
"loss": 1.1266,
|
4734 |
+
"step": 675
|
4735 |
+
},
|
4736 |
+
{
|
4737 |
+
"epoch": 0.741838134430727,
|
4738 |
+
"grad_norm": 0.12387209385633469,
|
4739 |
+
"learning_rate": 5.7953144266337855e-06,
|
4740 |
+
"loss": 1.184,
|
4741 |
+
"step": 676
|
4742 |
}
|
4743 |
],
|
4744 |
"logging_steps": 1,
|
|
|
4758 |
"attributes": {}
|
4759 |
}
|
4760 |
},
|
4761 |
+
"total_flos": 7.010987710203372e+17,
|
4762 |
"train_batch_size": 4,
|
4763 |
"trial_name": null,
|
4764 |
"trial_params": null
|