DuongTrongChi
commited on
Commit
•
8702ce7
1
Parent(s):
32ce1d5
Training in progress, step 116, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100198584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b570962ede4265c9488fa98dcd00095b1ca3d903d14f064ee79d3cb2379651f4
|
3 |
size 100198584
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50675156
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68afdadf9dcafbea18732f32b8ac5fa2ad488bf587daf988c9af28727179daa0
|
3 |
size 50675156
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07e779822c485743db355cfc0cc7805b58345253d12afcfcd7953cd3834152cb
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -595,6 +595,230 @@
|
|
595 |
"learning_rate": 1.6800000000000002e-05,
|
596 |
"loss": 1.6169,
|
597 |
"step": 84
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
598 |
}
|
599 |
],
|
600 |
"logging_steps": 1,
|
@@ -614,7 +838,7 @@
|
|
614 |
"attributes": {}
|
615 |
}
|
616 |
},
|
617 |
-
"total_flos":
|
618 |
"train_batch_size": 4,
|
619 |
"trial_name": null,
|
620 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.16948223906492557,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 116,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
595 |
"learning_rate": 1.6800000000000002e-05,
|
596 |
"loss": 1.6169,
|
597 |
"step": 84
|
598 |
+
},
|
599 |
+
{
|
600 |
+
"epoch": 0.12418957172860925,
|
601 |
+
"grad_norm": 0.3446858525276184,
|
602 |
+
"learning_rate": 1.7e-05,
|
603 |
+
"loss": 1.6721,
|
604 |
+
"step": 85
|
605 |
+
},
|
606 |
+
{
|
607 |
+
"epoch": 0.12565062551365172,
|
608 |
+
"grad_norm": 0.33179470896720886,
|
609 |
+
"learning_rate": 1.72e-05,
|
610 |
+
"loss": 1.578,
|
611 |
+
"step": 86
|
612 |
+
},
|
613 |
+
{
|
614 |
+
"epoch": 0.12711167929869419,
|
615 |
+
"grad_norm": 0.3791605830192566,
|
616 |
+
"learning_rate": 1.7400000000000003e-05,
|
617 |
+
"loss": 1.6055,
|
618 |
+
"step": 87
|
619 |
+
},
|
620 |
+
{
|
621 |
+
"epoch": 0.12857273308373665,
|
622 |
+
"grad_norm": 0.5245212912559509,
|
623 |
+
"learning_rate": 1.76e-05,
|
624 |
+
"loss": 1.626,
|
625 |
+
"step": 88
|
626 |
+
},
|
627 |
+
{
|
628 |
+
"epoch": 0.13003378686877912,
|
629 |
+
"grad_norm": 0.43215855956077576,
|
630 |
+
"learning_rate": 1.7800000000000002e-05,
|
631 |
+
"loss": 1.6177,
|
632 |
+
"step": 89
|
633 |
+
},
|
634 |
+
{
|
635 |
+
"epoch": 0.13149484065382158,
|
636 |
+
"grad_norm": 0.4050828516483307,
|
637 |
+
"learning_rate": 1.8e-05,
|
638 |
+
"loss": 1.4903,
|
639 |
+
"step": 90
|
640 |
+
},
|
641 |
+
{
|
642 |
+
"epoch": 0.13295589443886402,
|
643 |
+
"grad_norm": 0.399501234292984,
|
644 |
+
"learning_rate": 1.8200000000000002e-05,
|
645 |
+
"loss": 1.6079,
|
646 |
+
"step": 91
|
647 |
+
},
|
648 |
+
{
|
649 |
+
"epoch": 0.13441694822390649,
|
650 |
+
"grad_norm": 0.439622700214386,
|
651 |
+
"learning_rate": 1.8400000000000003e-05,
|
652 |
+
"loss": 1.5405,
|
653 |
+
"step": 92
|
654 |
+
},
|
655 |
+
{
|
656 |
+
"epoch": 0.13587800200894895,
|
657 |
+
"grad_norm": 0.4368193447589874,
|
658 |
+
"learning_rate": 1.86e-05,
|
659 |
+
"loss": 1.415,
|
660 |
+
"step": 93
|
661 |
+
},
|
662 |
+
{
|
663 |
+
"epoch": 0.13733905579399142,
|
664 |
+
"grad_norm": 0.3644118010997772,
|
665 |
+
"learning_rate": 1.88e-05,
|
666 |
+
"loss": 1.525,
|
667 |
+
"step": 94
|
668 |
+
},
|
669 |
+
{
|
670 |
+
"epoch": 0.13880010957903388,
|
671 |
+
"grad_norm": 0.3868708312511444,
|
672 |
+
"learning_rate": 1.9e-05,
|
673 |
+
"loss": 1.4903,
|
674 |
+
"step": 95
|
675 |
+
},
|
676 |
+
{
|
677 |
+
"epoch": 0.14026116336407635,
|
678 |
+
"grad_norm": 0.43034952878952026,
|
679 |
+
"learning_rate": 1.9200000000000003e-05,
|
680 |
+
"loss": 1.4605,
|
681 |
+
"step": 96
|
682 |
+
},
|
683 |
+
{
|
684 |
+
"epoch": 0.1417222171491188,
|
685 |
+
"grad_norm": 0.4087560772895813,
|
686 |
+
"learning_rate": 1.94e-05,
|
687 |
+
"loss": 1.3544,
|
688 |
+
"step": 97
|
689 |
+
},
|
690 |
+
{
|
691 |
+
"epoch": 0.14318327093416128,
|
692 |
+
"grad_norm": 0.29801666736602783,
|
693 |
+
"learning_rate": 1.9600000000000002e-05,
|
694 |
+
"loss": 1.4098,
|
695 |
+
"step": 98
|
696 |
+
},
|
697 |
+
{
|
698 |
+
"epoch": 0.14464432471920372,
|
699 |
+
"grad_norm": 0.275905966758728,
|
700 |
+
"learning_rate": 1.98e-05,
|
701 |
+
"loss": 1.4732,
|
702 |
+
"step": 99
|
703 |
+
},
|
704 |
+
{
|
705 |
+
"epoch": 0.14610537850424618,
|
706 |
+
"grad_norm": 0.32271912693977356,
|
707 |
+
"learning_rate": 2e-05,
|
708 |
+
"loss": 1.4156,
|
709 |
+
"step": 100
|
710 |
+
},
|
711 |
+
{
|
712 |
+
"epoch": 0.14756643228928865,
|
713 |
+
"grad_norm": 0.3191397190093994,
|
714 |
+
"learning_rate": 1.9965753424657538e-05,
|
715 |
+
"loss": 1.3254,
|
716 |
+
"step": 101
|
717 |
+
},
|
718 |
+
{
|
719 |
+
"epoch": 0.1490274860743311,
|
720 |
+
"grad_norm": 0.26260653138160706,
|
721 |
+
"learning_rate": 1.993150684931507e-05,
|
722 |
+
"loss": 1.3877,
|
723 |
+
"step": 102
|
724 |
+
},
|
725 |
+
{
|
726 |
+
"epoch": 0.15048853985937358,
|
727 |
+
"grad_norm": 0.2782766819000244,
|
728 |
+
"learning_rate": 1.9897260273972604e-05,
|
729 |
+
"loss": 1.3683,
|
730 |
+
"step": 103
|
731 |
+
},
|
732 |
+
{
|
733 |
+
"epoch": 0.15194959364441604,
|
734 |
+
"grad_norm": 0.2510565221309662,
|
735 |
+
"learning_rate": 1.9863013698630137e-05,
|
736 |
+
"loss": 1.3996,
|
737 |
+
"step": 104
|
738 |
+
},
|
739 |
+
{
|
740 |
+
"epoch": 0.1534106474294585,
|
741 |
+
"grad_norm": 0.2523151934146881,
|
742 |
+
"learning_rate": 1.9828767123287674e-05,
|
743 |
+
"loss": 1.3192,
|
744 |
+
"step": 105
|
745 |
+
},
|
746 |
+
{
|
747 |
+
"epoch": 0.15487170121450095,
|
748 |
+
"grad_norm": 0.20559488236904144,
|
749 |
+
"learning_rate": 1.9794520547945207e-05,
|
750 |
+
"loss": 1.2096,
|
751 |
+
"step": 106
|
752 |
+
},
|
753 |
+
{
|
754 |
+
"epoch": 0.1563327549995434,
|
755 |
+
"grad_norm": 0.17568816244602203,
|
756 |
+
"learning_rate": 1.9760273972602743e-05,
|
757 |
+
"loss": 1.3795,
|
758 |
+
"step": 107
|
759 |
+
},
|
760 |
+
{
|
761 |
+
"epoch": 0.15779380878458588,
|
762 |
+
"grad_norm": 0.1778278350830078,
|
763 |
+
"learning_rate": 1.9726027397260276e-05,
|
764 |
+
"loss": 1.3146,
|
765 |
+
"step": 108
|
766 |
+
},
|
767 |
+
{
|
768 |
+
"epoch": 0.15925486256962834,
|
769 |
+
"grad_norm": 0.18488670885562897,
|
770 |
+
"learning_rate": 1.969178082191781e-05,
|
771 |
+
"loss": 1.4105,
|
772 |
+
"step": 109
|
773 |
+
},
|
774 |
+
{
|
775 |
+
"epoch": 0.1607159163546708,
|
776 |
+
"grad_norm": 0.1593291312456131,
|
777 |
+
"learning_rate": 1.9657534246575346e-05,
|
778 |
+
"loss": 1.3054,
|
779 |
+
"step": 110
|
780 |
+
},
|
781 |
+
{
|
782 |
+
"epoch": 0.16217697013971327,
|
783 |
+
"grad_norm": 0.14311783015727997,
|
784 |
+
"learning_rate": 1.962328767123288e-05,
|
785 |
+
"loss": 1.3985,
|
786 |
+
"step": 111
|
787 |
+
},
|
788 |
+
{
|
789 |
+
"epoch": 0.16363802392475574,
|
790 |
+
"grad_norm": 0.14948627352714539,
|
791 |
+
"learning_rate": 1.9589041095890412e-05,
|
792 |
+
"loss": 1.3395,
|
793 |
+
"step": 112
|
794 |
+
},
|
795 |
+
{
|
796 |
+
"epoch": 0.1650990777097982,
|
797 |
+
"grad_norm": 0.14075608551502228,
|
798 |
+
"learning_rate": 1.9554794520547945e-05,
|
799 |
+
"loss": 1.3868,
|
800 |
+
"step": 113
|
801 |
+
},
|
802 |
+
{
|
803 |
+
"epoch": 0.16656013149484064,
|
804 |
+
"grad_norm": 0.14439420402050018,
|
805 |
+
"learning_rate": 1.952054794520548e-05,
|
806 |
+
"loss": 1.2985,
|
807 |
+
"step": 114
|
808 |
+
},
|
809 |
+
{
|
810 |
+
"epoch": 0.1680211852798831,
|
811 |
+
"grad_norm": 0.13425147533416748,
|
812 |
+
"learning_rate": 1.9486301369863014e-05,
|
813 |
+
"loss": 1.3855,
|
814 |
+
"step": 115
|
815 |
+
},
|
816 |
+
{
|
817 |
+
"epoch": 0.16948223906492557,
|
818 |
+
"grad_norm": 0.1291724145412445,
|
819 |
+
"learning_rate": 1.945205479452055e-05,
|
820 |
+
"loss": 1.2942,
|
821 |
+
"step": 116
|
822 |
}
|
823 |
],
|
824 |
"logging_steps": 1,
|
|
|
838 |
"attributes": {}
|
839 |
}
|
840 |
},
|
841 |
+
"total_flos": 1.304737835336663e+17,
|
842 |
"train_batch_size": 4,
|
843 |
"trial_name": null,
|
844 |
"trial_params": null
|