Training in progress, step 837, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100198584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11816752771862021d1f1c094679a1a902cd618f90af4e8dba0e4478f5f39b0a
|
3 |
size 100198584
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50675604
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39351dce492da1ec76082c4d982fa0e400b6d4b4fec021f8577941b139b31751
|
3 |
size 50675604
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e3bbc37b1c4948deb4e7c7ff80c3871b0fd3eb0b6501980c9b6ab76dcbae87d
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -5586,6 +5586,286 @@
|
|
5586 |
"learning_rate": 2.81134401972873e-06,
|
5587 |
"loss": 1.1065,
|
5588 |
"step": 797
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5589 |
}
|
5590 |
],
|
5591 |
"logging_steps": 1,
|
@@ -5605,7 +5885,7 @@
|
|
5605 |
"attributes": {}
|
5606 |
}
|
5607 |
},
|
5608 |
-
"total_flos": 8.
|
5609 |
"train_batch_size": 4,
|
5610 |
"trial_name": null,
|
5611 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9185185185185185,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 837,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
5586 |
"learning_rate": 2.81134401972873e-06,
|
5587 |
"loss": 1.1065,
|
5588 |
"step": 797
|
5589 |
+
},
|
5590 |
+
{
|
5591 |
+
"epoch": 0.8757201646090536,
|
5592 |
+
"grad_norm": 0.13176120817661285,
|
5593 |
+
"learning_rate": 2.7866831072749695e-06,
|
5594 |
+
"loss": 1.1029,
|
5595 |
+
"step": 798
|
5596 |
+
},
|
5597 |
+
{
|
5598 |
+
"epoch": 0.8768175582990397,
|
5599 |
+
"grad_norm": 0.1394006758928299,
|
5600 |
+
"learning_rate": 2.7620221948212084e-06,
|
5601 |
+
"loss": 1.0872,
|
5602 |
+
"step": 799
|
5603 |
+
},
|
5604 |
+
{
|
5605 |
+
"epoch": 0.877914951989026,
|
5606 |
+
"grad_norm": 0.16219663619995117,
|
5607 |
+
"learning_rate": 2.7373612823674478e-06,
|
5608 |
+
"loss": 1.1006,
|
5609 |
+
"step": 800
|
5610 |
+
},
|
5611 |
+
{
|
5612 |
+
"epoch": 0.8790123456790123,
|
5613 |
+
"grad_norm": 0.13275958597660065,
|
5614 |
+
"learning_rate": 2.712700369913687e-06,
|
5615 |
+
"loss": 1.1087,
|
5616 |
+
"step": 801
|
5617 |
+
},
|
5618 |
+
{
|
5619 |
+
"epoch": 0.8801097393689986,
|
5620 |
+
"grad_norm": 0.15880566835403442,
|
5621 |
+
"learning_rate": 2.688039457459926e-06,
|
5622 |
+
"loss": 1.0905,
|
5623 |
+
"step": 802
|
5624 |
+
},
|
5625 |
+
{
|
5626 |
+
"epoch": 0.8812071330589849,
|
5627 |
+
"grad_norm": 0.14361722767353058,
|
5628 |
+
"learning_rate": 2.6633785450061657e-06,
|
5629 |
+
"loss": 1.0701,
|
5630 |
+
"step": 803
|
5631 |
+
},
|
5632 |
+
{
|
5633 |
+
"epoch": 0.8823045267489712,
|
5634 |
+
"grad_norm": 0.13469044864177704,
|
5635 |
+
"learning_rate": 2.6387176325524042e-06,
|
5636 |
+
"loss": 1.1495,
|
5637 |
+
"step": 804
|
5638 |
+
},
|
5639 |
+
{
|
5640 |
+
"epoch": 0.8834019204389575,
|
5641 |
+
"grad_norm": 0.16092169284820557,
|
5642 |
+
"learning_rate": 2.614056720098644e-06,
|
5643 |
+
"loss": 1.074,
|
5644 |
+
"step": 805
|
5645 |
+
},
|
5646 |
+
{
|
5647 |
+
"epoch": 0.8844993141289438,
|
5648 |
+
"grad_norm": 0.13376876711845398,
|
5649 |
+
"learning_rate": 2.5893958076448833e-06,
|
5650 |
+
"loss": 1.1405,
|
5651 |
+
"step": 806
|
5652 |
+
},
|
5653 |
+
{
|
5654 |
+
"epoch": 0.8855967078189301,
|
5655 |
+
"grad_norm": 0.1367831826210022,
|
5656 |
+
"learning_rate": 2.5647348951911222e-06,
|
5657 |
+
"loss": 1.0685,
|
5658 |
+
"step": 807
|
5659 |
+
},
|
5660 |
+
{
|
5661 |
+
"epoch": 0.8866941015089164,
|
5662 |
+
"grad_norm": 0.13057412207126617,
|
5663 |
+
"learning_rate": 2.5400739827373616e-06,
|
5664 |
+
"loss": 1.133,
|
5665 |
+
"step": 808
|
5666 |
+
},
|
5667 |
+
{
|
5668 |
+
"epoch": 0.8877914951989027,
|
5669 |
+
"grad_norm": 0.1330074518918991,
|
5670 |
+
"learning_rate": 2.5154130702836005e-06,
|
5671 |
+
"loss": 1.2354,
|
5672 |
+
"step": 809
|
5673 |
+
},
|
5674 |
+
{
|
5675 |
+
"epoch": 0.8888888888888888,
|
5676 |
+
"grad_norm": 0.15305842459201813,
|
5677 |
+
"learning_rate": 2.49075215782984e-06,
|
5678 |
+
"loss": 1.0574,
|
5679 |
+
"step": 810
|
5680 |
+
},
|
5681 |
+
{
|
5682 |
+
"epoch": 0.8899862825788751,
|
5683 |
+
"grad_norm": 0.13910268247127533,
|
5684 |
+
"learning_rate": 2.466091245376079e-06,
|
5685 |
+
"loss": 1.0733,
|
5686 |
+
"step": 811
|
5687 |
+
},
|
5688 |
+
{
|
5689 |
+
"epoch": 0.8910836762688614,
|
5690 |
+
"grad_norm": 0.13843494653701782,
|
5691 |
+
"learning_rate": 2.441430332922318e-06,
|
5692 |
+
"loss": 1.0353,
|
5693 |
+
"step": 812
|
5694 |
+
},
|
5695 |
+
{
|
5696 |
+
"epoch": 0.8921810699588477,
|
5697 |
+
"grad_norm": 0.14887547492980957,
|
5698 |
+
"learning_rate": 2.416769420468558e-06,
|
5699 |
+
"loss": 0.9972,
|
5700 |
+
"step": 813
|
5701 |
+
},
|
5702 |
+
{
|
5703 |
+
"epoch": 0.893278463648834,
|
5704 |
+
"grad_norm": 0.13981156051158905,
|
5705 |
+
"learning_rate": 2.3921085080147967e-06,
|
5706 |
+
"loss": 1.2125,
|
5707 |
+
"step": 814
|
5708 |
+
},
|
5709 |
+
{
|
5710 |
+
"epoch": 0.8943758573388203,
|
5711 |
+
"grad_norm": 0.13580431044101715,
|
5712 |
+
"learning_rate": 2.367447595561036e-06,
|
5713 |
+
"loss": 1.2606,
|
5714 |
+
"step": 815
|
5715 |
+
},
|
5716 |
+
{
|
5717 |
+
"epoch": 0.8954732510288066,
|
5718 |
+
"grad_norm": 0.14896319806575775,
|
5719 |
+
"learning_rate": 2.342786683107275e-06,
|
5720 |
+
"loss": 1.1067,
|
5721 |
+
"step": 816
|
5722 |
+
},
|
5723 |
+
{
|
5724 |
+
"epoch": 0.8965706447187929,
|
5725 |
+
"grad_norm": 0.1296452283859253,
|
5726 |
+
"learning_rate": 2.3181257706535143e-06,
|
5727 |
+
"loss": 1.0699,
|
5728 |
+
"step": 817
|
5729 |
+
},
|
5730 |
+
{
|
5731 |
+
"epoch": 0.8976680384087792,
|
5732 |
+
"grad_norm": 0.145808607339859,
|
5733 |
+
"learning_rate": 2.2934648581997536e-06,
|
5734 |
+
"loss": 1.1611,
|
5735 |
+
"step": 818
|
5736 |
+
},
|
5737 |
+
{
|
5738 |
+
"epoch": 0.8987654320987655,
|
5739 |
+
"grad_norm": 0.13980808854103088,
|
5740 |
+
"learning_rate": 2.268803945745993e-06,
|
5741 |
+
"loss": 1.0291,
|
5742 |
+
"step": 819
|
5743 |
+
},
|
5744 |
+
{
|
5745 |
+
"epoch": 0.8998628257887518,
|
5746 |
+
"grad_norm": 0.13322117924690247,
|
5747 |
+
"learning_rate": 2.244143033292232e-06,
|
5748 |
+
"loss": 1.0608,
|
5749 |
+
"step": 820
|
5750 |
+
},
|
5751 |
+
{
|
5752 |
+
"epoch": 0.900960219478738,
|
5753 |
+
"grad_norm": 0.1385853886604309,
|
5754 |
+
"learning_rate": 2.219482120838471e-06,
|
5755 |
+
"loss": 1.0595,
|
5756 |
+
"step": 821
|
5757 |
+
},
|
5758 |
+
{
|
5759 |
+
"epoch": 0.9020576131687242,
|
5760 |
+
"grad_norm": 0.16439485549926758,
|
5761 |
+
"learning_rate": 2.1948212083847105e-06,
|
5762 |
+
"loss": 1.031,
|
5763 |
+
"step": 822
|
5764 |
+
},
|
5765 |
+
{
|
5766 |
+
"epoch": 0.9031550068587105,
|
5767 |
+
"grad_norm": 0.12988966703414917,
|
5768 |
+
"learning_rate": 2.1701602959309494e-06,
|
5769 |
+
"loss": 1.0905,
|
5770 |
+
"step": 823
|
5771 |
+
},
|
5772 |
+
{
|
5773 |
+
"epoch": 0.9042524005486968,
|
5774 |
+
"grad_norm": 0.13069093227386475,
|
5775 |
+
"learning_rate": 2.1454993834771887e-06,
|
5776 |
+
"loss": 1.1536,
|
5777 |
+
"step": 824
|
5778 |
+
},
|
5779 |
+
{
|
5780 |
+
"epoch": 0.9053497942386831,
|
5781 |
+
"grad_norm": 0.13863211870193481,
|
5782 |
+
"learning_rate": 2.120838471023428e-06,
|
5783 |
+
"loss": 1.1898,
|
5784 |
+
"step": 825
|
5785 |
+
},
|
5786 |
+
{
|
5787 |
+
"epoch": 0.9064471879286694,
|
5788 |
+
"grad_norm": 0.14132994413375854,
|
5789 |
+
"learning_rate": 2.0961775585696674e-06,
|
5790 |
+
"loss": 1.1759,
|
5791 |
+
"step": 826
|
5792 |
+
},
|
5793 |
+
{
|
5794 |
+
"epoch": 0.9075445816186557,
|
5795 |
+
"grad_norm": 0.14824488759040833,
|
5796 |
+
"learning_rate": 2.0715166461159063e-06,
|
5797 |
+
"loss": 1.0744,
|
5798 |
+
"step": 827
|
5799 |
+
},
|
5800 |
+
{
|
5801 |
+
"epoch": 0.908641975308642,
|
5802 |
+
"grad_norm": 0.1388639658689499,
|
5803 |
+
"learning_rate": 2.0468557336621456e-06,
|
5804 |
+
"loss": 1.0687,
|
5805 |
+
"step": 828
|
5806 |
+
},
|
5807 |
+
{
|
5808 |
+
"epoch": 0.9097393689986283,
|
5809 |
+
"grad_norm": 0.14056843519210815,
|
5810 |
+
"learning_rate": 2.022194821208385e-06,
|
5811 |
+
"loss": 1.1299,
|
5812 |
+
"step": 829
|
5813 |
+
},
|
5814 |
+
{
|
5815 |
+
"epoch": 0.9108367626886146,
|
5816 |
+
"grad_norm": 0.1364564299583435,
|
5817 |
+
"learning_rate": 1.9975339087546243e-06,
|
5818 |
+
"loss": 1.1216,
|
5819 |
+
"step": 830
|
5820 |
+
},
|
5821 |
+
{
|
5822 |
+
"epoch": 0.9119341563786009,
|
5823 |
+
"grad_norm": 0.14670343697071075,
|
5824 |
+
"learning_rate": 1.9728729963008632e-06,
|
5825 |
+
"loss": 1.0785,
|
5826 |
+
"step": 831
|
5827 |
+
},
|
5828 |
+
{
|
5829 |
+
"epoch": 0.9130315500685872,
|
5830 |
+
"grad_norm": 0.13665646314620972,
|
5831 |
+
"learning_rate": 1.9482120838471025e-06,
|
5832 |
+
"loss": 1.0355,
|
5833 |
+
"step": 832
|
5834 |
+
},
|
5835 |
+
{
|
5836 |
+
"epoch": 0.9141289437585733,
|
5837 |
+
"grad_norm": 0.1377921998500824,
|
5838 |
+
"learning_rate": 1.9235511713933415e-06,
|
5839 |
+
"loss": 1.1445,
|
5840 |
+
"step": 833
|
5841 |
+
},
|
5842 |
+
{
|
5843 |
+
"epoch": 0.9152263374485596,
|
5844 |
+
"grad_norm": 0.12789370119571686,
|
5845 |
+
"learning_rate": 1.8988902589395808e-06,
|
5846 |
+
"loss": 1.1242,
|
5847 |
+
"step": 834
|
5848 |
+
},
|
5849 |
+
{
|
5850 |
+
"epoch": 0.9163237311385459,
|
5851 |
+
"grad_norm": 0.12622785568237305,
|
5852 |
+
"learning_rate": 1.8742293464858201e-06,
|
5853 |
+
"loss": 1.1358,
|
5854 |
+
"step": 835
|
5855 |
+
},
|
5856 |
+
{
|
5857 |
+
"epoch": 0.9174211248285322,
|
5858 |
+
"grad_norm": 0.14954856038093567,
|
5859 |
+
"learning_rate": 1.8495684340320595e-06,
|
5860 |
+
"loss": 1.0822,
|
5861 |
+
"step": 836
|
5862 |
+
},
|
5863 |
+
{
|
5864 |
+
"epoch": 0.9185185185185185,
|
5865 |
+
"grad_norm": 0.12256734073162079,
|
5866 |
+
"learning_rate": 1.8249075215782986e-06,
|
5867 |
+
"loss": 1.0968,
|
5868 |
+
"step": 837
|
5869 |
}
|
5870 |
],
|
5871 |
"logging_steps": 1,
|
|
|
5885 |
"attributes": {}
|
5886 |
}
|
5887 |
},
|
5888 |
+
"total_flos": 8.67918284112937e+17,
|
5889 |
"train_batch_size": 4,
|
5890 |
"trial_name": null,
|
5891 |
"trial_params": null
|