DuongTrongChi
commited on
Commit
•
9efdc17
1
Parent(s):
c0491be
Training in progress, step 428, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100198584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76fd31fb9f62306330a62d03ac774b78f6f28c2f740c1c11d27cc0b459d180f7
|
3 |
size 100198584
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50675604
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a32e133fd2812edc1c5dce1be27b5a46fee4bd8f173f66ca7b3afda005973393
|
3 |
size 50675604
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e4d88d6aa0cf77e00d03223bf672dc6270466833d08ef64560b03a03290bd1e
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2702,6 +2702,307 @@
|
|
2702 |
"learning_rate": 1.0239726027397261e-05,
|
2703 |
"loss": 1.2105,
|
2704 |
"step": 385
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2705 |
}
|
2706 |
],
|
2707 |
"logging_steps": 1,
|
@@ -2721,7 +3022,7 @@
|
|
2721 |
"attributes": {}
|
2722 |
}
|
2723 |
},
|
2724 |
-
"total_flos": 4.
|
2725 |
"train_batch_size": 4,
|
2726 |
"trial_name": null,
|
2727 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.6253310199981736,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 428,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2702 |
"learning_rate": 1.0239726027397261e-05,
|
2703 |
"loss": 1.2105,
|
2704 |
"step": 385
|
2705 |
+
},
|
2706 |
+
{
|
2707 |
+
"epoch": 0.5639667610263903,
|
2708 |
+
"grad_norm": 0.10605639219284058,
|
2709 |
+
"learning_rate": 1.0205479452054796e-05,
|
2710 |
+
"loss": 1.232,
|
2711 |
+
"step": 386
|
2712 |
+
},
|
2713 |
+
{
|
2714 |
+
"epoch": 0.5654278148114328,
|
2715 |
+
"grad_norm": 0.10501563549041748,
|
2716 |
+
"learning_rate": 1.0171232876712329e-05,
|
2717 |
+
"loss": 1.3193,
|
2718 |
+
"step": 387
|
2719 |
+
},
|
2720 |
+
{
|
2721 |
+
"epoch": 0.5668888685964752,
|
2722 |
+
"grad_norm": 0.1139717772603035,
|
2723 |
+
"learning_rate": 1.0136986301369864e-05,
|
2724 |
+
"loss": 1.1178,
|
2725 |
+
"step": 388
|
2726 |
+
},
|
2727 |
+
{
|
2728 |
+
"epoch": 0.5683499223815177,
|
2729 |
+
"grad_norm": 0.10598957538604736,
|
2730 |
+
"learning_rate": 1.0102739726027397e-05,
|
2731 |
+
"loss": 1.2438,
|
2732 |
+
"step": 389
|
2733 |
+
},
|
2734 |
+
{
|
2735 |
+
"epoch": 0.5698109761665602,
|
2736 |
+
"grad_norm": 0.10228073596954346,
|
2737 |
+
"learning_rate": 1.0068493150684933e-05,
|
2738 |
+
"loss": 1.2064,
|
2739 |
+
"step": 390
|
2740 |
+
},
|
2741 |
+
{
|
2742 |
+
"epoch": 0.5712720299516026,
|
2743 |
+
"grad_norm": 0.10198397189378738,
|
2744 |
+
"learning_rate": 1.0034246575342466e-05,
|
2745 |
+
"loss": 1.2535,
|
2746 |
+
"step": 391
|
2747 |
+
},
|
2748 |
+
{
|
2749 |
+
"epoch": 0.5727330837366451,
|
2750 |
+
"grad_norm": 0.12202975898981094,
|
2751 |
+
"learning_rate": 1e-05,
|
2752 |
+
"loss": 1.1619,
|
2753 |
+
"step": 392
|
2754 |
+
},
|
2755 |
+
{
|
2756 |
+
"epoch": 0.5741941375216875,
|
2757 |
+
"grad_norm": 0.11935204267501831,
|
2758 |
+
"learning_rate": 9.965753424657536e-06,
|
2759 |
+
"loss": 1.2056,
|
2760 |
+
"step": 393
|
2761 |
+
},
|
2762 |
+
{
|
2763 |
+
"epoch": 0.5756551913067299,
|
2764 |
+
"grad_norm": 0.10678353160619736,
|
2765 |
+
"learning_rate": 9.931506849315069e-06,
|
2766 |
+
"loss": 1.2223,
|
2767 |
+
"step": 394
|
2768 |
+
},
|
2769 |
+
{
|
2770 |
+
"epoch": 0.5771162450917724,
|
2771 |
+
"grad_norm": 0.10997404158115387,
|
2772 |
+
"learning_rate": 9.897260273972603e-06,
|
2773 |
+
"loss": 1.2401,
|
2774 |
+
"step": 395
|
2775 |
+
},
|
2776 |
+
{
|
2777 |
+
"epoch": 0.5785772988768149,
|
2778 |
+
"grad_norm": 0.11465183645486832,
|
2779 |
+
"learning_rate": 9.863013698630138e-06,
|
2780 |
+
"loss": 1.2073,
|
2781 |
+
"step": 396
|
2782 |
+
},
|
2783 |
+
{
|
2784 |
+
"epoch": 0.5800383526618573,
|
2785 |
+
"grad_norm": 0.13768929243087769,
|
2786 |
+
"learning_rate": 9.828767123287673e-06,
|
2787 |
+
"loss": 1.1872,
|
2788 |
+
"step": 397
|
2789 |
+
},
|
2790 |
+
{
|
2791 |
+
"epoch": 0.5814994064468998,
|
2792 |
+
"grad_norm": 0.12065139412879944,
|
2793 |
+
"learning_rate": 9.794520547945206e-06,
|
2794 |
+
"loss": 1.2145,
|
2795 |
+
"step": 398
|
2796 |
+
},
|
2797 |
+
{
|
2798 |
+
"epoch": 0.5829604602319423,
|
2799 |
+
"grad_norm": 0.10538379102945328,
|
2800 |
+
"learning_rate": 9.76027397260274e-06,
|
2801 |
+
"loss": 1.217,
|
2802 |
+
"step": 399
|
2803 |
+
},
|
2804 |
+
{
|
2805 |
+
"epoch": 0.5844215140169847,
|
2806 |
+
"grad_norm": 0.09868345409631729,
|
2807 |
+
"learning_rate": 9.726027397260275e-06,
|
2808 |
+
"loss": 1.255,
|
2809 |
+
"step": 400
|
2810 |
+
},
|
2811 |
+
{
|
2812 |
+
"epoch": 0.5858825678020272,
|
2813 |
+
"grad_norm": 0.10661034286022186,
|
2814 |
+
"learning_rate": 9.691780821917808e-06,
|
2815 |
+
"loss": 1.1742,
|
2816 |
+
"step": 401
|
2817 |
+
},
|
2818 |
+
{
|
2819 |
+
"epoch": 0.5873436215870697,
|
2820 |
+
"grad_norm": 0.11624684184789658,
|
2821 |
+
"learning_rate": 9.657534246575343e-06,
|
2822 |
+
"loss": 1.1662,
|
2823 |
+
"step": 402
|
2824 |
+
},
|
2825 |
+
{
|
2826 |
+
"epoch": 0.5888046753721121,
|
2827 |
+
"grad_norm": 0.11101629585027695,
|
2828 |
+
"learning_rate": 9.623287671232878e-06,
|
2829 |
+
"loss": 1.218,
|
2830 |
+
"step": 403
|
2831 |
+
},
|
2832 |
+
{
|
2833 |
+
"epoch": 0.5902657291571546,
|
2834 |
+
"grad_norm": 0.13213178515434265,
|
2835 |
+
"learning_rate": 9.589041095890411e-06,
|
2836 |
+
"loss": 1.133,
|
2837 |
+
"step": 404
|
2838 |
+
},
|
2839 |
+
{
|
2840 |
+
"epoch": 0.591726782942197,
|
2841 |
+
"grad_norm": 0.11517394334077835,
|
2842 |
+
"learning_rate": 9.554794520547946e-06,
|
2843 |
+
"loss": 1.1966,
|
2844 |
+
"step": 405
|
2845 |
+
},
|
2846 |
+
{
|
2847 |
+
"epoch": 0.5931878367272395,
|
2848 |
+
"grad_norm": 0.10408038645982742,
|
2849 |
+
"learning_rate": 9.52054794520548e-06,
|
2850 |
+
"loss": 1.1859,
|
2851 |
+
"step": 406
|
2852 |
+
},
|
2853 |
+
{
|
2854 |
+
"epoch": 0.594648890512282,
|
2855 |
+
"grad_norm": 0.1159515306353569,
|
2856 |
+
"learning_rate": 9.486301369863015e-06,
|
2857 |
+
"loss": 1.2286,
|
2858 |
+
"step": 407
|
2859 |
+
},
|
2860 |
+
{
|
2861 |
+
"epoch": 0.5961099442973244,
|
2862 |
+
"grad_norm": 0.11420222371816635,
|
2863 |
+
"learning_rate": 9.452054794520548e-06,
|
2864 |
+
"loss": 1.0952,
|
2865 |
+
"step": 408
|
2866 |
+
},
|
2867 |
+
{
|
2868 |
+
"epoch": 0.5975709980823669,
|
2869 |
+
"grad_norm": 0.11413077265024185,
|
2870 |
+
"learning_rate": 9.417808219178083e-06,
|
2871 |
+
"loss": 1.2159,
|
2872 |
+
"step": 409
|
2873 |
+
},
|
2874 |
+
{
|
2875 |
+
"epoch": 0.5990320518674094,
|
2876 |
+
"grad_norm": 0.12136485427618027,
|
2877 |
+
"learning_rate": 9.383561643835618e-06,
|
2878 |
+
"loss": 1.2166,
|
2879 |
+
"step": 410
|
2880 |
+
},
|
2881 |
+
{
|
2882 |
+
"epoch": 0.6004931056524518,
|
2883 |
+
"grad_norm": 0.12264648824930191,
|
2884 |
+
"learning_rate": 9.34931506849315e-06,
|
2885 |
+
"loss": 1.1057,
|
2886 |
+
"step": 411
|
2887 |
+
},
|
2888 |
+
{
|
2889 |
+
"epoch": 0.6019541594374943,
|
2890 |
+
"grad_norm": 0.10724509507417679,
|
2891 |
+
"learning_rate": 9.315068493150685e-06,
|
2892 |
+
"loss": 1.1633,
|
2893 |
+
"step": 412
|
2894 |
+
},
|
2895 |
+
{
|
2896 |
+
"epoch": 0.6034152132225368,
|
2897 |
+
"grad_norm": 0.11786479502916336,
|
2898 |
+
"learning_rate": 9.28082191780822e-06,
|
2899 |
+
"loss": 1.1942,
|
2900 |
+
"step": 413
|
2901 |
+
},
|
2902 |
+
{
|
2903 |
+
"epoch": 0.6048762670075792,
|
2904 |
+
"grad_norm": 0.10697019845247269,
|
2905 |
+
"learning_rate": 9.246575342465755e-06,
|
2906 |
+
"loss": 1.265,
|
2907 |
+
"step": 414
|
2908 |
+
},
|
2909 |
+
{
|
2910 |
+
"epoch": 0.6063373207926217,
|
2911 |
+
"grad_norm": 0.11874634027481079,
|
2912 |
+
"learning_rate": 9.212328767123288e-06,
|
2913 |
+
"loss": 1.2373,
|
2914 |
+
"step": 415
|
2915 |
+
},
|
2916 |
+
{
|
2917 |
+
"epoch": 0.6077983745776642,
|
2918 |
+
"grad_norm": 0.11465580761432648,
|
2919 |
+
"learning_rate": 9.178082191780823e-06,
|
2920 |
+
"loss": 1.1977,
|
2921 |
+
"step": 416
|
2922 |
+
},
|
2923 |
+
{
|
2924 |
+
"epoch": 0.6092594283627066,
|
2925 |
+
"grad_norm": 0.11860576272010803,
|
2926 |
+
"learning_rate": 9.143835616438357e-06,
|
2927 |
+
"loss": 1.1474,
|
2928 |
+
"step": 417
|
2929 |
+
},
|
2930 |
+
{
|
2931 |
+
"epoch": 0.6107204821477491,
|
2932 |
+
"grad_norm": 0.11340127140283585,
|
2933 |
+
"learning_rate": 9.10958904109589e-06,
|
2934 |
+
"loss": 1.219,
|
2935 |
+
"step": 418
|
2936 |
+
},
|
2937 |
+
{
|
2938 |
+
"epoch": 0.6121815359327916,
|
2939 |
+
"grad_norm": 0.1260974407196045,
|
2940 |
+
"learning_rate": 9.075342465753425e-06,
|
2941 |
+
"loss": 1.1837,
|
2942 |
+
"step": 419
|
2943 |
+
},
|
2944 |
+
{
|
2945 |
+
"epoch": 0.613642589717834,
|
2946 |
+
"grad_norm": 0.1299670934677124,
|
2947 |
+
"learning_rate": 9.04109589041096e-06,
|
2948 |
+
"loss": 1.0615,
|
2949 |
+
"step": 420
|
2950 |
+
},
|
2951 |
+
{
|
2952 |
+
"epoch": 0.6151036435028765,
|
2953 |
+
"grad_norm": 0.10845065861940384,
|
2954 |
+
"learning_rate": 9.006849315068495e-06,
|
2955 |
+
"loss": 1.2173,
|
2956 |
+
"step": 421
|
2957 |
+
},
|
2958 |
+
{
|
2959 |
+
"epoch": 0.616564697287919,
|
2960 |
+
"grad_norm": 0.10730204731225967,
|
2961 |
+
"learning_rate": 8.972602739726028e-06,
|
2962 |
+
"loss": 1.1314,
|
2963 |
+
"step": 422
|
2964 |
+
},
|
2965 |
+
{
|
2966 |
+
"epoch": 0.6180257510729614,
|
2967 |
+
"grad_norm": 0.10890056192874908,
|
2968 |
+
"learning_rate": 8.938356164383562e-06,
|
2969 |
+
"loss": 1.2128,
|
2970 |
+
"step": 423
|
2971 |
+
},
|
2972 |
+
{
|
2973 |
+
"epoch": 0.6194868048580038,
|
2974 |
+
"grad_norm": 0.11053816974163055,
|
2975 |
+
"learning_rate": 8.904109589041097e-06,
|
2976 |
+
"loss": 1.2554,
|
2977 |
+
"step": 424
|
2978 |
+
},
|
2979 |
+
{
|
2980 |
+
"epoch": 0.6209478586430462,
|
2981 |
+
"grad_norm": 0.1105181872844696,
|
2982 |
+
"learning_rate": 8.86986301369863e-06,
|
2983 |
+
"loss": 1.1943,
|
2984 |
+
"step": 425
|
2985 |
+
},
|
2986 |
+
{
|
2987 |
+
"epoch": 0.6224089124280887,
|
2988 |
+
"grad_norm": 0.12110709398984909,
|
2989 |
+
"learning_rate": 8.835616438356165e-06,
|
2990 |
+
"loss": 1.2072,
|
2991 |
+
"step": 426
|
2992 |
+
},
|
2993 |
+
{
|
2994 |
+
"epoch": 0.6238699662131312,
|
2995 |
+
"grad_norm": 0.12286946922540665,
|
2996 |
+
"learning_rate": 8.8013698630137e-06,
|
2997 |
+
"loss": 1.2465,
|
2998 |
+
"step": 427
|
2999 |
+
},
|
3000 |
+
{
|
3001 |
+
"epoch": 0.6253310199981736,
|
3002 |
+
"grad_norm": 0.132927805185318,
|
3003 |
+
"learning_rate": 8.767123287671233e-06,
|
3004 |
+
"loss": 1.1644,
|
3005 |
+
"step": 428
|
3006 |
}
|
3007 |
],
|
3008 |
"logging_steps": 1,
|
|
|
3022 |
"attributes": {}
|
3023 |
}
|
3024 |
},
|
3025 |
+
"total_flos": 4.8293970753705984e+17,
|
3026 |
"train_batch_size": 4,
|
3027 |
"trial_name": null,
|
3028 |
"trial_params": null
|