DuongTrongChi committed
Commit 9efdc17
1 Parent(s): c0491be

Training in progress, step 428, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:636cede6b3e50ff1861e2dc1273f6134e7156308fc6d82d873c1502b6d314e41
+oid sha256:76fd31fb9f62306330a62d03ac774b78f6f28c2f740c1c11d27cc0b459d180f7
 size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd537c19ebae2b08dbd0494d960639a61ecf609956482e67a546d6c169289698
+oid sha256:a32e133fd2812edc1c5dce1be27b5a46fee4bd8f173f66ca7b3afda005973393
 size 50675604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2475b14d80337b5e82132cfdcc36b578188577f583a180c04ac0c29d7bf259cc
+oid sha256:2e4d88d6aa0cf77e00d03223bf672dc6270466833d08ef64560b03a03290bd1e
 size 1064
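
All three files above are tracked with Git LFS, so the diff only updates each pointer file (the sha256 oid and the byte size), not the binary contents themselves. A minimal sketch for checking that a locally downloaded file matches its new pointer, assuming a local checkout of the checkpoint directory (the path and helper name below are illustrative):

import hashlib
from pathlib import Path

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file so large checkpoints need not fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected values copied from the new adapter_model.safetensors pointer above.
expected_oid = "76fd31fb9f62306330a62d03ac774b78f6f28c2f740c1c11d27cc0b459d180f7"
expected_size = 100198584

path = Path("last-checkpoint/adapter_model.safetensors")  # assumed local path
assert path.stat().st_size == expected_size, "size mismatch"
assert sha256_of(path) == expected_oid, "oid mismatch"
print("pointer and file agree")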
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5625057072413479,
+  "epoch": 0.6253310199981736,
   "eval_steps": 500,
-  "global_step": 385,
+  "global_step": 428,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2702,6 +2702,307 @@
       "learning_rate": 1.0239726027397261e-05,
       "loss": 1.2105,
       "step": 385
+    },
+    {
+      "epoch": 0.5639667610263903,
+      "grad_norm": 0.10605639219284058,
+      "learning_rate": 1.0205479452054796e-05,
+      "loss": 1.232,
+      "step": 386
+    },
+    {
+      "epoch": 0.5654278148114328,
+      "grad_norm": 0.10501563549041748,
+      "learning_rate": 1.0171232876712329e-05,
+      "loss": 1.3193,
+      "step": 387
+    },
+    {
+      "epoch": 0.5668888685964752,
+      "grad_norm": 0.1139717772603035,
+      "learning_rate": 1.0136986301369864e-05,
+      "loss": 1.1178,
+      "step": 388
+    },
+    {
+      "epoch": 0.5683499223815177,
+      "grad_norm": 0.10598957538604736,
+      "learning_rate": 1.0102739726027397e-05,
+      "loss": 1.2438,
+      "step": 389
+    },
+    {
+      "epoch": 0.5698109761665602,
+      "grad_norm": 0.10228073596954346,
+      "learning_rate": 1.0068493150684933e-05,
+      "loss": 1.2064,
+      "step": 390
+    },
+    {
+      "epoch": 0.5712720299516026,
+      "grad_norm": 0.10198397189378738,
+      "learning_rate": 1.0034246575342466e-05,
+      "loss": 1.2535,
+      "step": 391
+    },
+    {
+      "epoch": 0.5727330837366451,
+      "grad_norm": 0.12202975898981094,
+      "learning_rate": 1e-05,
+      "loss": 1.1619,
+      "step": 392
+    },
+    {
+      "epoch": 0.5741941375216875,
+      "grad_norm": 0.11935204267501831,
+      "learning_rate": 9.965753424657536e-06,
+      "loss": 1.2056,
+      "step": 393
+    },
+    {
+      "epoch": 0.5756551913067299,
+      "grad_norm": 0.10678353160619736,
+      "learning_rate": 9.931506849315069e-06,
+      "loss": 1.2223,
+      "step": 394
+    },
+    {
+      "epoch": 0.5771162450917724,
+      "grad_norm": 0.10997404158115387,
+      "learning_rate": 9.897260273972603e-06,
+      "loss": 1.2401,
+      "step": 395
+    },
+    {
+      "epoch": 0.5785772988768149,
+      "grad_norm": 0.11465183645486832,
+      "learning_rate": 9.863013698630138e-06,
+      "loss": 1.2073,
+      "step": 396
+    },
+    {
+      "epoch": 0.5800383526618573,
+      "grad_norm": 0.13768929243087769,
+      "learning_rate": 9.828767123287673e-06,
+      "loss": 1.1872,
+      "step": 397
+    },
+    {
+      "epoch": 0.5814994064468998,
+      "grad_norm": 0.12065139412879944,
+      "learning_rate": 9.794520547945206e-06,
+      "loss": 1.2145,
+      "step": 398
+    },
+    {
+      "epoch": 0.5829604602319423,
+      "grad_norm": 0.10538379102945328,
+      "learning_rate": 9.76027397260274e-06,
+      "loss": 1.217,
+      "step": 399
+    },
+    {
+      "epoch": 0.5844215140169847,
+      "grad_norm": 0.09868345409631729,
+      "learning_rate": 9.726027397260275e-06,
+      "loss": 1.255,
+      "step": 400
+    },
+    {
+      "epoch": 0.5858825678020272,
+      "grad_norm": 0.10661034286022186,
+      "learning_rate": 9.691780821917808e-06,
+      "loss": 1.1742,
+      "step": 401
+    },
+    {
+      "epoch": 0.5873436215870697,
+      "grad_norm": 0.11624684184789658,
+      "learning_rate": 9.657534246575343e-06,
+      "loss": 1.1662,
+      "step": 402
+    },
+    {
+      "epoch": 0.5888046753721121,
+      "grad_norm": 0.11101629585027695,
+      "learning_rate": 9.623287671232878e-06,
+      "loss": 1.218,
+      "step": 403
+    },
+    {
+      "epoch": 0.5902657291571546,
+      "grad_norm": 0.13213178515434265,
+      "learning_rate": 9.589041095890411e-06,
+      "loss": 1.133,
+      "step": 404
+    },
+    {
+      "epoch": 0.591726782942197,
+      "grad_norm": 0.11517394334077835,
+      "learning_rate": 9.554794520547946e-06,
+      "loss": 1.1966,
+      "step": 405
+    },
+    {
+      "epoch": 0.5931878367272395,
+      "grad_norm": 0.10408038645982742,
+      "learning_rate": 9.52054794520548e-06,
+      "loss": 1.1859,
+      "step": 406
+    },
+    {
+      "epoch": 0.594648890512282,
+      "grad_norm": 0.1159515306353569,
+      "learning_rate": 9.486301369863015e-06,
+      "loss": 1.2286,
+      "step": 407
+    },
+    {
+      "epoch": 0.5961099442973244,
+      "grad_norm": 0.11420222371816635,
+      "learning_rate": 9.452054794520548e-06,
+      "loss": 1.0952,
+      "step": 408
+    },
+    {
+      "epoch": 0.5975709980823669,
+      "grad_norm": 0.11413077265024185,
+      "learning_rate": 9.417808219178083e-06,
+      "loss": 1.2159,
+      "step": 409
+    },
+    {
+      "epoch": 0.5990320518674094,
+      "grad_norm": 0.12136485427618027,
+      "learning_rate": 9.383561643835618e-06,
+      "loss": 1.2166,
+      "step": 410
+    },
+    {
+      "epoch": 0.6004931056524518,
+      "grad_norm": 0.12264648824930191,
+      "learning_rate": 9.34931506849315e-06,
+      "loss": 1.1057,
+      "step": 411
+    },
+    {
+      "epoch": 0.6019541594374943,
+      "grad_norm": 0.10724509507417679,
+      "learning_rate": 9.315068493150685e-06,
+      "loss": 1.1633,
+      "step": 412
+    },
+    {
+      "epoch": 0.6034152132225368,
+      "grad_norm": 0.11786479502916336,
+      "learning_rate": 9.28082191780822e-06,
+      "loss": 1.1942,
+      "step": 413
+    },
+    {
+      "epoch": 0.6048762670075792,
+      "grad_norm": 0.10697019845247269,
+      "learning_rate": 9.246575342465755e-06,
+      "loss": 1.265,
+      "step": 414
+    },
+    {
+      "epoch": 0.6063373207926217,
+      "grad_norm": 0.11874634027481079,
+      "learning_rate": 9.212328767123288e-06,
+      "loss": 1.2373,
+      "step": 415
+    },
+    {
+      "epoch": 0.6077983745776642,
+      "grad_norm": 0.11465580761432648,
+      "learning_rate": 9.178082191780823e-06,
+      "loss": 1.1977,
+      "step": 416
+    },
+    {
+      "epoch": 0.6092594283627066,
+      "grad_norm": 0.11860576272010803,
+      "learning_rate": 9.143835616438357e-06,
+      "loss": 1.1474,
+      "step": 417
+    },
+    {
+      "epoch": 0.6107204821477491,
+      "grad_norm": 0.11340127140283585,
+      "learning_rate": 9.10958904109589e-06,
+      "loss": 1.219,
+      "step": 418
+    },
+    {
+      "epoch": 0.6121815359327916,
+      "grad_norm": 0.1260974407196045,
+      "learning_rate": 9.075342465753425e-06,
+      "loss": 1.1837,
+      "step": 419
+    },
+    {
+      "epoch": 0.613642589717834,
+      "grad_norm": 0.1299670934677124,
+      "learning_rate": 9.04109589041096e-06,
+      "loss": 1.0615,
+      "step": 420
+    },
+    {
+      "epoch": 0.6151036435028765,
+      "grad_norm": 0.10845065861940384,
+      "learning_rate": 9.006849315068495e-06,
+      "loss": 1.2173,
+      "step": 421
+    },
+    {
+      "epoch": 0.616564697287919,
+      "grad_norm": 0.10730204731225967,
+      "learning_rate": 8.972602739726028e-06,
+      "loss": 1.1314,
+      "step": 422
+    },
+    {
+      "epoch": 0.6180257510729614,
+      "grad_norm": 0.10890056192874908,
+      "learning_rate": 8.938356164383562e-06,
+      "loss": 1.2128,
+      "step": 423
+    },
+    {
+      "epoch": 0.6194868048580038,
+      "grad_norm": 0.11053816974163055,
+      "learning_rate": 8.904109589041097e-06,
+      "loss": 1.2554,
+      "step": 424
+    },
+    {
+      "epoch": 0.6209478586430462,
+      "grad_norm": 0.1105181872844696,
+      "learning_rate": 8.86986301369863e-06,
+      "loss": 1.1943,
+      "step": 425
+    },
+    {
+      "epoch": 0.6224089124280887,
+      "grad_norm": 0.12110709398984909,
+      "learning_rate": 8.835616438356165e-06,
+      "loss": 1.2072,
+      "step": 426
+    },
+    {
+      "epoch": 0.6238699662131312,
+      "grad_norm": 0.12286946922540665,
+      "learning_rate": 8.8013698630137e-06,
+      "loss": 1.2465,
+      "step": 427
+    },
+    {
+      "epoch": 0.6253310199981736,
+      "grad_norm": 0.132927805185318,
+      "learning_rate": 8.767123287671233e-06,
+      "loss": 1.1644,
+      "step": 428
     }
   ],
   "logging_steps": 1,
@@ -2721,7 +3022,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.340642918557778e+17,
+  "total_flos": 4.8293970753705984e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null