joeylieb commited on
Commit
531fb14
·
verified ·
1 Parent(s): e12f226

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +153 -3
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32e1c292adb4571bcaa540df997b7c0994c85f8419f91bb4efad4442f0634536
3
  size 497774208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59abd203968c30b325ef963aefd158405ffb0c164592824677d3b5d2487de3e8
3
  size 497774208
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00fd9310f86fbfb542d904f836dc8bd2b7ead76be499b38f559436d9efe5bc7a
3
  size 995642298
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52ac204cab4a817e8100cdb033c0c8ab1372a7b86cfb3c0706f570c2892f63b4
3
  size 995642298
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bd98c522c8acadc7236c4dec9a6d7f2c45122d9cdb2534e9e6ca864d9b22d54
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f57f0509bd63dc113aa1ccd67357a9fe454b5bd996f35077379f952d2eb5851
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7efd851823c41e299dc26218b34ebe2f67e3f195ba01b8ffc6443fa63eb93a3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bc67182f48043705bcf8b44274c98ceceabf129e73caaf0b5381b0cb1f4e36f
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.416276407534603,
5
  "eval_steps": 100,
6
- "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -607,6 +607,156 @@
607
  "eval_samples_per_second": 12.103,
608
  "eval_steps_per_second": 1.513,
609
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
610
  }
611
  ],
612
  "logging_steps": 100,
@@ -626,7 +776,7 @@
626
  "attributes": {}
627
  }
628
  },
629
- "total_flos": 8361345024000000.0,
630
  "train_batch_size": 8,
631
  "trial_name": null,
632
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5203455094182537,
5
  "eval_steps": 100,
6
+ "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
607
  "eval_samples_per_second": 12.103,
608
  "eval_steps_per_second": 1.513,
609
  "step": 4000
610
+ },
611
+ {
612
+ "epoch": 0.426683317722968,
613
+ "grad_norm": 6.157552242279053,
614
+ "learning_rate": 4.2888611371283866e-05,
615
+ "loss": 3.3412,
616
+ "step": 4100
617
+ },
618
+ {
619
+ "epoch": 0.426683317722968,
620
+ "eval_loss": 3.834092140197754,
621
+ "eval_runtime": 3534.2463,
622
+ "eval_samples_per_second": 11.864,
623
+ "eval_steps_per_second": 1.483,
624
+ "step": 4100
625
+ },
626
+ {
627
+ "epoch": 0.4370902279113331,
628
+ "grad_norm": 6.643843650817871,
629
+ "learning_rate": 4.271516286814445e-05,
630
+ "loss": 3.2945,
631
+ "step": 4200
632
+ },
633
+ {
634
+ "epoch": 0.4370902279113331,
635
+ "eval_loss": 3.8426012992858887,
636
+ "eval_runtime": 4149.9023,
637
+ "eval_samples_per_second": 10.104,
638
+ "eval_steps_per_second": 1.263,
639
+ "step": 4200
640
+ },
641
+ {
642
+ "epoch": 0.4474971380996982,
643
+ "grad_norm": 6.451257705688477,
644
+ "learning_rate": 4.254171436500503e-05,
645
+ "loss": 3.2203,
646
+ "step": 4300
647
+ },
648
+ {
649
+ "epoch": 0.4474971380996982,
650
+ "eval_loss": 3.840031147003174,
651
+ "eval_runtime": 4060.1081,
652
+ "eval_samples_per_second": 10.328,
653
+ "eval_steps_per_second": 1.291,
654
+ "step": 4300
655
+ },
656
+ {
657
+ "epoch": 0.45790404828806325,
658
+ "grad_norm": 7.092510223388672,
659
+ "learning_rate": 4.2368265861865615e-05,
660
+ "loss": 3.2779,
661
+ "step": 4400
662
+ },
663
+ {
664
+ "epoch": 0.45790404828806325,
665
+ "eval_loss": 3.8369994163513184,
666
+ "eval_runtime": 4092.0714,
667
+ "eval_samples_per_second": 10.247,
668
+ "eval_steps_per_second": 1.281,
669
+ "step": 4400
670
+ },
671
+ {
672
+ "epoch": 0.46831095847642834,
673
+ "grad_norm": 6.0258588790893555,
674
+ "learning_rate": 4.2194817358726196e-05,
675
+ "loss": 3.1867,
676
+ "step": 4500
677
+ },
678
+ {
679
+ "epoch": 0.46831095847642834,
680
+ "eval_loss": 3.837413787841797,
681
+ "eval_runtime": 4068.8274,
682
+ "eval_samples_per_second": 10.306,
683
+ "eval_steps_per_second": 1.288,
684
+ "step": 4500
685
+ },
686
+ {
687
+ "epoch": 0.47871786866479343,
688
+ "grad_norm": 6.42568302154541,
689
+ "learning_rate": 4.202136885558678e-05,
690
+ "loss": 3.2981,
691
+ "step": 4600
692
+ },
693
+ {
694
+ "epoch": 0.47871786866479343,
695
+ "eval_loss": 3.8490710258483887,
696
+ "eval_runtime": 4072.501,
697
+ "eval_samples_per_second": 10.296,
698
+ "eval_steps_per_second": 1.287,
699
+ "step": 4600
700
+ },
701
+ {
702
+ "epoch": 0.48912477885315847,
703
+ "grad_norm": 4.5895466804504395,
704
+ "learning_rate": 4.1847920352447364e-05,
705
+ "loss": 3.2523,
706
+ "step": 4700
707
+ },
708
+ {
709
+ "epoch": 0.48912477885315847,
710
+ "eval_loss": 3.8223540782928467,
711
+ "eval_runtime": 3505.4411,
712
+ "eval_samples_per_second": 11.962,
713
+ "eval_steps_per_second": 1.495,
714
+ "step": 4700
715
+ },
716
+ {
717
+ "epoch": 0.49953168904152356,
718
+ "grad_norm": 4.485264778137207,
719
+ "learning_rate": 4.1674471849307945e-05,
720
+ "loss": 3.2375,
721
+ "step": 4800
722
+ },
723
+ {
724
+ "epoch": 0.49953168904152356,
725
+ "eval_loss": 3.847806692123413,
726
+ "eval_runtime": 3468.0824,
727
+ "eval_samples_per_second": 12.091,
728
+ "eval_steps_per_second": 1.511,
729
+ "step": 4800
730
+ },
731
+ {
732
+ "epoch": 0.5099385992298886,
733
+ "grad_norm": 6.584381103515625,
734
+ "learning_rate": 4.1501023346168526e-05,
735
+ "loss": 3.2327,
736
+ "step": 4900
737
+ },
738
+ {
739
+ "epoch": 0.5099385992298886,
740
+ "eval_loss": 3.849238872528076,
741
+ "eval_runtime": 3470.3976,
742
+ "eval_samples_per_second": 12.083,
743
+ "eval_steps_per_second": 1.51,
744
+ "step": 4900
745
+ },
746
+ {
747
+ "epoch": 0.5203455094182537,
748
+ "grad_norm": 7.7780303955078125,
749
+ "learning_rate": 4.1327574843029107e-05,
750
+ "loss": 3.2209,
751
+ "step": 5000
752
+ },
753
+ {
754
+ "epoch": 0.5203455094182537,
755
+ "eval_loss": 3.855090856552124,
756
+ "eval_runtime": 3470.2986,
757
+ "eval_samples_per_second": 12.083,
758
+ "eval_steps_per_second": 1.511,
759
+ "step": 5000
760
  }
761
  ],
762
  "logging_steps": 100,
 
776
  "attributes": {}
777
  }
778
  },
779
+ "total_flos": 1.045168128e+16,
780
  "train_batch_size": 8,
781
  "trial_name": null,
782
  "trial_params": null