ardaspear committed on
Commit
6c361bc
·
verified ·
1 Parent(s): 6f33e93

Training in progress, step 445, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f14d35b2279e1bcb7fb4b7b70f56a7bc720d14962682ca3c0bbb2a32c572689
3
  size 80013120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0b1d1f985338e732480dad91d3b0c295a5acc699977017e5ced149d3591f017
3
  size 80013120
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:181e4d5770ede9dfda344e6d3f86cf5fb5b89a6d145168efd1058fc35a3da9fa
3
  size 41120084
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a75b1d231151f0dd236e71618c7ec5e0bb749f4609fe5f72d57da7bff6262c7e
3
  size 41120084
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d342406ff6e89eb8cdfff5b6dd64949295d3a0a84d5fbc6fc4e72a9f7457db32
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d719228f4ad72adb2eba882474a882697da2ca692f9ff39a991618eed3a74316
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d110b385a7dffb4d471adbd45c914f5fc5f8ea9533c165316859b91f2d4cdd1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7329c50730a00f35c6eac05c0e64503a6ca7eb6faad2aa5a4d5c0dd51c1d12ee
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7563308947664603,
5
  "eval_steps": 112,
6
- "global_step": 336,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -508,6 +508,160 @@
508
  "eval_samples_per_second": 12.752,
509
  "eval_steps_per_second": 6.376,
510
  "step": 336
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
  }
512
  ],
513
  "logging_steps": 5,
@@ -522,12 +676,12 @@
522
  "should_evaluate": false,
523
  "should_log": false,
524
  "should_save": true,
525
- "should_training_stop": false
526
  },
527
  "attributes": {}
528
  }
529
  },
530
- "total_flos": 1.1042063598904934e+17,
531
  "train_batch_size": 2,
532
  "trial_name": null,
533
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0016882386043895,
5
  "eval_steps": 112,
6
+ "global_step": 445,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
508
  "eval_samples_per_second": 12.752,
509
  "eval_steps_per_second": 6.376,
510
  "step": 336
511
+ },
512
+ {
513
+ "epoch": 0.7653348339898706,
514
+ "grad_norm": 0.010809916071593761,
515
+ "learning_rate": 1.3700225403843469e-05,
516
+ "loss": 0.0008,
517
+ "step": 340
518
+ },
519
+ {
520
+ "epoch": 0.7765897580191333,
521
+ "grad_norm": 0.045122772455215454,
522
+ "learning_rate": 1.2482508892179884e-05,
523
+ "loss": 0.0011,
524
+ "step": 345
525
+ },
526
+ {
527
+ "epoch": 0.7878446820483962,
528
+ "grad_norm": 0.008580501191318035,
529
+ "learning_rate": 1.1313708031358183e-05,
530
+ "loss": 0.0017,
531
+ "step": 350
532
+ },
533
+ {
534
+ "epoch": 0.799099606077659,
535
+ "grad_norm": 0.002904525026679039,
536
+ "learning_rate": 1.0195346714717813e-05,
537
+ "loss": 0.0006,
538
+ "step": 355
539
+ },
540
+ {
541
+ "epoch": 0.8103545301069218,
542
+ "grad_norm": 0.015455417335033417,
543
+ "learning_rate": 9.12888307205541e-06,
544
+ "loss": 0.0008,
545
+ "step": 360
546
+ },
547
+ {
548
+ "epoch": 0.8216094541361846,
549
+ "grad_norm": 0.012445817701518536,
550
+ "learning_rate": 8.115707568501768e-06,
551
+ "loss": 0.0009,
552
+ "step": 365
553
+ },
554
+ {
555
+ "epoch": 0.8328643781654473,
556
+ "grad_norm": 0.05810451880097389,
557
+ "learning_rate": 7.157141191620548e-06,
558
+ "loss": 0.0009,
559
+ "step": 370
560
+ },
561
+ {
562
+ "epoch": 0.8441193021947102,
563
+ "grad_norm": 0.006151565816253424,
564
+ "learning_rate": 6.2544337290925185e-06,
565
+ "loss": 0.0008,
566
+ "step": 375
567
+ },
568
+ {
569
+ "epoch": 0.855374226223973,
570
+ "grad_norm": 0.033290039747953415,
571
+ "learning_rate": 5.408762139230888e-06,
572
+ "loss": 0.0008,
573
+ "step": 380
574
+ },
575
+ {
576
+ "epoch": 0.8666291502532358,
577
+ "grad_norm": 0.005590901710093021,
578
+ "learning_rate": 4.621229016452156e-06,
579
+ "loss": 0.0018,
580
+ "step": 385
581
+ },
582
+ {
583
+ "epoch": 0.8778840742824986,
584
+ "grad_norm": 0.08342321962118149,
585
+ "learning_rate": 3.892861153703342e-06,
586
+ "loss": 0.001,
587
+ "step": 390
588
+ },
589
+ {
590
+ "epoch": 0.8891389983117614,
591
+ "grad_norm": 0.008743366226553917,
592
+ "learning_rate": 3.2246082037199532e-06,
593
+ "loss": 0.0007,
594
+ "step": 395
595
+ },
596
+ {
597
+ "epoch": 0.9003939223410242,
598
+ "grad_norm": 0.006391066592186689,
599
+ "learning_rate": 2.6173414408598827e-06,
600
+ "loss": 0.0008,
601
+ "step": 400
602
+ },
603
+ {
604
+ "epoch": 0.911648846370287,
605
+ "grad_norm": 0.016693001613020897,
606
+ "learning_rate": 2.0718526251279346e-06,
607
+ "loss": 0.0007,
608
+ "step": 405
609
+ },
610
+ {
611
+ "epoch": 0.9229037703995498,
612
+ "grad_norm": 0.00829355325549841,
613
+ "learning_rate": 1.5888529698718346e-06,
614
+ "loss": 0.001,
615
+ "step": 410
616
+ },
617
+ {
618
+ "epoch": 0.9341586944288126,
619
+ "grad_norm": 0.007485538721084595,
620
+ "learning_rate": 1.1689722144956671e-06,
621
+ "loss": 0.0015,
622
+ "step": 415
623
+ },
624
+ {
625
+ "epoch": 0.9454136184580754,
626
+ "grad_norm": 0.030309610068798065,
627
+ "learning_rate": 8.127578033998662e-07,
628
+ "loss": 0.0009,
629
+ "step": 420
630
+ },
631
+ {
632
+ "epoch": 0.9566685424873382,
633
+ "grad_norm": 0.020800478756427765,
634
+ "learning_rate": 5.206741722181386e-07,
635
+ "loss": 0.0008,
636
+ "step": 425
637
+ },
638
+ {
639
+ "epoch": 0.967923466516601,
640
+ "grad_norm": 0.0036028597969561815,
641
+ "learning_rate": 2.9310214228202013e-07,
642
+ "loss": 0.0007,
643
+ "step": 430
644
+ },
645
+ {
646
+ "epoch": 0.9791783905458639,
647
+ "grad_norm": 0.0036092002410441637,
648
+ "learning_rate": 1.3033842410251075e-07,
649
+ "loss": 0.0008,
650
+ "step": 435
651
+ },
652
+ {
653
+ "epoch": 0.9904333145751266,
654
+ "grad_norm": 0.004002279601991177,
655
+ "learning_rate": 3.259523051615254e-08,
656
+ "loss": 0.0008,
657
+ "step": 440
658
+ },
659
+ {
660
+ "epoch": 1.0016882386043895,
661
+ "grad_norm": 0.08439312875270844,
662
+ "learning_rate": 0.0,
663
+ "loss": 0.0013,
664
+ "step": 445
665
  }
666
  ],
667
  "logging_steps": 5,
 
676
  "should_evaluate": false,
677
  "should_log": false,
678
  "should_save": true,
679
+ "should_training_stop": true
680
  },
681
  "attributes": {}
682
  }
683
  },
684
+ "total_flos": 1.462005370107986e+17,
685
  "train_batch_size": 2,
686
  "trial_name": null,
687
  "trial_params": null