DuongTrongChi commited on
Commit
c366f35
·
verified ·
1 Parent(s): 2ae22e7

Training in progress, step 676, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cc5125f76d6dbaaf8f53b7058f9db944f682d3b54268cdcab102643bbb5c715
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47c1e90dbecf9635856d092c6cddea8202536da475af28c5df57f9a15b232128
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2353d647b00d14aca06f778f9d43a65a8201b5792af6fb89150d357af16ee31c
3
  size 50675604
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99bb6870709dc61c780604e0ba8b8967af8b3b68a8fe57d1de47cc64ae9e2f69
3
  size 50675604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4dfc30b5618d8f9126fd758d49456abcb3bac7a76ca1747eea78894ae958013
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9967ef256882f56c127a1407616df2fb585de0b861d9905ab72b987597cec7ec
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7308641975308642,
5
  "eval_steps": 500,
6
- "global_step": 666,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4669,6 +4669,76 @@
4669
  "learning_rate": 6.041923551171394e-06,
4670
  "loss": 1.2012,
4671
  "step": 666
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4672
  }
4673
  ],
4674
  "logging_steps": 1,
@@ -4688,7 +4758,7 @@
4688
  "attributes": {}
4689
  }
4690
  },
4691
- "total_flos": 6.904814083900785e+17,
4692
  "train_batch_size": 4,
4693
  "trial_name": null,
4694
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.741838134430727,
5
  "eval_steps": 500,
6
+ "global_step": 676,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4669
  "learning_rate": 6.041923551171394e-06,
4670
  "loss": 1.2012,
4671
  "step": 666
4672
+ },
4673
+ {
4674
+ "epoch": 0.7319615912208505,
4675
+ "grad_norm": 0.14305201172828674,
4676
+ "learning_rate": 6.017262638717633e-06,
4677
+ "loss": 1.2075,
4678
+ "step": 667
4679
+ },
4680
+ {
4681
+ "epoch": 0.7330589849108368,
4682
+ "grad_norm": 0.1388700008392334,
4683
+ "learning_rate": 5.9926017262638725e-06,
4684
+ "loss": 1.1049,
4685
+ "step": 668
4686
+ },
4687
+ {
4688
+ "epoch": 0.7341563786008231,
4689
+ "grad_norm": 0.13110363483428955,
4690
+ "learning_rate": 5.967940813810111e-06,
4691
+ "loss": 1.1915,
4692
+ "step": 669
4693
+ },
4694
+ {
4695
+ "epoch": 0.7352537722908093,
4696
+ "grad_norm": 0.1336205154657364,
4697
+ "learning_rate": 5.94327990135635e-06,
4698
+ "loss": 1.1189,
4699
+ "step": 670
4700
+ },
4701
+ {
4702
+ "epoch": 0.7363511659807956,
4703
+ "grad_norm": 0.15483205020427704,
4704
+ "learning_rate": 5.91861898890259e-06,
4705
+ "loss": 1.0508,
4706
+ "step": 671
4707
+ },
4708
+ {
4709
+ "epoch": 0.7374485596707819,
4710
+ "grad_norm": 0.1405985802412033,
4711
+ "learning_rate": 5.893958076448829e-06,
4712
+ "loss": 1.1348,
4713
+ "step": 672
4714
+ },
4715
+ {
4716
+ "epoch": 0.7385459533607682,
4717
+ "grad_norm": 0.13037075102329254,
4718
+ "learning_rate": 5.869297163995068e-06,
4719
+ "loss": 1.1437,
4720
+ "step": 673
4721
+ },
4722
+ {
4723
+ "epoch": 0.7396433470507544,
4724
+ "grad_norm": 0.12945199012756348,
4725
+ "learning_rate": 5.844636251541308e-06,
4726
+ "loss": 1.1265,
4727
+ "step": 674
4728
+ },
4729
+ {
4730
+ "epoch": 0.7407407407407407,
4731
+ "grad_norm": 0.1295364648103714,
4732
+ "learning_rate": 5.8199753390875466e-06,
4733
+ "loss": 1.1266,
4734
+ "step": 675
4735
+ },
4736
+ {
4737
+ "epoch": 0.741838134430727,
4738
+ "grad_norm": 0.12387209385633469,
4739
+ "learning_rate": 5.7953144266337855e-06,
4740
+ "loss": 1.184,
4741
+ "step": 676
4742
  }
4743
  ],
4744
  "logging_steps": 1,
 
4758
  "attributes": {}
4759
  }
4760
  },
4761
+ "total_flos": 7.010987710203372e+17,
4762
  "train_batch_size": 4,
4763
  "trial_name": null,
4764
  "trial_params": null