DuongTrongChi committed on
Commit
4af5e2d
·
verified ·
1 Parent(s): 4fe1bcd

Training in progress, step 684, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47c1e90dbecf9635856d092c6cddea8202536da475af28c5df57f9a15b232128
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bb3d5a1f2bc1aa0163e2600885712b21040ba69d0674c75e1d94918847b2c42
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99bb6870709dc61c780604e0ba8b8967af8b3b68a8fe57d1de47cc64ae9e2f69
3
  size 50675604
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7db8f3ea3079591a8d48d7a67e7bbe0855bf2ee0d183b3e3c3acbf5c02c8cbf2
3
  size 50675604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9967ef256882f56c127a1407616df2fb585de0b861d9905ab72b987597cec7ec
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4aedc1d040a37f21c3823ca8487bdb8d19041f448765618e91b99e64d27abfb7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.741838134430727,
5
  "eval_steps": 500,
6
- "global_step": 676,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4739,6 +4739,62 @@
4739
  "learning_rate": 5.7953144266337855e-06,
4740
  "loss": 1.184,
4741
  "step": 676
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4742
  }
4743
  ],
4744
  "logging_steps": 1,
@@ -4758,7 +4814,7 @@
4758
  "attributes": {}
4759
  }
4760
  },
4761
- "total_flos": 7.010987710203372e+17,
4762
  "train_batch_size": 4,
4763
  "trial_name": null,
4764
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7506172839506173,
5
  "eval_steps": 500,
6
+ "global_step": 684,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4739
  "learning_rate": 5.7953144266337855e-06,
4740
  "loss": 1.184,
4741
  "step": 676
4742
+ },
4743
+ {
4744
+ "epoch": 0.7429355281207133,
4745
+ "grad_norm": 0.12598906457424164,
4746
+ "learning_rate": 5.770653514180024e-06,
4747
+ "loss": 1.1155,
4748
+ "step": 677
4749
+ },
4750
+ {
4751
+ "epoch": 0.7440329218106996,
4752
+ "grad_norm": 0.13896718621253967,
4753
+ "learning_rate": 5.745992601726265e-06,
4754
+ "loss": 1.1824,
4755
+ "step": 678
4756
+ },
4757
+ {
4758
+ "epoch": 0.7451303155006859,
4759
+ "grad_norm": 0.13778887689113617,
4760
+ "learning_rate": 5.721331689272504e-06,
4761
+ "loss": 1.1126,
4762
+ "step": 679
4763
+ },
4764
+ {
4765
+ "epoch": 0.7462277091906722,
4766
+ "grad_norm": 0.12722033262252808,
4767
+ "learning_rate": 5.696670776818743e-06,
4768
+ "loss": 1.1672,
4769
+ "step": 680
4770
+ },
4771
+ {
4772
+ "epoch": 0.7473251028806585,
4773
+ "grad_norm": 0.13544504344463348,
4774
+ "learning_rate": 5.6720098643649825e-06,
4775
+ "loss": 1.1423,
4776
+ "step": 681
4777
+ },
4778
+ {
4779
+ "epoch": 0.7484224965706447,
4780
+ "grad_norm": 0.14108151197433472,
4781
+ "learning_rate": 5.6473489519112214e-06,
4782
+ "loss": 1.1087,
4783
+ "step": 682
4784
+ },
4785
+ {
4786
+ "epoch": 0.749519890260631,
4787
+ "grad_norm": 0.15130096673965454,
4788
+ "learning_rate": 5.62268803945746e-06,
4789
+ "loss": 1.0687,
4790
+ "step": 683
4791
+ },
4792
+ {
4793
+ "epoch": 0.7506172839506173,
4794
+ "grad_norm": 0.15002749860286713,
4795
+ "learning_rate": 5.5980271270037e-06,
4796
+ "loss": 1.0799,
4797
+ "step": 684
4798
  }
4799
  ],
4800
  "logging_steps": 1,
 
4814
  "attributes": {}
4815
  }
4816
  },
4817
+ "total_flos": 7.092525781016617e+17,
4818
  "train_batch_size": 4,
4819
  "trial_name": null,
4820
  "trial_params": null