leixa committed on
Commit
ceca4a2
1 Parent(s): 0751e21

Training in progress, step 370, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10462e7bb907e56f44911b66dc9d08e780a22980744541436cfa57fd33089b28
3
  size 639691872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51e93f9b07950fc8df683cdec91d87256088ace10683cbac5af9cede43278671
3
  size 639691872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1288067af2f92d91e698359b4e37f06863ae4944657fba1418f6b5221da7ac34
3
  size 325340244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe04d3e67db8c78a8e60170320e230af5095c9aa115ae93543feddf6eaca0c1f
3
  size 325340244
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fad81ea991ac687f6089a33e4df7b4989f6dc0a113bcc48d47ff9341825e8c3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:157fa6bb405b0cfd2ed1c6356aa0d6ccce428abe09b3b4fbf8636f324e8c3d18
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a23e869a07ea343caba66ddef1ef2a01435b58bcc15218ec50d03fed44b9143
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b0e4fd9f0c05ddccf08c8e0de389cb9163fcb1e7234c97d791d86337bdc1d10
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.766734279918864,
5
  "eval_steps": 31,
6
- "global_step": 341,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -894,6 +894,76 @@
894
  "eval_samples_per_second": 15.463,
895
  "eval_steps_per_second": 1.933,
896
  "step": 341
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
897
  }
898
  ],
899
  "logging_steps": 3,
@@ -908,12 +978,12 @@
908
  "should_evaluate": false,
909
  "should_log": false,
910
  "should_save": true,
911
- "should_training_stop": false
912
  },
913
  "attributes": {}
914
  }
915
  },
916
- "total_flos": 4.531169671218463e+17,
917
  "train_batch_size": 8,
918
  "trial_name": null,
919
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.002028397565923,
5
  "eval_steps": 31,
6
+ "global_step": 370,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
894
  "eval_samples_per_second": 15.463,
895
  "eval_steps_per_second": 1.933,
896
  "step": 341
897
+ },
898
+ {
899
+ "epoch": 2.7748478701825556,
900
+ "grad_norm": 5.9872822761535645,
901
+ "learning_rate": 1.4852136862001764e-06,
902
+ "loss": 1.0829,
903
+ "step": 342
904
+ },
905
+ {
906
+ "epoch": 2.7991886409736306,
907
+ "grad_norm": 8.11224365234375,
908
+ "learning_rate": 1.1851996440033319e-06,
909
+ "loss": 0.8662,
910
+ "step": 345
911
+ },
912
+ {
913
+ "epoch": 2.8235294117647056,
914
+ "grad_norm": 5.697979927062988,
915
+ "learning_rate": 9.186408276168013e-07,
916
+ "loss": 0.985,
917
+ "step": 348
918
+ },
919
+ {
920
+ "epoch": 2.847870182555781,
921
+ "grad_norm": 5.0387678146362305,
922
+ "learning_rate": 6.857199231384282e-07,
923
+ "loss": 0.7812,
924
+ "step": 351
925
+ },
926
+ {
927
+ "epoch": 2.872210953346856,
928
+ "grad_norm": 5.872809886932373,
929
+ "learning_rate": 4.865965629214819e-07,
930
+ "loss": 0.7931,
931
+ "step": 354
932
+ },
933
+ {
934
+ "epoch": 2.896551724137931,
935
+ "grad_norm": 6.345264911651611,
936
+ "learning_rate": 3.214072161706272e-07,
937
+ "loss": 0.9912,
938
+ "step": 357
939
+ },
940
+ {
941
+ "epoch": 2.920892494929006,
942
+ "grad_norm": 8.418841361999512,
943
+ "learning_rate": 1.9026509541272275e-07,
944
+ "loss": 0.886,
945
+ "step": 360
946
+ },
947
+ {
948
+ "epoch": 2.945233265720081,
949
+ "grad_norm": 7.243491172790527,
950
+ "learning_rate": 9.3260078906654e-08,
951
+ "loss": 0.8986,
952
+ "step": 363
953
+ },
954
+ {
955
+ "epoch": 2.969574036511156,
956
+ "grad_norm": 5.745831489562988,
957
+ "learning_rate": 3.04586490452119e-08,
958
+ "loss": 0.7857,
959
+ "step": 366
960
+ },
961
+ {
962
+ "epoch": 2.9939148073022315,
963
+ "grad_norm": 6.222906112670898,
964
+ "learning_rate": 1.903846791434516e-09,
965
+ "loss": 1.0429,
966
+ "step": 369
967
  }
968
  ],
969
  "logging_steps": 3,
 
978
  "should_evaluate": false,
979
  "should_log": false,
980
  "should_save": true,
981
+ "should_training_stop": true
982
  },
983
  "attributes": {}
984
  }
985
  },
986
+ "total_flos": 4.914102986248028e+17,
987
  "train_batch_size": 8,
988
  "trial_name": null,
989
  "trial_params": null