leixa committed
Commit 11563bc
1 Parent(s): e009f19

Training in progress, step 378, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:acd89bab327e82c9d70eeec005795d508bf6adaacfc92ddd8f20f724469a6234
+oid sha256:204a349fb5f2880ebc9716e2c999f98555ac7db9085761fb01ccb8be60805c18
 size 671149168
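
Each binary file above is tracked as a Git LFS pointer (version / oid sha256 / size), so the diff only swaps the hash while the size stays constant. As a rough illustration, a script along these lines can confirm that a locally downloaded blob matches a pointer like the one recorded in this commit; the pointer and blob paths are placeholders, not part of this repository's layout.

# Sketch: check a downloaded blob against its Git LFS pointer text.
# Paths below are hypothetical.
import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path: str) -> dict:
    """Read the three-line LFS pointer (version, oid sha256:<hex>, size)."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_blob(pointer_path: str, blob_path: str) -> bool:
    """Return True if the blob's sha256 digest and byte size match the pointer."""
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    digest = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# Example (hypothetical paths):
# verify_blob("adapter_model.safetensors.pointer", "adapter_model.safetensors")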
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bbbe25b88e42f5ce81a38f5cf504c4dcaa7e05557dd4f368b68f622afa912480
+oid sha256:f474ba23721d4d9a1c85d01088a42d6297ce6fce23f3e15e68800d582b9d412b
 size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd03e0f639b7da9ed1de154fce4cfdfc9d7d7afd7ed92a06fd4ed6e19dbfb56b
+oid sha256:1b43a77608e3595ef8ce792c3fd5462a71b9b7958002088c96e8d041e0e2ab5b
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10affc9ced28dcfaf0d40e3497a97c8e7416bd057324538f99a7e1756fd84408
+oid sha256:6ac207b57c6cefba3838e335ba7ebf320ffdaee8162f1c0afc72ea9ad9f0725f
 size 1064
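
The optimizer, scheduler, and RNG state files updated above are ordinary torch.save artifacts (this is how the transformers Trainer typically writes them), so their contents can be inspected without rerunning training. A minimal sketch, assuming the last-checkpoint/ directory has been downloaded locally:

# Sketch: peek inside the PyTorch state files saved with this checkpoint.
# Assumes last-checkpoint/ has been fetched locally; key layout is the usual
# state_dict structure and may differ, so treat the prints as exploratory.
import torch

ckpt_dir = "last-checkpoint"

# These files contain pickled Python objects, so full unpickling is required
# (weights_only=False); only do this for checkpoints you trust.
optimizer_state = torch.load(f"{ckpt_dir}/optimizer.pt", map_location="cpu", weights_only=False)
print(optimizer_state.keys())                      # typically "state" and "param_groups"
print(optimizer_state["param_groups"][0].keys())   # current lr, betas, weight decay, ...

scheduler_state = torch.load(f"{ckpt_dir}/scheduler.pt", map_location="cpu", weights_only=False)
print(scheduler_state)                             # small dict, e.g. last_epoch / _step_count

rng_state = torch.load(f"{ckpt_dir}/rng_state.pth", map_location="cpu", weights_only=False)
print(type(rng_state))                             # RNG snapshot used for reproducible resumption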
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0944625407166124,
+  "epoch": 1.231270358306189,
   "eval_steps": 42,
-  "global_step": 336,
+  "global_step": 378,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -863,6 +863,112 @@
       "eval_samples_per_second": 23.319,
       "eval_steps_per_second": 5.864,
       "step": 336
+    },
+    {
+      "epoch": 1.1042345276872965,
+      "grad_norm": 2.279667615890503,
+      "learning_rate": 2.43550361297047e-05,
+      "loss": 0.8684,
+      "step": 339
+    },
+    {
+      "epoch": 1.1140065146579805,
+      "grad_norm": 2.2219250202178955,
+      "learning_rate": 2.353425010381063e-05,
+      "loss": 0.8651,
+      "step": 342
+    },
+    {
+      "epoch": 1.1237785016286646,
+      "grad_norm": 2.2543816566467285,
+      "learning_rate": 2.272325493947257e-05,
+      "loss": 0.8661,
+      "step": 345
+    },
+    {
+      "epoch": 1.1335504885993486,
+      "grad_norm": 2.2331888675689697,
+      "learning_rate": 2.192235065998126e-05,
+      "loss": 0.7047,
+      "step": 348
+    },
+    {
+      "epoch": 1.1433224755700326,
+      "grad_norm": 2.6700170040130615,
+      "learning_rate": 2.1131833555559037e-05,
+      "loss": 0.7296,
+      "step": 351
+    },
+    {
+      "epoch": 1.1530944625407167,
+      "grad_norm": 2.4180705547332764,
+      "learning_rate": 2.0351996073748713e-05,
+      "loss": 0.7674,
+      "step": 354
+    },
+    {
+      "epoch": 1.1628664495114007,
+      "grad_norm": 2.7486183643341064,
+      "learning_rate": 1.9583126711224343e-05,
+      "loss": 0.9823,
+      "step": 357
+    },
+    {
+      "epoch": 1.1726384364820848,
+      "grad_norm": 2.257678747177124,
+      "learning_rate": 1.8825509907063327e-05,
+      "loss": 0.8594,
+      "step": 360
+    },
+    {
+      "epoch": 1.1824104234527688,
+      "grad_norm": 2.3134396076202393,
+      "learning_rate": 1.807942593751973e-05,
+      "loss": 0.8651,
+      "step": 363
+    },
+    {
+      "epoch": 1.1921824104234529,
+      "grad_norm": 2.2541582584381104,
+      "learning_rate": 1.7345150812337564e-05,
+      "loss": 0.839,
+      "step": 366
+    },
+    {
+      "epoch": 1.201954397394137,
+      "grad_norm": 2.1207261085510254,
+      "learning_rate": 1.66229561726426e-05,
+      "loss": 0.7365,
+      "step": 369
+    },
+    {
+      "epoch": 1.211726384364821,
+      "grad_norm": 2.189333915710449,
+      "learning_rate": 1.5913109190450032e-05,
+      "loss": 0.7796,
+      "step": 372
+    },
+    {
+      "epoch": 1.221498371335505,
+      "grad_norm": 2.466726541519165,
+      "learning_rate": 1.5215872469825682e-05,
+      "loss": 0.8155,
+      "step": 375
+    },
+    {
+      "epoch": 1.231270358306189,
+      "grad_norm": 2.5175869464874268,
+      "learning_rate": 1.4531503949737108e-05,
+      "loss": 0.705,
+      "step": 378
+    },
+    {
+      "epoch": 1.231270358306189,
+      "eval_loss": 1.314825177192688,
+      "eval_runtime": 22.1791,
+      "eval_samples_per_second": 23.31,
+      "eval_steps_per_second": 5.861,
+      "step": 378
     }
   ],
   "logging_steps": 3,
@@ -882,7 +988,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.53383140179968e+17,
+  "total_flos": 2.8506192533363098e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null