CocoRoF commited on
Commit
ee8aed3
·
verified ·
1 Parent(s): 8b6c1bb

Training in progress, step 125, checkpoint

Browse files
last-checkpoint/2_Dense/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bc2edab840a77bbfe5f20e704bb8120b1f6f291c9a5bbbe6a18c982dc2722de
3
  size 2362528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cb909a27144c79c2c3696fd456cf85205a067d02dcbcb09a888374d047b2284
3
  size 2362528
last-checkpoint/README.md CHANGED
@@ -63,7 +63,7 @@ model-index:
63
  type: test_triplet
64
  metrics:
65
  - type: cosine_accuracy
66
- value: 0.9753350019454956
67
  name: Cosine Accuracy
68
  ---
69
 
@@ -166,7 +166,7 @@ You can finetune this model on your own dataset.
166
 
167
  | Metric | Value |
168
  |:--------------------|:-----------|
169
- | **cosine_accuracy** | **0.9753** |
170
 
171
  <!--
172
  ## Bias, Risks and Limitations
@@ -370,6 +370,8 @@ You can finetune this model on your own dataset.
370
  </details>
371
 
372
  ### Training Logs
 
 
373
  | Epoch | Step | Training Loss | Validation Loss | test_triplet_cosine_accuracy |
374
  |:------:|:----:|:-------------:|:---------------:|:----------------------------:|
375
  | 0.0185 | 1 | 2.3684 | - | - |
@@ -472,7 +474,33 @@ You can finetune this model on your own dataset.
472
  | 1.8148 | 98 | 0.4057 | - | - |
473
  | 1.8333 | 99 | 0.4018 | - | - |
474
  | 1.8519 | 100 | 0.3852 | 0.0139 | 0.9753 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
475
 
 
476
 
477
  ### Framework Versions
478
  - Python: 3.11.10
 
63
  type: test_triplet
64
  metrics:
65
  - type: cosine_accuracy
66
+ value: 0.9766299724578857
67
  name: Cosine Accuracy
68
  ---
69
 
 
166
 
167
  | Metric | Value |
168
  |:--------------------|:-----------|
169
+ | **cosine_accuracy** | **0.9766** |
170
 
171
  <!--
172
  ## Bias, Risks and Limitations
 
370
  </details>
371
 
372
  ### Training Logs
373
+ <details><summary>Click to expand</summary>
374
+
375
  | Epoch | Step | Training Loss | Validation Loss | test_triplet_cosine_accuracy |
376
  |:------:|:----:|:-------------:|:---------------:|:----------------------------:|
377
  | 0.0185 | 1 | 2.3684 | - | - |
 
474
  | 1.8148 | 98 | 0.4057 | - | - |
475
  | 1.8333 | 99 | 0.4018 | - | - |
476
  | 1.8519 | 100 | 0.3852 | 0.0139 | 0.9753 |
477
+ | 1.8704 | 101 | 0.389 | - | - |
478
+ | 1.8889 | 102 | 0.3801 | - | - |
479
+ | 1.9074 | 103 | 0.3896 | - | - |
480
+ | 1.9259 | 104 | 0.3759 | - | - |
481
+ | 1.9444 | 105 | 0.3614 | - | - |
482
+ | 1.9630 | 106 | 0.3616 | - | - |
483
+ | 1.9815 | 107 | 0.3422 | - | - |
484
+ | 2.0 | 108 | 0.3516 | - | - |
485
+ | 2.0185 | 109 | 0.3507 | - | - |
486
+ | 2.0370 | 110 | 0.3387 | - | - |
487
+ | 2.0556 | 111 | 0.343 | - | - |
488
+ | 2.0741 | 112 | 0.3335 | - | - |
489
+ | 2.0926 | 113 | 0.3356 | - | - |
490
+ | 2.1111 | 114 | 0.3262 | - | - |
491
+ | 2.1296 | 115 | 0.3236 | - | - |
492
+ | 2.1481 | 116 | 0.3201 | - | - |
493
+ | 2.1667 | 117 | 0.3267 | - | - |
494
+ | 2.1852 | 118 | 0.3148 | - | - |
495
+ | 2.2037 | 119 | 0.3106 | - | - |
496
+ | 2.2222 | 120 | 0.3033 | - | - |
497
+ | 2.2407 | 121 | 0.3065 | - | - |
498
+ | 2.2593 | 122 | 0.3144 | - | - |
499
+ | 2.2778 | 123 | 0.3038 | - | - |
500
+ | 2.2963 | 124 | 0.2964 | - | - |
501
+ | 2.3148 | 125 | 0.2815 | 0.0107 | 0.9766 |
502
 
503
+ </details>
504
 
505
  ### Framework Versions
506
  - Python: 3.11.10
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cf21804db3b475e0f953ae046d41f8cf0ecc31ce24f13fc0c4fa9b702e4ab53
3
  size 735216376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e94ca9f9fdd6d49ffff540c9f9d385935382f10e3f2943f92e30e1871f347f83
3
  size 735216376
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83d5ea6076f9859b0a0b9ff851c88a8658b27fb970c2a054b0b2952ebda0c8b2
3
  size 1475248442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e92ade79793f892079f888a400ffaf066ba0e86ba5464de20ec73386ac0cbe65
3
  size 1475248442
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71f11c6b026c1ef9ec012b219c00f24af3d116a2489ba304ebb1fa9f0e82d280
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7736b135b017b69655ba8b256de8aea1a6b07e6818a8621590b426bb44756637
3
  size 15920
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ffe07e147e923a0a1c121885b8b7ade373df47b3f078ebe483a45fa79477914
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4074f79493d0a292d6a1c4465b81109d6a1fd1e81ec948afd225baecf3487025
3
  size 15920
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b402f0d95df397ac48c3f3db93bac7a34c1270c1302ca39e50c2b66a3645c5e5
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:769c6529f9b35f0d735e9083fceb569f3e17436ad61cffeb840ec9c5da71bd58
3
  size 15920
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:832e1c226b1aab106b771f778427840b6b0c09b5674859444ff09cec187c038a
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55b85d33ea8e722e42a1ff66b9ce272cb9fb9e0bf7b4a30dbf8e7f3c63963848
3
  size 15920
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f35a9520af17a24370a86cd2c2af5bd42798f12a1da50372b73482457ee56cd
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:563e34ff36505679e01719934aab5fd2d7d8d57d14bfea65f629ed1ab03f260a
3
  size 15920
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:515c0fb40b1a80f763f29a3d225e792aa31d38a0b9619d71d7362dfb3aa21fce
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:556732e873de3e6d20bf7c6814e688f9cabff60282f4996c2ec00cdd4ebdfc0e
3
  size 15920
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b20c13ddeeaef6d134aaf7ca88a5d802ba35a97bcb197b4da04d48ebd4abb770
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5e5661b0152037bd6dced5ec3cf2d9c483f640876f636848f4954dda8bcbc8a
3
  size 15920
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b851002d15c6e1fcb4881e92545ee2cd473f551520616162c774a4d4dec02a73
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcc6b05d88fc5258c37994b60862c65e18759b3bd71f7365208bf093d47b2f78
3
  size 15920
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7025b02393ca356786e9e3d4bcb42323989e5922a4e2d6aa883ff8a88a39cd54
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eed4f655e6aa8c517c2a6ce3c52b9c67030900524cdae3a5f415994b0a09da3
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.8518518518518519,
5
  "eval_steps": 25,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -743,6 +743,190 @@
743
  "eval_steps_per_second": 1.084,
744
  "eval_test_triplet_cosine_accuracy": 0.9753350019454956,
745
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
746
  }
747
  ],
748
  "logging_steps": 1.0,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.314814814814815,
5
  "eval_steps": 25,
6
+ "global_step": 125,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
743
  "eval_steps_per_second": 1.084,
744
  "eval_test_triplet_cosine_accuracy": 0.9753350019454956,
745
  "step": 100
746
+ },
747
+ {
748
+ "epoch": 1.8703703703703702,
749
+ "grad_norm": 0.5853272080421448,
750
+ "learning_rate": 3.840304182509506e-06,
751
+ "loss": 0.389,
752
+ "step": 101
753
+ },
754
+ {
755
+ "epoch": 1.8888888888888888,
756
+ "grad_norm": 0.5876256823539734,
757
+ "learning_rate": 3.8783269961977185e-06,
758
+ "loss": 0.3801,
759
+ "step": 102
760
+ },
761
+ {
762
+ "epoch": 1.9074074074074074,
763
+ "grad_norm": 0.5726855993270874,
764
+ "learning_rate": 3.916349809885932e-06,
765
+ "loss": 0.3896,
766
+ "step": 103
767
+ },
768
+ {
769
+ "epoch": 1.925925925925926,
770
+ "grad_norm": 0.5248777270317078,
771
+ "learning_rate": 3.9543726235741444e-06,
772
+ "loss": 0.3759,
773
+ "step": 104
774
+ },
775
+ {
776
+ "epoch": 1.9444444444444444,
777
+ "grad_norm": 0.5145537257194519,
778
+ "learning_rate": 3.992395437262358e-06,
779
+ "loss": 0.3614,
780
+ "step": 105
781
+ },
782
+ {
783
+ "epoch": 1.9629629629629628,
784
+ "grad_norm": 0.5256109237670898,
785
+ "learning_rate": 4.03041825095057e-06,
786
+ "loss": 0.3616,
787
+ "step": 106
788
+ },
789
+ {
790
+ "epoch": 1.9814814814814814,
791
+ "grad_norm": 0.5038356184959412,
792
+ "learning_rate": 4.068441064638784e-06,
793
+ "loss": 0.3422,
794
+ "step": 107
795
+ },
796
+ {
797
+ "epoch": 2.0,
798
+ "grad_norm": 0.4888509213924408,
799
+ "learning_rate": 4.106463878326996e-06,
800
+ "loss": 0.3516,
801
+ "step": 108
802
+ },
803
+ {
804
+ "epoch": 2.0185185185185186,
805
+ "grad_norm": 0.4632491171360016,
806
+ "learning_rate": 4.14448669201521e-06,
807
+ "loss": 0.3507,
808
+ "step": 109
809
+ },
810
+ {
811
+ "epoch": 2.037037037037037,
812
+ "grad_norm": 0.47965696454048157,
813
+ "learning_rate": 4.182509505703423e-06,
814
+ "loss": 0.3387,
815
+ "step": 110
816
+ },
817
+ {
818
+ "epoch": 2.0555555555555554,
819
+ "grad_norm": 0.46976569294929504,
820
+ "learning_rate": 4.2205323193916355e-06,
821
+ "loss": 0.343,
822
+ "step": 111
823
+ },
824
+ {
825
+ "epoch": 2.074074074074074,
826
+ "grad_norm": 0.4613853096961975,
827
+ "learning_rate": 4.258555133079848e-06,
828
+ "loss": 0.3335,
829
+ "step": 112
830
+ },
831
+ {
832
+ "epoch": 2.0925925925925926,
833
+ "grad_norm": 0.44832512736320496,
834
+ "learning_rate": 4.2965779467680614e-06,
835
+ "loss": 0.3356,
836
+ "step": 113
837
+ },
838
+ {
839
+ "epoch": 2.111111111111111,
840
+ "grad_norm": 0.42677590250968933,
841
+ "learning_rate": 4.334600760456274e-06,
842
+ "loss": 0.3262,
843
+ "step": 114
844
+ },
845
+ {
846
+ "epoch": 2.1296296296296298,
847
+ "grad_norm": 0.4223364293575287,
848
+ "learning_rate": 4.372623574144487e-06,
849
+ "loss": 0.3236,
850
+ "step": 115
851
+ },
852
+ {
853
+ "epoch": 2.148148148148148,
854
+ "grad_norm": 0.47711557149887085,
855
+ "learning_rate": 4.4106463878327e-06,
856
+ "loss": 0.3201,
857
+ "step": 116
858
+ },
859
+ {
860
+ "epoch": 2.1666666666666665,
861
+ "grad_norm": 0.412469744682312,
862
+ "learning_rate": 4.448669201520912e-06,
863
+ "loss": 0.3267,
864
+ "step": 117
865
+ },
866
+ {
867
+ "epoch": 2.185185185185185,
868
+ "grad_norm": 0.41616860032081604,
869
+ "learning_rate": 4.486692015209126e-06,
870
+ "loss": 0.3148,
871
+ "step": 118
872
+ },
873
+ {
874
+ "epoch": 2.2037037037037037,
875
+ "grad_norm": 0.39600443840026855,
876
+ "learning_rate": 4.524714828897338e-06,
877
+ "loss": 0.3106,
878
+ "step": 119
879
+ },
880
+ {
881
+ "epoch": 2.2222222222222223,
882
+ "grad_norm": 0.378671258687973,
883
+ "learning_rate": 4.562737642585552e-06,
884
+ "loss": 0.3033,
885
+ "step": 120
886
+ },
887
+ {
888
+ "epoch": 2.240740740740741,
889
+ "grad_norm": 0.3655548393726349,
890
+ "learning_rate": 4.600760456273764e-06,
891
+ "loss": 0.3065,
892
+ "step": 121
893
+ },
894
+ {
895
+ "epoch": 2.259259259259259,
896
+ "grad_norm": 0.38126257061958313,
897
+ "learning_rate": 4.638783269961978e-06,
898
+ "loss": 0.3144,
899
+ "step": 122
900
+ },
901
+ {
902
+ "epoch": 2.2777777777777777,
903
+ "grad_norm": 0.3792165517807007,
904
+ "learning_rate": 4.67680608365019e-06,
905
+ "loss": 0.3038,
906
+ "step": 123
907
+ },
908
+ {
909
+ "epoch": 2.2962962962962963,
910
+ "grad_norm": 0.3334355056285858,
911
+ "learning_rate": 4.7148288973384035e-06,
912
+ "loss": 0.2964,
913
+ "step": 124
914
+ },
915
+ {
916
+ "epoch": 2.314814814814815,
917
+ "grad_norm": 0.33771565556526184,
918
+ "learning_rate": 4.752851711026617e-06,
919
+ "loss": 0.2815,
920
+ "step": 125
921
+ },
922
+ {
923
+ "epoch": 2.314814814814815,
924
+ "eval_loss": 0.010651620104908943,
925
+ "eval_runtime": 1437.6428,
926
+ "eval_samples_per_second": 139.117,
927
+ "eval_steps_per_second": 1.087,
928
+ "eval_test_triplet_cosine_accuracy": 0.9766299724578857,
929
+ "step": 125
930
  }
931
  ],
932
  "logging_steps": 1.0,