Training in progress, step 125, checkpoint

Browse files

Files changed (14) hide show

last-checkpoint/2_Dense/model.safetensors +1 -1
last-checkpoint/README.md +30 -2
last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +186 -2

last-checkpoint/2_Dense/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0bc2edab840a77bbfe5f20e704bb8120b1f6f291c9a5bbbe6a18c982dc2722de
 size 2362528

 version https://git-lfs.github.com/spec/v1
+oid sha256:4cb909a27144c79c2c3696fd456cf85205a067d02dcbcb09a888374d047b2284
 size 2362528

last-checkpoint/README.md CHANGED Viewed

@@ -63,7 +63,7 @@ model-index:
       type: test_triplet
     metrics:
     - type: cosine_accuracy
-      value: 0.9753350019454956
       name: Cosine Accuracy
 ---
@@ -166,7 +166,7 @@ You can finetune this model on your own dataset.
 | Metric              | Value      |
 |:--------------------|:-----------|
-| **cosine_accuracy** | **0.9753** |
 <!--
 ## Bias, Risks and Limitations
@@ -370,6 +370,8 @@ You can finetune this model on your own dataset.
 </details>
 ### Training Logs
 | Epoch  | Step | Training Loss | Validation Loss | test_triplet_cosine_accuracy |
 |:------:|:----:|:-------------:|:---------------:|:----------------------------:|
 | 0.0185 | 1    | 2.3684        | -               | -                            |
@@ -472,7 +474,33 @@ You can finetune this model on your own dataset.
 | 1.8148 | 98   | 0.4057        | -               | -                            |
 | 1.8333 | 99   | 0.4018        | -               | -                            |
 | 1.8519 | 100  | 0.3852        | 0.0139          | 0.9753                       |
 ### Framework Versions
 - Python: 3.11.10

       type: test_triplet
     metrics:
     - type: cosine_accuracy
+      value: 0.9766299724578857
       name: Cosine Accuracy
 ---
 | Metric              | Value      |
 |:--------------------|:-----------|
+| **cosine_accuracy** | **0.9766** |
 <!--
 ## Bias, Risks and Limitations
 </details>
 ### Training Logs
+<details><summary>Click to expand</summary>
 | Epoch  | Step | Training Loss | Validation Loss | test_triplet_cosine_accuracy |
 |:------:|:----:|:-------------:|:---------------:|:----------------------------:|
 | 0.0185 | 1    | 2.3684        | -               | -                            |
 | 1.8148 | 98   | 0.4057        | -               | -                            |
 | 1.8333 | 99   | 0.4018        | -               | -                            |
 | 1.8519 | 100  | 0.3852        | 0.0139          | 0.9753                       |
+| 1.8704 | 101  | 0.389         | -               | -                            |
+| 1.8889 | 102  | 0.3801        | -               | -                            |
+| 1.9074 | 103  | 0.3896        | -               | -                            |
+| 1.9259 | 104  | 0.3759        | -               | -                            |
+| 1.9444 | 105  | 0.3614        | -               | -                            |
+| 1.9630 | 106  | 0.3616        | -               | -                            |
+| 1.9815 | 107  | 0.3422        | -               | -                            |
+| 2.0    | 108  | 0.3516        | -               | -                            |
+| 2.0185 | 109  | 0.3507        | -               | -                            |
+| 2.0370 | 110  | 0.3387        | -               | -                            |
+| 2.0556 | 111  | 0.343         | -               | -                            |
+| 2.0741 | 112  | 0.3335        | -               | -                            |
+| 2.0926 | 113  | 0.3356        | -               | -                            |
+| 2.1111 | 114  | 0.3262        | -               | -                            |
+| 2.1296 | 115  | 0.3236        | -               | -                            |
+| 2.1481 | 116  | 0.3201        | -               | -                            |
+| 2.1667 | 117  | 0.3267        | -               | -                            |
+| 2.1852 | 118  | 0.3148        | -               | -                            |
+| 2.2037 | 119  | 0.3106        | -               | -                            |
+| 2.2222 | 120  | 0.3033        | -               | -                            |
+| 2.2407 | 121  | 0.3065        | -               | -                            |
+| 2.2593 | 122  | 0.3144        | -               | -                            |
+| 2.2778 | 123  | 0.3038        | -               | -                            |
+| 2.2963 | 124  | 0.2964        | -               | -                            |
+| 2.3148 | 125  | 0.2815        | 0.0107          | 0.9766                       |
+</details>
 ### Framework Versions
 - Python: 3.11.10

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6cf21804db3b475e0f953ae046d41f8cf0ecc31ce24f13fc0c4fa9b702e4ab53
 size 735216376

 version https://git-lfs.github.com/spec/v1
+oid sha256:e94ca9f9fdd6d49ffff540c9f9d385935382f10e3f2943f92e30e1871f347f83
 size 735216376

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83d5ea6076f9859b0a0b9ff851c88a8658b27fb970c2a054b0b2952ebda0c8b2
 size 1475248442

 version https://git-lfs.github.com/spec/v1
+oid sha256:e92ade79793f892079f888a400ffaf066ba0e86ba5464de20ec73386ac0cbe65
 size 1475248442

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:71f11c6b026c1ef9ec012b219c00f24af3d116a2489ba304ebb1fa9f0e82d280
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:7736b135b017b69655ba8b256de8aea1a6b07e6818a8621590b426bb44756637
 size 15920

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ffe07e147e923a0a1c121885b8b7ade373df47b3f078ebe483a45fa79477914
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:4074f79493d0a292d6a1c4465b81109d6a1fd1e81ec948afd225baecf3487025
 size 15920

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b402f0d95df397ac48c3f3db93bac7a34c1270c1302ca39e50c2b66a3645c5e5
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:769c6529f9b35f0d735e9083fceb569f3e17436ad61cffeb840ec9c5da71bd58
 size 15920

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:832e1c226b1aab106b771f778427840b6b0c09b5674859444ff09cec187c038a
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:55b85d33ea8e722e42a1ff66b9ce272cb9fb9e0bf7b4a30dbf8e7f3c63963848
 size 15920

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f35a9520af17a24370a86cd2c2af5bd42798f12a1da50372b73482457ee56cd
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:563e34ff36505679e01719934aab5fd2d7d8d57d14bfea65f629ed1ab03f260a
 size 15920

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:515c0fb40b1a80f763f29a3d225e792aa31d38a0b9619d71d7362dfb3aa21fce
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:556732e873de3e6d20bf7c6814e688f9cabff60282f4996c2ec00cdd4ebdfc0e
 size 15920

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b20c13ddeeaef6d134aaf7ca88a5d802ba35a97bcb197b4da04d48ebd4abb770
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:c5e5661b0152037bd6dced5ec3cf2d9c483f640876f636848f4954dda8bcbc8a
 size 15920

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b851002d15c6e1fcb4881e92545ee2cd473f551520616162c774a4d4dec02a73
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:fcc6b05d88fc5258c37994b60862c65e18759b3bd71f7365208bf093d47b2f78
 size 15920

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7025b02393ca356786e9e3d4bcb42323989e5922a4e2d6aa883ff8a88a39cd54
 size 1000

 version https://git-lfs.github.com/spec/v1
+oid sha256:2eed4f655e6aa8c517c2a6ce3c52b9c67030900524cdae3a5f415994b0a09da3
 size 1000

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.8518518518518519,
   "eval_steps": 25,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -743,6 +743,190 @@
       "eval_steps_per_second": 1.084,
       "eval_test_triplet_cosine_accuracy": 0.9753350019454956,
       "step": 100
     }
   ],
   "logging_steps": 1.0,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.314814814814815,
   "eval_steps": 25,
+  "global_step": 125,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_steps_per_second": 1.084,
       "eval_test_triplet_cosine_accuracy": 0.9753350019454956,
       "step": 100
+    },
+    {
+      "epoch": 1.8703703703703702,
+      "grad_norm": 0.5853272080421448,
+      "learning_rate": 3.840304182509506e-06,
+      "loss": 0.389,
+      "step": 101
+    },
+    {
+      "epoch": 1.8888888888888888,
+      "grad_norm": 0.5876256823539734,
+      "learning_rate": 3.8783269961977185e-06,
+      "loss": 0.3801,
+      "step": 102
+    },
+    {
+      "epoch": 1.9074074074074074,
+      "grad_norm": 0.5726855993270874,
+      "learning_rate": 3.916349809885932e-06,
+      "loss": 0.3896,
+      "step": 103
+    },
+    {
+      "epoch": 1.925925925925926,
+      "grad_norm": 0.5248777270317078,
+      "learning_rate": 3.9543726235741444e-06,
+      "loss": 0.3759,
+      "step": 104
+    },
+    {
+      "epoch": 1.9444444444444444,
+      "grad_norm": 0.5145537257194519,
+      "learning_rate": 3.992395437262358e-06,
+      "loss": 0.3614,
+      "step": 105
+    },
+    {
+      "epoch": 1.9629629629629628,
+      "grad_norm": 0.5256109237670898,
+      "learning_rate": 4.03041825095057e-06,
+      "loss": 0.3616,
+      "step": 106
+    },
+    {
+      "epoch": 1.9814814814814814,
+      "grad_norm": 0.5038356184959412,
+      "learning_rate": 4.068441064638784e-06,
+      "loss": 0.3422,
+      "step": 107
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.4888509213924408,
+      "learning_rate": 4.106463878326996e-06,
+      "loss": 0.3516,
+      "step": 108
+    },
+    {
+      "epoch": 2.0185185185185186,
+      "grad_norm": 0.4632491171360016,
+      "learning_rate": 4.14448669201521e-06,
+      "loss": 0.3507,
+      "step": 109
+    },
+    {
+      "epoch": 2.037037037037037,
+      "grad_norm": 0.47965696454048157,
+      "learning_rate": 4.182509505703423e-06,
+      "loss": 0.3387,
+      "step": 110
+    },
+    {
+      "epoch": 2.0555555555555554,
+      "grad_norm": 0.46976569294929504,
+      "learning_rate": 4.2205323193916355e-06,
+      "loss": 0.343,
+      "step": 111
+    },
+    {
+      "epoch": 2.074074074074074,
+      "grad_norm": 0.4613853096961975,
+      "learning_rate": 4.258555133079848e-06,
+      "loss": 0.3335,
+      "step": 112
+    },
+    {
+      "epoch": 2.0925925925925926,
+      "grad_norm": 0.44832512736320496,
+      "learning_rate": 4.2965779467680614e-06,
+      "loss": 0.3356,
+      "step": 113
+    },
+    {
+      "epoch": 2.111111111111111,
+      "grad_norm": 0.42677590250968933,
+      "learning_rate": 4.334600760456274e-06,
+      "loss": 0.3262,
+      "step": 114
+    },
+    {
+      "epoch": 2.1296296296296298,
+      "grad_norm": 0.4223364293575287,
+      "learning_rate": 4.372623574144487e-06,
+      "loss": 0.3236,
+      "step": 115
+    },
+    {
+      "epoch": 2.148148148148148,
+      "grad_norm": 0.47711557149887085,
+      "learning_rate": 4.4106463878327e-06,
+      "loss": 0.3201,
+      "step": 116
+    },
+    {
+      "epoch": 2.1666666666666665,
+      "grad_norm": 0.412469744682312,
+      "learning_rate": 4.448669201520912e-06,
+      "loss": 0.3267,
+      "step": 117
+    },
+    {
+      "epoch": 2.185185185185185,
+      "grad_norm": 0.41616860032081604,
+      "learning_rate": 4.486692015209126e-06,
+      "loss": 0.3148,
+      "step": 118
+    },
+    {
+      "epoch": 2.2037037037037037,
+      "grad_norm": 0.39600443840026855,
+      "learning_rate": 4.524714828897338e-06,
+      "loss": 0.3106,
+      "step": 119
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "grad_norm": 0.378671258687973,
+      "learning_rate": 4.562737642585552e-06,
+      "loss": 0.3033,
+      "step": 120
+    },
+    {
+      "epoch": 2.240740740740741,
+      "grad_norm": 0.3655548393726349,
+      "learning_rate": 4.600760456273764e-06,
+      "loss": 0.3065,
+      "step": 121
+    },
+    {
+      "epoch": 2.259259259259259,
+      "grad_norm": 0.38126257061958313,
+      "learning_rate": 4.638783269961978e-06,
+      "loss": 0.3144,
+      "step": 122
+    },
+    {
+      "epoch": 2.2777777777777777,
+      "grad_norm": 0.3792165517807007,
+      "learning_rate": 4.67680608365019e-06,
+      "loss": 0.3038,
+      "step": 123
+    },
+    {
+      "epoch": 2.2962962962962963,
+      "grad_norm": 0.3334355056285858,
+      "learning_rate": 4.7148288973384035e-06,
+      "loss": 0.2964,
+      "step": 124
+    },
+    {
+      "epoch": 2.314814814814815,
+      "grad_norm": 0.33771565556526184,
+      "learning_rate": 4.752851711026617e-06,
+      "loss": 0.2815,
+      "step": 125
+    },
+    {
+      "epoch": 2.314814814814815,
+      "eval_loss": 0.010651620104908943,
+      "eval_runtime": 1437.6428,
+      "eval_samples_per_second": 139.117,
+      "eval_steps_per_second": 1.087,
+      "eval_test_triplet_cosine_accuracy": 0.9766299724578857,
+      "step": 125
     }
   ],
   "logging_steps": 1.0,