Training in progress, step 125, checkpoint
Browse files- last-checkpoint/2_Dense/model.safetensors +1 -1
- last-checkpoint/README.md +30 -2
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +186 -2
last-checkpoint/2_Dense/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2362528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4cb909a27144c79c2c3696fd456cf85205a067d02dcbcb09a888374d047b2284
|
| 3 |
size 2362528
|
last-checkpoint/README.md
CHANGED
|
@@ -63,7 +63,7 @@ model-index:
|
|
| 63 |
type: test_triplet
|
| 64 |
metrics:
|
| 65 |
- type: cosine_accuracy
|
| 66 |
-
value: 0.
|
| 67 |
name: Cosine Accuracy
|
| 68 |
---
|
| 69 |
|
|
@@ -166,7 +166,7 @@ You can finetune this model on your own dataset.
|
|
| 166 |
|
| 167 |
| Metric | Value |
|
| 168 |
|:--------------------|:-----------|
|
| 169 |
-
| **cosine_accuracy** | **0.
|
| 170 |
|
| 171 |
<!--
|
| 172 |
## Bias, Risks and Limitations
|
|
@@ -370,6 +370,8 @@ You can finetune this model on your own dataset.
|
|
| 370 |
</details>
|
| 371 |
|
| 372 |
### Training Logs
|
|
|
|
|
|
|
| 373 |
| Epoch | Step | Training Loss | Validation Loss | test_triplet_cosine_accuracy |
|
| 374 |
|:------:|:----:|:-------------:|:---------------:|:----------------------------:|
|
| 375 |
| 0.0185 | 1 | 2.3684 | - | - |
|
|
@@ -472,7 +474,33 @@ You can finetune this model on your own dataset.
|
|
| 472 |
| 1.8148 | 98 | 0.4057 | - | - |
|
| 473 |
| 1.8333 | 99 | 0.4018 | - | - |
|
| 474 |
| 1.8519 | 100 | 0.3852 | 0.0139 | 0.9753 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
|
|
|
|
| 476 |
|
| 477 |
### Framework Versions
|
| 478 |
- Python: 3.11.10
|
|
|
|
| 63 |
type: test_triplet
|
| 64 |
metrics:
|
| 65 |
- type: cosine_accuracy
|
| 66 |
+
value: 0.9766299724578857
|
| 67 |
name: Cosine Accuracy
|
| 68 |
---
|
| 69 |
|
|
|
|
| 166 |
|
| 167 |
| Metric | Value |
|
| 168 |
|:--------------------|:-----------|
|
| 169 |
+
| **cosine_accuracy** | **0.9766** |
|
| 170 |
|
| 171 |
<!--
|
| 172 |
## Bias, Risks and Limitations
|
|
|
|
| 370 |
</details>
|
| 371 |
|
| 372 |
### Training Logs
|
| 373 |
+
<details><summary>Click to expand</summary>
|
| 374 |
+
|
| 375 |
| Epoch | Step | Training Loss | Validation Loss | test_triplet_cosine_accuracy |
|
| 376 |
|:------:|:----:|:-------------:|:---------------:|:----------------------------:|
|
| 377 |
| 0.0185 | 1 | 2.3684 | - | - |
|
|
|
|
| 474 |
| 1.8148 | 98 | 0.4057 | - | - |
|
| 475 |
| 1.8333 | 99 | 0.4018 | - | - |
|
| 476 |
| 1.8519 | 100 | 0.3852 | 0.0139 | 0.9753 |
|
| 477 |
+
| 1.8704 | 101 | 0.389 | - | - |
|
| 478 |
+
| 1.8889 | 102 | 0.3801 | - | - |
|
| 479 |
+
| 1.9074 | 103 | 0.3896 | - | - |
|
| 480 |
+
| 1.9259 | 104 | 0.3759 | - | - |
|
| 481 |
+
| 1.9444 | 105 | 0.3614 | - | - |
|
| 482 |
+
| 1.9630 | 106 | 0.3616 | - | - |
|
| 483 |
+
| 1.9815 | 107 | 0.3422 | - | - |
|
| 484 |
+
| 2.0 | 108 | 0.3516 | - | - |
|
| 485 |
+
| 2.0185 | 109 | 0.3507 | - | - |
|
| 486 |
+
| 2.0370 | 110 | 0.3387 | - | - |
|
| 487 |
+
| 2.0556 | 111 | 0.343 | - | - |
|
| 488 |
+
| 2.0741 | 112 | 0.3335 | - | - |
|
| 489 |
+
| 2.0926 | 113 | 0.3356 | - | - |
|
| 490 |
+
| 2.1111 | 114 | 0.3262 | - | - |
|
| 491 |
+
| 2.1296 | 115 | 0.3236 | - | - |
|
| 492 |
+
| 2.1481 | 116 | 0.3201 | - | - |
|
| 493 |
+
| 2.1667 | 117 | 0.3267 | - | - |
|
| 494 |
+
| 2.1852 | 118 | 0.3148 | - | - |
|
| 495 |
+
| 2.2037 | 119 | 0.3106 | - | - |
|
| 496 |
+
| 2.2222 | 120 | 0.3033 | - | - |
|
| 497 |
+
| 2.2407 | 121 | 0.3065 | - | - |
|
| 498 |
+
| 2.2593 | 122 | 0.3144 | - | - |
|
| 499 |
+
| 2.2778 | 123 | 0.3038 | - | - |
|
| 500 |
+
| 2.2963 | 124 | 0.2964 | - | - |
|
| 501 |
+
| 2.3148 | 125 | 0.2815 | 0.0107 | 0.9766 |
|
| 502 |
|
| 503 |
+
</details>
|
| 504 |
|
| 505 |
### Framework Versions
|
| 506 |
- Python: 3.11.10
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 735216376
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e94ca9f9fdd6d49ffff540c9f9d385935382f10e3f2943f92e30e1871f347f83
|
| 3 |
size 735216376
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1475248442
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e92ade79793f892079f888a400ffaf066ba0e86ba5464de20ec73386ac0cbe65
|
| 3 |
size 1475248442
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7736b135b017b69655ba8b256de8aea1a6b07e6818a8621590b426bb44756637
|
| 3 |
size 15920
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4074f79493d0a292d6a1c4465b81109d6a1fd1e81ec948afd225baecf3487025
|
| 3 |
size 15920
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:769c6529f9b35f0d735e9083fceb569f3e17436ad61cffeb840ec9c5da71bd58
|
| 3 |
size 15920
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55b85d33ea8e722e42a1ff66b9ce272cb9fb9e0bf7b4a30dbf8e7f3c63963848
|
| 3 |
size 15920
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:563e34ff36505679e01719934aab5fd2d7d8d57d14bfea65f629ed1ab03f260a
|
| 3 |
size 15920
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:556732e873de3e6d20bf7c6814e688f9cabff60282f4996c2ec00cdd4ebdfc0e
|
| 3 |
size 15920
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5e5661b0152037bd6dced5ec3cf2d9c483f640876f636848f4954dda8bcbc8a
|
| 3 |
size 15920
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fcc6b05d88fc5258c37994b60862c65e18759b3bd71f7365208bf093d47b2f78
|
| 3 |
size 15920
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2eed4f655e6aa8c517c2a6ce3c52b9c67030900524cdae3a5f415994b0a09da3
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 25,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -743,6 +743,190 @@
|
|
| 743 |
"eval_steps_per_second": 1.084,
|
| 744 |
"eval_test_triplet_cosine_accuracy": 0.9753350019454956,
|
| 745 |
"step": 100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 746 |
}
|
| 747 |
],
|
| 748 |
"logging_steps": 1.0,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.314814814814815,
|
| 5 |
"eval_steps": 25,
|
| 6 |
+
"global_step": 125,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 743 |
"eval_steps_per_second": 1.084,
|
| 744 |
"eval_test_triplet_cosine_accuracy": 0.9753350019454956,
|
| 745 |
"step": 100
|
| 746 |
+
},
|
| 747 |
+
{
|
| 748 |
+
"epoch": 1.8703703703703702,
|
| 749 |
+
"grad_norm": 0.5853272080421448,
|
| 750 |
+
"learning_rate": 3.840304182509506e-06,
|
| 751 |
+
"loss": 0.389,
|
| 752 |
+
"step": 101
|
| 753 |
+
},
|
| 754 |
+
{
|
| 755 |
+
"epoch": 1.8888888888888888,
|
| 756 |
+
"grad_norm": 0.5876256823539734,
|
| 757 |
+
"learning_rate": 3.8783269961977185e-06,
|
| 758 |
+
"loss": 0.3801,
|
| 759 |
+
"step": 102
|
| 760 |
+
},
|
| 761 |
+
{
|
| 762 |
+
"epoch": 1.9074074074074074,
|
| 763 |
+
"grad_norm": 0.5726855993270874,
|
| 764 |
+
"learning_rate": 3.916349809885932e-06,
|
| 765 |
+
"loss": 0.3896,
|
| 766 |
+
"step": 103
|
| 767 |
+
},
|
| 768 |
+
{
|
| 769 |
+
"epoch": 1.925925925925926,
|
| 770 |
+
"grad_norm": 0.5248777270317078,
|
| 771 |
+
"learning_rate": 3.9543726235741444e-06,
|
| 772 |
+
"loss": 0.3759,
|
| 773 |
+
"step": 104
|
| 774 |
+
},
|
| 775 |
+
{
|
| 776 |
+
"epoch": 1.9444444444444444,
|
| 777 |
+
"grad_norm": 0.5145537257194519,
|
| 778 |
+
"learning_rate": 3.992395437262358e-06,
|
| 779 |
+
"loss": 0.3614,
|
| 780 |
+
"step": 105
|
| 781 |
+
},
|
| 782 |
+
{
|
| 783 |
+
"epoch": 1.9629629629629628,
|
| 784 |
+
"grad_norm": 0.5256109237670898,
|
| 785 |
+
"learning_rate": 4.03041825095057e-06,
|
| 786 |
+
"loss": 0.3616,
|
| 787 |
+
"step": 106
|
| 788 |
+
},
|
| 789 |
+
{
|
| 790 |
+
"epoch": 1.9814814814814814,
|
| 791 |
+
"grad_norm": 0.5038356184959412,
|
| 792 |
+
"learning_rate": 4.068441064638784e-06,
|
| 793 |
+
"loss": 0.3422,
|
| 794 |
+
"step": 107
|
| 795 |
+
},
|
| 796 |
+
{
|
| 797 |
+
"epoch": 2.0,
|
| 798 |
+
"grad_norm": 0.4888509213924408,
|
| 799 |
+
"learning_rate": 4.106463878326996e-06,
|
| 800 |
+
"loss": 0.3516,
|
| 801 |
+
"step": 108
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"epoch": 2.0185185185185186,
|
| 805 |
+
"grad_norm": 0.4632491171360016,
|
| 806 |
+
"learning_rate": 4.14448669201521e-06,
|
| 807 |
+
"loss": 0.3507,
|
| 808 |
+
"step": 109
|
| 809 |
+
},
|
| 810 |
+
{
|
| 811 |
+
"epoch": 2.037037037037037,
|
| 812 |
+
"grad_norm": 0.47965696454048157,
|
| 813 |
+
"learning_rate": 4.182509505703423e-06,
|
| 814 |
+
"loss": 0.3387,
|
| 815 |
+
"step": 110
|
| 816 |
+
},
|
| 817 |
+
{
|
| 818 |
+
"epoch": 2.0555555555555554,
|
| 819 |
+
"grad_norm": 0.46976569294929504,
|
| 820 |
+
"learning_rate": 4.2205323193916355e-06,
|
| 821 |
+
"loss": 0.343,
|
| 822 |
+
"step": 111
|
| 823 |
+
},
|
| 824 |
+
{
|
| 825 |
+
"epoch": 2.074074074074074,
|
| 826 |
+
"grad_norm": 0.4613853096961975,
|
| 827 |
+
"learning_rate": 4.258555133079848e-06,
|
| 828 |
+
"loss": 0.3335,
|
| 829 |
+
"step": 112
|
| 830 |
+
},
|
| 831 |
+
{
|
| 832 |
+
"epoch": 2.0925925925925926,
|
| 833 |
+
"grad_norm": 0.44832512736320496,
|
| 834 |
+
"learning_rate": 4.2965779467680614e-06,
|
| 835 |
+
"loss": 0.3356,
|
| 836 |
+
"step": 113
|
| 837 |
+
},
|
| 838 |
+
{
|
| 839 |
+
"epoch": 2.111111111111111,
|
| 840 |
+
"grad_norm": 0.42677590250968933,
|
| 841 |
+
"learning_rate": 4.334600760456274e-06,
|
| 842 |
+
"loss": 0.3262,
|
| 843 |
+
"step": 114
|
| 844 |
+
},
|
| 845 |
+
{
|
| 846 |
+
"epoch": 2.1296296296296298,
|
| 847 |
+
"grad_norm": 0.4223364293575287,
|
| 848 |
+
"learning_rate": 4.372623574144487e-06,
|
| 849 |
+
"loss": 0.3236,
|
| 850 |
+
"step": 115
|
| 851 |
+
},
|
| 852 |
+
{
|
| 853 |
+
"epoch": 2.148148148148148,
|
| 854 |
+
"grad_norm": 0.47711557149887085,
|
| 855 |
+
"learning_rate": 4.4106463878327e-06,
|
| 856 |
+
"loss": 0.3201,
|
| 857 |
+
"step": 116
|
| 858 |
+
},
|
| 859 |
+
{
|
| 860 |
+
"epoch": 2.1666666666666665,
|
| 861 |
+
"grad_norm": 0.412469744682312,
|
| 862 |
+
"learning_rate": 4.448669201520912e-06,
|
| 863 |
+
"loss": 0.3267,
|
| 864 |
+
"step": 117
|
| 865 |
+
},
|
| 866 |
+
{
|
| 867 |
+
"epoch": 2.185185185185185,
|
| 868 |
+
"grad_norm": 0.41616860032081604,
|
| 869 |
+
"learning_rate": 4.486692015209126e-06,
|
| 870 |
+
"loss": 0.3148,
|
| 871 |
+
"step": 118
|
| 872 |
+
},
|
| 873 |
+
{
|
| 874 |
+
"epoch": 2.2037037037037037,
|
| 875 |
+
"grad_norm": 0.39600443840026855,
|
| 876 |
+
"learning_rate": 4.524714828897338e-06,
|
| 877 |
+
"loss": 0.3106,
|
| 878 |
+
"step": 119
|
| 879 |
+
},
|
| 880 |
+
{
|
| 881 |
+
"epoch": 2.2222222222222223,
|
| 882 |
+
"grad_norm": 0.378671258687973,
|
| 883 |
+
"learning_rate": 4.562737642585552e-06,
|
| 884 |
+
"loss": 0.3033,
|
| 885 |
+
"step": 120
|
| 886 |
+
},
|
| 887 |
+
{
|
| 888 |
+
"epoch": 2.240740740740741,
|
| 889 |
+
"grad_norm": 0.3655548393726349,
|
| 890 |
+
"learning_rate": 4.600760456273764e-06,
|
| 891 |
+
"loss": 0.3065,
|
| 892 |
+
"step": 121
|
| 893 |
+
},
|
| 894 |
+
{
|
| 895 |
+
"epoch": 2.259259259259259,
|
| 896 |
+
"grad_norm": 0.38126257061958313,
|
| 897 |
+
"learning_rate": 4.638783269961978e-06,
|
| 898 |
+
"loss": 0.3144,
|
| 899 |
+
"step": 122
|
| 900 |
+
},
|
| 901 |
+
{
|
| 902 |
+
"epoch": 2.2777777777777777,
|
| 903 |
+
"grad_norm": 0.3792165517807007,
|
| 904 |
+
"learning_rate": 4.67680608365019e-06,
|
| 905 |
+
"loss": 0.3038,
|
| 906 |
+
"step": 123
|
| 907 |
+
},
|
| 908 |
+
{
|
| 909 |
+
"epoch": 2.2962962962962963,
|
| 910 |
+
"grad_norm": 0.3334355056285858,
|
| 911 |
+
"learning_rate": 4.7148288973384035e-06,
|
| 912 |
+
"loss": 0.2964,
|
| 913 |
+
"step": 124
|
| 914 |
+
},
|
| 915 |
+
{
|
| 916 |
+
"epoch": 2.314814814814815,
|
| 917 |
+
"grad_norm": 0.33771565556526184,
|
| 918 |
+
"learning_rate": 4.752851711026617e-06,
|
| 919 |
+
"loss": 0.2815,
|
| 920 |
+
"step": 125
|
| 921 |
+
},
|
| 922 |
+
{
|
| 923 |
+
"epoch": 2.314814814814815,
|
| 924 |
+
"eval_loss": 0.010651620104908943,
|
| 925 |
+
"eval_runtime": 1437.6428,
|
| 926 |
+
"eval_samples_per_second": 139.117,
|
| 927 |
+
"eval_steps_per_second": 1.087,
|
| 928 |
+
"eval_test_triplet_cosine_accuracy": 0.9766299724578857,
|
| 929 |
+
"step": 125
|
| 930 |
}
|
| 931 |
],
|
| 932 |
"logging_steps": 1.0,
|