bobox's picture
d
75fdb07 verified
|
raw
history blame
66.8 kB
metadata
base_model: BXresearch/DeBERTa2-0.9B-ST-v2
datasets:
  - sentence-transformers/stsb
language:
  - en
library_name: sentence-transformers
metrics:
  - pearson_cosine
  - spearman_cosine
  - pearson_manhattan
  - spearman_manhattan
  - pearson_euclidean
  - spearman_euclidean
  - pearson_dot
  - spearman_dot
  - pearson_max
  - spearman_max
  - cosine_accuracy
  - cosine_accuracy_threshold
  - cosine_f1
  - cosine_f1_threshold
  - cosine_precision
  - cosine_recall
  - cosine_ap
  - dot_accuracy
  - dot_accuracy_threshold
  - dot_f1
  - dot_f1_threshold
  - dot_precision
  - dot_recall
  - dot_ap
  - manhattan_accuracy
  - manhattan_accuracy_threshold
  - manhattan_f1
  - manhattan_f1_threshold
  - manhattan_precision
  - manhattan_recall
  - manhattan_ap
  - euclidean_accuracy
  - euclidean_accuracy_threshold
  - euclidean_f1
  - euclidean_f1_threshold
  - euclidean_precision
  - euclidean_recall
  - euclidean_ap
  - max_accuracy
  - max_accuracy_threshold
  - max_f1
  - max_f1_threshold
  - max_precision
  - max_recall
  - max_ap
pipeline_tag: sentence-similarity
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:5749
  - loss:AnglELoss
widget:
  - source_sentence: Left side of a silver train engine.
    sentences:
      - A close-up of a black train engine.
      - Two boys are in midair jumping into an inground pool.
      - An older Asian couple poses with a newborn baby at the dinner table.
  - source_sentence: Four girls in swimsuits are playing volleyball at the beach.
    sentences:
      - A little girl is walking down a hallway.
      - The man is erasing the chalk board.
      - Four women in bikinis are playing volleyball on the beach.
  - source_sentence: A woman is cooking meat.
    sentences:
      - The dogs are alone in the forest.
      - A man is speaking.
      - A dog jumps through a hoop.
  - source_sentence: A person is folding a square paper piece.
    sentences:
      - A woman is carrying her baby.
      - A person folds a piece of paper.
      - A dog is trying to get through his dog door.
  - source_sentence: The boy is playing the piano.
    sentences:
      - The woman is pouring oil into the pan.
      - A small black and white dog is swimming in water.
      - Two brown dogs are playing with each other in the snow.
model-index:
  - name: SentenceTransformer based on BXresearch/DeBERTa2-0.9B-ST-v2
    results:
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test
          type: sts-test
        metrics:
          - type: pearson_cosine
            value: 0.9174070307741418
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.9292509717696739
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.9282688885676256
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.9298350652202988
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.9286763713344532
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.9301882421673056
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.9015673628485675
            name: Pearson Dot
          - type: spearman_dot
            value: 0.9062672614479156
            name: Spearman Dot
          - type: pearson_max
            value: 0.9286763713344532
            name: Pearson Max
          - type: spearman_max
            value: 0.9301882421673056
            name: Spearman Max
      - task:
          type: binary-classification
          name: Binary Classification
        dataset:
          name: allNLI dev
          type: allNLI-dev
        metrics:
          - type: cosine_accuracy
            value: 0.75390625
            name: Cosine Accuracy
          - type: cosine_accuracy_threshold
            value: 0.7934484481811523
            name: Cosine Accuracy Threshold
          - type: cosine_f1
            value: 0.6263736263736264
            name: Cosine F1
          - type: cosine_f1_threshold
            value: 0.7287859916687012
            name: Cosine F1 Threshold
          - type: cosine_precision
            value: 0.5643564356435643
            name: Cosine Precision
          - type: cosine_recall
            value: 0.7037037037037037
            name: Cosine Recall
          - type: cosine_ap
            value: 0.5952488621962656
            name: Cosine Ap
          - type: dot_accuracy
            value: 0.74609375
            name: Dot Accuracy
          - type: dot_accuracy_threshold
            value: 853.7699584960938
            name: Dot Accuracy Threshold
          - type: dot_f1
            value: 0.6106194690265486
            name: Dot F1
          - type: dot_f1_threshold
            value: 685.536865234375
            name: Dot F1 Threshold
          - type: dot_precision
            value: 0.47586206896551725
            name: Dot Precision
          - type: dot_recall
            value: 0.8518518518518519
            name: Dot Recall
          - type: dot_ap
            value: 0.5773093883122924
            name: Dot Ap
          - type: manhattan_accuracy
            value: 0.75390625
            name: Manhattan Accuracy
          - type: manhattan_accuracy_threshold
            value: 654.8433227539062
            name: Manhattan Accuracy Threshold
          - type: manhattan_f1
            value: 0.6244343891402715
            name: Manhattan F1
          - type: manhattan_f1_threshold
            value: 811.658203125
            name: Manhattan F1 Threshold
          - type: manhattan_precision
            value: 0.4928571428571429
            name: Manhattan Precision
          - type: manhattan_recall
            value: 0.8518518518518519
            name: Manhattan Recall
          - type: manhattan_ap
            value: 0.596555546112473
            name: Manhattan Ap
          - type: euclidean_accuracy
            value: 0.75390625
            name: Euclidean Accuracy
          - type: euclidean_accuracy_threshold
            value: 21.04879379272461
            name: Euclidean Accuracy Threshold
          - type: euclidean_f1
            value: 0.6244343891402715
            name: Euclidean F1
          - type: euclidean_f1_threshold
            value: 26.11341094970703
            name: Euclidean F1 Threshold
          - type: euclidean_precision
            value: 0.4928571428571429
            name: Euclidean Precision
          - type: euclidean_recall
            value: 0.8518518518518519
            name: Euclidean Recall
          - type: euclidean_ap
            value: 0.595001077180561
            name: Euclidean Ap
          - type: max_accuracy
            value: 0.75390625
            name: Max Accuracy
          - type: max_accuracy_threshold
            value: 853.7699584960938
            name: Max Accuracy Threshold
          - type: max_f1
            value: 0.6263736263736264
            name: Max F1
          - type: max_f1_threshold
            value: 811.658203125
            name: Max F1 Threshold
          - type: max_precision
            value: 0.5643564356435643
            name: Max Precision
          - type: max_recall
            value: 0.8518518518518519
            name: Max Recall
          - type: max_ap
            value: 0.596555546112473
            name: Max Ap
      - task:
          type: binary-classification
          name: Binary Classification
        dataset:
          name: Qnli dev
          type: Qnli-dev
        metrics:
          - type: cosine_accuracy
            value: 0.71484375
            name: Cosine Accuracy
          - type: cosine_accuracy_threshold
            value: 0.7152643799781799
            name: Cosine Accuracy Threshold
          - type: cosine_f1
            value: 0.7224334600760456
            name: Cosine F1
          - type: cosine_f1_threshold
            value: 0.6804982423782349
            name: Cosine F1 Threshold
          - type: cosine_precision
            value: 0.6785714285714286
            name: Cosine Precision
          - type: cosine_recall
            value: 0.7723577235772358
            name: Cosine Recall
          - type: cosine_ap
            value: 0.7550328500735501
            name: Cosine Ap
          - type: dot_accuracy
            value: 0.69140625
            name: Dot Accuracy
          - type: dot_accuracy_threshold
            value: 720.3964233398438
            name: Dot Accuracy Threshold
          - type: dot_f1
            value: 0.7058823529411764
            name: Dot F1
          - type: dot_f1_threshold
            value: 706.561279296875
            name: Dot F1 Threshold
          - type: dot_precision
            value: 0.6442953020134228
            name: Dot Precision
          - type: dot_recall
            value: 0.7804878048780488
            name: Dot Recall
          - type: dot_ap
            value: 0.7012253433472802
            name: Dot Ap
          - type: manhattan_accuracy
            value: 0.72265625
            name: Manhattan Accuracy
          - type: manhattan_accuracy_threshold
            value: 760.7179565429688
            name: Manhattan Accuracy Threshold
          - type: manhattan_f1
            value: 0.7279693486590038
            name: Manhattan F1
          - type: manhattan_f1_threshold
            value: 807.8878173828125
            name: Manhattan F1 Threshold
          - type: manhattan_precision
            value: 0.6884057971014492
            name: Manhattan Precision
          - type: manhattan_recall
            value: 0.7723577235772358
            name: Manhattan Recall
          - type: manhattan_ap
            value: 0.7705323139232185
            name: Manhattan Ap
          - type: euclidean_accuracy
            value: 0.7265625
            name: Euclidean Accuracy
          - type: euclidean_accuracy_threshold
            value: 25.634429931640625
            name: Euclidean Accuracy Threshold
          - type: euclidean_f1
            value: 0.7244094488188976
            name: Euclidean F1
          - type: euclidean_f1_threshold
            value: 25.634429931640625
            name: Euclidean F1 Threshold
          - type: euclidean_precision
            value: 0.7022900763358778
            name: Euclidean Precision
          - type: euclidean_recall
            value: 0.7479674796747967
            name: Euclidean Recall
          - type: euclidean_ap
            value: 0.7674294690555423
            name: Euclidean Ap
          - type: max_accuracy
            value: 0.7265625
            name: Max Accuracy
          - type: max_accuracy_threshold
            value: 760.7179565429688
            name: Max Accuracy Threshold
          - type: max_f1
            value: 0.7279693486590038
            name: Max F1
          - type: max_f1_threshold
            value: 807.8878173828125
            name: Max F1 Threshold
          - type: max_precision
            value: 0.7022900763358778
            name: Max Precision
          - type: max_recall
            value: 0.7804878048780488
            name: Max Recall
          - type: max_ap
            value: 0.7705323139232185
            name: Max Ap

SentenceTransformer based on BXresearch/DeBERTa2-0.9B-ST-v2

This is a sentence-transformers model finetuned from BXresearch/DeBERTa2-0.9B-ST-v2 on the sentence-transformers/stsb dataset. It maps sentences & paragraphs to a 1536-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

Model Sources

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: DebertaV2Model 
  (1): Pooling({'word_embedding_dimension': 1536, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("bobox/DeBERTa2-0.9B-ST-stsb")
# Run inference
sentences = [
    'The boy is playing the piano.',
    'The woman is pouring oil into the pan.',
    'A small black and white dog is swimming in water.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 1536]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Semantic Similarity (dataset: sts-test)

Metric Value
pearson_cosine 0.9174
spearman_cosine 0.9293
pearson_manhattan 0.9283
spearman_manhattan 0.9298
pearson_euclidean 0.9287
spearman_euclidean 0.9302
pearson_dot 0.9016
spearman_dot 0.9063
pearson_max 0.9287
spearman_max 0.9302

Binary Classification (dataset: allNLI-dev)

Metric Value
cosine_accuracy 0.7539
cosine_accuracy_threshold 0.7934
cosine_f1 0.6264
cosine_f1_threshold 0.7288
cosine_precision 0.5644
cosine_recall 0.7037
cosine_ap 0.5952
dot_accuracy 0.7461
dot_accuracy_threshold 853.77
dot_f1 0.6106
dot_f1_threshold 685.5369
dot_precision 0.4759
dot_recall 0.8519
dot_ap 0.5773
manhattan_accuracy 0.7539
manhattan_accuracy_threshold 654.8433
manhattan_f1 0.6244
manhattan_f1_threshold 811.6582
manhattan_precision 0.4929
manhattan_recall 0.8519
manhattan_ap 0.5966
euclidean_accuracy 0.7539
euclidean_accuracy_threshold 21.0488
euclidean_f1 0.6244
euclidean_f1_threshold 26.1134
euclidean_precision 0.4929
euclidean_recall 0.8519
euclidean_ap 0.595
max_accuracy 0.7539
max_accuracy_threshold 853.77
max_f1 0.6264
max_f1_threshold 811.6582
max_precision 0.5644
max_recall 0.8519
max_ap 0.5966

Binary Classification (dataset: Qnli-dev)

Metric Value
cosine_accuracy 0.7148
cosine_accuracy_threshold 0.7153
cosine_f1 0.7224
cosine_f1_threshold 0.6805
cosine_precision 0.6786
cosine_recall 0.7724
cosine_ap 0.755
dot_accuracy 0.6914
dot_accuracy_threshold 720.3964
dot_f1 0.7059
dot_f1_threshold 706.5613
dot_precision 0.6443
dot_recall 0.7805
dot_ap 0.7012
manhattan_accuracy 0.7227
manhattan_accuracy_threshold 760.718
manhattan_f1 0.728
manhattan_f1_threshold 807.8878
manhattan_precision 0.6884
manhattan_recall 0.7724
manhattan_ap 0.7705
euclidean_accuracy 0.7266
euclidean_accuracy_threshold 25.6344
euclidean_f1 0.7244
euclidean_f1_threshold 25.6344
euclidean_precision 0.7023
euclidean_recall 0.748
euclidean_ap 0.7674
max_accuracy 0.7266
max_accuracy_threshold 760.718
max_f1 0.728
max_f1_threshold 807.8878
max_precision 0.7023
max_recall 0.7805
max_ap 0.7705

Training Details

Training Dataset

sentence-transformers/stsb

  • Dataset: sentence-transformers/stsb at ab7a5ac
  • Size: 5,749 training samples
  • Columns: sentence1, sentence2, and score
  • Approximate statistics based on the first 1000 samples:
    sentence1 sentence2 score
    type string string float
    details
    • min: 6 tokens
    • mean: 9.81 tokens
    • max: 27 tokens
    • min: 5 tokens
    • mean: 9.74 tokens
    • max: 25 tokens
    • min: 0.0
    • mean: 0.54
    • max: 1.0
  • Samples:
    sentence1 sentence2 score
    A plane is taking off. An air plane is taking off. 1.0
    A man is playing a large flute. A man is playing a flute. 0.76
    A man is spreading shreded cheese on a pizza. A man is spreading shredded cheese on an uncooked pizza. 0.76
  • Loss: AnglELoss with these parameters:
    {
        "scale": 20.0,
        "similarity_fct": "pairwise_angle_sim"
    }
    

Evaluation Dataset

sentence-transformers/stsb

  • Dataset: sentence-transformers/stsb at ab7a5ac
  • Size: 512 evaluation samples
  • Columns: sentence1, sentence2, and score
  • Approximate statistics based on the first 1000 samples (this dataset has only 512, so all of them are covered):
    sentence1 sentence2 score
    type string string float
    details
    • min: 6 tokens
    • mean: 11.16 tokens
    • max: 26 tokens
    • min: 6 tokens
    • mean: 11.17 tokens
    • max: 23 tokens
    • min: 0.0
    • mean: 0.47
    • max: 1.0
  • Samples:
    sentence1 sentence2 score
    A man with a hard hat is dancing. A man wearing a hard hat is dancing. 1.0
    A young child is riding a horse. A child is riding a horse. 0.95
    A man is feeding a mouse to a snake. The man is feeding a mouse to the snake. 1.0
  • Loss: AnglELoss with these parameters:
    {
        "scale": 20.0,
        "similarity_fct": "pairwise_angle_sim"
    }
    

Training Hyperparameters

Non-Default Hyperparameters

  • eval_strategy: steps
  • per_device_eval_batch_size: 256
  • gradient_accumulation_steps: 2
  • learning_rate: 1.5e-05
  • weight_decay: 5e-05
  • num_train_epochs: 2
  • lr_scheduler_type: cosine_with_min_lr
  • lr_scheduler_kwargs: {'num_cycles': 0.5, 'min_lr': 2e-06}
  • warmup_ratio: 0.2
  • save_safetensors: False
  • fp16: True
  • push_to_hub: True
  • hub_model_id: bobox/DeBERTa2-0.9B-ST-stsb-checkpoints-tmp
  • hub_strategy: all_checkpoints
  • batch_sampler: no_duplicates

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: steps
  • prediction_loss_only: True
  • per_device_train_batch_size: 8
  • per_device_eval_batch_size: 256
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 2
  • eval_accumulation_steps: None
  • learning_rate: 1.5e-05
  • weight_decay: 5e-05
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1.0
  • num_train_epochs: 2
  • max_steps: -1
  • lr_scheduler_type: cosine_with_min_lr
  • lr_scheduler_kwargs: {'num_cycles': 0.5, 'min_lr': 2e-06}
  • warmup_ratio: 0.2
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: False
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: False
  • fp16: True
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: False
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: True
  • resume_from_checkpoint: None
  • hub_model_id: bobox/DeBERTa2-0.9B-ST-stsb-checkpoints-tmp
  • hub_strategy: all_checkpoints
  • hub_private_repo: False
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • dispatch_batches: None
  • split_batches: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • eval_on_start: False
  • batch_sampler: no_duplicates
  • multi_dataset_batch_sampler: proportional

Training Logs

Click to expand
Epoch Step Training Loss loss Qnli-dev_max_ap allNLI-dev_max_ap sts-test_spearman_cosine
0.0056 2 2.6549 - - - -
0.0111 4 2.7355 - - - -
0.0167 6 3.6211 - - - -
0.0223 8 3.0686 - - - -
0.0278 10 3.4113 - - - -
0.0334 12 2.4857 - - - -
0.0389 14 2.4288 - - - -
0.0445 16 2.6203 - - - -
0.0501 18 2.7441 - - - -
0.0556 20 3.4263 - - - -
0.0612 22 2.3565 - - - -
0.0668 24 2.5596 - - - -
0.0723 26 3.0866 - - - -
0.0779 28 3.223 - - - -
0.0834 30 2.012 - - - -
0.0890 32 3.2829 - - - -
0.0946 34 3.9277 - - - -
0.1001 36 2.785 2.6652 0.7960 0.6275 0.9294
0.1057 38 3.4966 - - - -
0.1113 40 2.5923 - - - -
0.1168 42 3.4418 - - - -
0.1224 44 2.6519 - - - -
0.1280 46 3.7746 - - - -
0.1335 48 2.6736 - - - -
0.1391 50 3.6764 - - - -
0.1446 52 3.5311 - - - -
0.1502 54 2.5869 - - - -
0.1558 56 3.183 - - - -
0.1613 58 2.747 - - - -
0.1669 60 1.965 - - - -
0.1725 62 2.1785 - - - -
0.1780 64 2.5788 - - - -
0.1836 66 3.1776 - - - -
0.1892 68 2.6464 - - - -
0.1947 70 2.7619 - - - -
0.2003 72 3.0911 2.6171 0.7923 0.6295 0.9276
0.2058 74 2.4308 - - - -
0.2114 76 3.2068 - - - -
0.2170 78 2.4081 - - - -
0.2225 80 2.3257 - - - -
0.2281 82 3.0499 - - - -
0.2337 84 3.2518 - - - -
0.2392 86 2.7876 - - - -
0.2448 88 2.7898 - - - -
0.2503 90 2.7116 - - - -
0.2559 92 3.0505 - - - -
0.2615 94 2.5901 - - - -
0.2670 96 1.9563 - - - -
0.2726 98 2.1006 - - - -
0.2782 100 2.1853 - - - -
0.2837 102 2.327 - - - -
0.2893 104 1.9937 - - - -
0.2949 106 2.543 - - - -
0.3004 108 1.9826 2.4596 0.7919 0.6329 0.9341
0.3060 110 3.0746 - - - -
0.3115 112 2.4145 - - - -
0.3171 114 2.244 - - - -
0.3227 116 2.78 - - - -
0.3282 118 2.8323 - - - -
0.3338 120 2.4639 - - - -
0.3394 122 2.9216 - - - -
0.3449 124 2.0747 - - - -
0.3505 126 2.7573 - - - -
0.3561 128 3.7019 - - - -
0.3616 130 3.3155 - - - -
0.3672 132 3.625 - - - -
0.3727 134 3.2889 - - - -
0.3783 136 3.5936 - - - -
0.3839 138 3.5932 - - - -
0.3894 140 3.0457 - - - -
0.3950 142 3.093 - - - -
0.4006 144 2.7189 2.4698 0.7752 0.5896 0.9346
0.4061 146 3.2578 - - - -
0.4117 148 3.3581 - - - -
0.4172 150 2.9734 - - - -
0.4228 152 3.0514 - - - -
0.4284 154 3.1983 - - - -
0.4339 156 2.9033 - - - -
0.4395 158 2.9279 - - - -
0.4451 160 3.1336 - - - -
0.4506 162 3.1467 - - - -
0.4562 164 3.0381 - - - -
0.4618 166 3.068 - - - -
0.4673 168 3.0261 - - - -
0.4729 170 3.2867 - - - -
0.4784 172 2.8474 - - - -
0.4840 174 2.7982 - - - -
0.4896 176 2.7945 - - - -
0.4951 178 3.1312 - - - -
0.5007 180 2.9704 2.4640 0.7524 0.6033 0.9242
0.5063 182 2.9856 - - - -
0.5118 184 3.014 - - - -
0.5174 186 3.0125 - - - -
0.5229 188 2.8149 - - - -
0.5285 190 2.7954 - - - -
0.5341 192 3.078 - - - -
0.5396 194 2.955 - - - -
0.5452 196 2.9468 - - - -
0.5508 198 3.0791 - - - -
0.5563 200 2.998 - - - -
0.5619 202 2.9068 - - - -
0.5675 204 2.8283 - - - -
0.5730 206 2.9216 - - - -
0.5786 208 3.3441 - - - -
0.5841 210 3.0 - - - -
0.5897 212 2.9023 - - - -
0.5953 214 2.8177 - - - -
0.6008 216 2.8262 2.4979 0.7899 0.6037 0.9260
0.6064 218 2.7832 - - - -
0.6120 220 3.0085 - - - -
0.6175 222 2.8762 - - - -
0.6231 224 3.147 - - - -
0.6287 226 3.4262 - - - -
0.6342 228 2.8271 - - - -
0.6398 230 2.4024 - - - -
0.6453 232 2.7556 - - - -
0.6509 234 3.4652 - - - -
0.6565 236 2.7235 - - - -
0.6620 238 2.6498 - - - -
0.6676 240 3.0933 - - - -
0.6732 242 3.1193 - - - -
0.6787 244 2.7249 - - - -
0.6843 246 2.8931 - - - -
0.6898 248 2.7913 - - - -
0.6954 250 2.6933 - - - -
0.7010 252 2.5632 2.4585 0.7700 0.6065 0.9298
0.7065 254 2.8347 - - - -
0.7121 256 2.3827 - - - -
0.7177 258 2.9065 - - - -
0.7232 260 2.8162 - - - -
0.7288 262 2.5485 - - - -
0.7344 264 2.5751 - - - -
0.7399 266 2.9056 - - - -
0.7455 268 3.1397 - - - -
0.7510 270 3.3107 - - - -
0.7566 272 2.9024 - - - -
0.7622 274 2.2307 - - - -
0.7677 276 3.0097 - - - -
0.7733 278 3.1406 - - - -
0.7789 280 2.6786 - - - -
0.7844 282 2.8882 - - - -
0.7900 284 2.7215 - - - -
0.7955 286 3.4188 - - - -
0.8011 288 2.9901 2.4414 0.7665 0.6023 0.9288
0.8067 290 2.5144 - - - -
0.8122 292 3.1932 - - - -
0.8178 294 2.9733 - - - -
0.8234 296 2.6895 - - - -
0.8289 298 2.678 - - - -
0.8345 300 2.5462 - - - -
0.8401 302 2.6911 - - - -
0.8456 304 2.8404 - - - -
0.8512 306 2.5358 - - - -
0.8567 308 3.1245 - - - -
0.8623 310 2.3404 - - - -
0.8679 312 3.0751 - - - -
0.8734 314 2.7005 - - - -
0.8790 316 2.7387 - - - -
0.8846 318 2.7227 - - - -
0.8901 320 2.9085 - - - -
0.8957 322 3.3239 - - - -
0.9013 324 2.4256 2.4106 0.7644 0.6087 0.9304
0.9068 326 2.5059 - - - -
0.9124 328 2.5387 - - - -
0.9179 330 2.899 - - - -
0.9235 332 2.7256 - - - -
0.9291 334 2.4862 - - - -
0.9346 336 3.0014 - - - -
0.9402 338 2.4164 - - - -
0.9458 340 2.3148 - - - -
0.9513 342 2.9414 - - - -
0.9569 344 2.4435 - - - -
0.9624 346 2.6286 - - - -
0.9680 348 2.1744 - - - -
0.9736 350 2.5866 - - - -
0.9791 352 2.8333 - - - -
0.9847 354 2.3544 - - - -
0.9903 356 2.5397 - - - -
0.9958 358 3.4058 - - - -
1.0014 360 2.2904 2.4089 0.7888 0.6104 0.9338
1.0070 362 2.7925 - - - -
1.0125 364 2.6415 - - - -
1.0181 366 2.724 - - - -
1.0236 368 2.569 - - - -
1.0292 370 2.808 - - - -
1.0348 372 2.4672 - - - -
1.0403 374 2.3964 - - - -
1.0459 376 2.3518 - - - -
1.0515 378 2.7617 - - - -
1.0570 380 2.5651 - - - -
1.0626 382 2.2623 - - - -
1.0682 384 2.2048 - - - -
1.0737 386 2.1426 - - - -
1.0793 388 1.8182 - - - -
1.0848 390 2.3166 - - - -
1.0904 392 2.4101 - - - -
1.0960 394 2.8932 - - - -
1.1015 396 3.0201 2.4217 0.7851 0.6205 0.9301
1.1071 398 2.6101 - - - -
1.1127 400 2.3627 - - - -
1.1182 402 2.5402 - - - -
1.1238 404 2.695 - - - -
1.1293 406 3.0563 - - - -
1.1349 408 2.2296 - - - -
1.1405 410 3.057 - - - -
1.1460 412 2.8023 - - - -
1.1516 414 2.6492 - - - -
1.1572 416 2.2406 - - - -
1.1627 418 1.7195 - - - -
1.1683 420 2.2773 - - - -
1.1739 422 2.3639 - - - -
1.1794 424 2.3348 - - - -
1.1850 426 2.6791 - - - -
1.1905 428 2.3621 - - - -
1.1961 430 2.5224 - - - -
1.2017 432 2.4063 2.4724 0.7628 0.6043 0.9270
1.2072 434 1.9713 - - - -
1.2128 436 2.4265 - - - -
1.2184 438 2.0827 - - - -
1.2239 440 2.0696 - - - -
1.2295 442 2.7507 - - - -
1.2350 444 2.5436 - - - -
1.2406 446 2.4039 - - - -
1.2462 448 2.4229 - - - -
1.2517 450 2.323 - - - -
1.2573 452 2.6099 - - - -
1.2629 454 2.0329 - - - -
1.2684 456 1.8797 - - - -
1.2740 458 1.4485 - - - -
1.2796 460 1.6794 - - - -
1.2851 462 2.0934 - - - -
1.2907 464 1.9579 - - - -
1.2962 466 1.9288 - - - -
1.3018 468 1.5874 2.5056 0.7833 0.5948 0.9345
1.3074 470 1.8715 - - - -
1.3129 472 1.3778 - - - -
1.3185 474 2.2242 - - - -
1.3241 476 2.4031 - - - -
1.3296 478 1.924 - - - -
1.3352 480 1.7895 - - - -
1.3408 482 2.0349 - - - -
1.3463 484 1.8116 - - - -
1.3519 486 2.353 - - - -
1.3574 488 3.4263 - - - -
1.3630 490 4.0606 - - - -
1.3686 492 2.7423 - - - -
1.3741 494 2.8461 - - - -
1.3797 496 3.0742 - - - -
1.3853 498 2.2054 - - - -
1.3908 500 2.6009 - - - -
1.3964 502 2.242 - - - -
1.4019 504 2.9416 2.5288 0.7969 0.6010 0.9323
1.4075 506 3.8179 - - - -
1.4131 508 3.0147 - - - -
1.4186 510 2.2185 - - - -
1.4242 512 3.0323 - - - -
1.4298 514 2.6922 - - - -
1.4353 516 2.6219 - - - -
1.4409 518 2.4365 - - - -
1.4465 520 3.1643 - - - -
1.4520 522 2.5548 - - - -
1.4576 524 2.3798 - - - -
1.4631 526 2.6361 - - - -
1.4687 528 2.6859 - - - -
1.4743 530 2.6071 - - - -
1.4798 532 2.2565 - - - -
1.4854 534 2.2415 - - - -
1.4910 536 2.4591 - - - -
1.4965 538 2.6729 - - - -
1.5021 540 2.3898 2.5025 0.7881 0.5978 0.9300
1.5076 542 2.4614 - - - -
1.5132 544 2.5447 - - - -
1.5188 546 2.502 - - - -
1.5243 548 2.1892 - - - -
1.5299 550 2.7081 - - - -
1.5355 552 2.5523 - - - -
1.5410 554 2.3571 - - - -
1.5466 556 2.7694 - - - -
1.5522 558 2.2 - - - -
1.5577 560 2.4179 - - - -
1.5633 562 2.3914 - - - -
1.5688 564 2.1722 - - - -
1.5744 566 2.345 - - - -
1.5800 568 3.0069 - - - -
1.5855 570 2.4231 - - - -
1.5911 572 2.3597 - - - -
1.5967 574 2.143 - - - -
1.6022 576 2.6288 2.5368 0.7943 0.6048 0.9265
1.6078 578 2.3905 - - - -
1.6134 580 2.1823 - - - -
1.6189 582 2.367 - - - -
1.6245 584 2.8189 - - - -
1.6300 586 2.6536 - - - -
1.6356 588 2.2134 - - - -
1.6412 590 1.6949 - - - -
1.6467 592 2.2029 - - - -
1.6523 594 3.0223 - - - -
1.6579 596 2.239 - - - -
1.6634 598 2.3388 - - - -
1.6690 600 2.3066 - - - -
1.6745 602 2.4762 - - - -
1.6801 604 1.9503 - - - -
1.6857 606 2.1252 - - - -
1.6912 608 1.8253 - - - -
1.6968 610 2.2938 - - - -
1.7024 612 1.9489 2.5747 0.7675 0.5964 0.9267
1.7079 614 1.9238 - - - -
1.7135 616 1.8171 - - - -
1.7191 618 2.2371 - - - -
1.7246 620 2.4901 - - - -
1.7302 622 1.8503 - - - -
1.7357 624 2.017 - - - -
1.7413 626 2.3069 - - - -
1.7469 628 2.444 - - - -
1.7524 630 1.9606 - - - -
1.7580 632 2.2364 - - - -
1.7636 634 1.8711 - - - -
1.7691 636 2.4233 - - - -
1.7747 638 2.4065 - - - -
1.7803 640 2.0725 - - - -
1.7858 642 2.0578 - - - -
1.7914 644 2.2066 - - - -
1.7969 646 1.7767 - - - -
1.8025 648 2.7388 2.5685 0.7663 0.5959 0.9292
1.8081 650 1.854 - - - -
1.8136 652 2.7337 - - - -
1.8192 654 2.4477 - - - -
1.8248 656 2.4818 - - - -
1.8303 658 1.8592 - - - -
1.8359 660 1.8396 - - - -
1.8414 662 2.3893 - - - -
1.8470 664 2.0139 - - - -
1.8526 666 2.8837 - - - -
1.8581 668 2.0342 - - - -
1.8637 670 1.8857 - - - -
1.8693 672 2.1147 - - - -
1.8748 674 1.6263 - - - -
1.8804 676 2.2987 - - - -
1.8860 678 1.9678 - - - -
1.8915 680 1.9999 - - - -
1.8971 682 2.2802 - - - -
1.9026 684 1.9666 2.5536 0.7717 0.5967 0.9289
1.9082 686 1.8156 - - - -
1.9138 688 1.9542 - - - -
1.9193 690 1.859 - - - -
1.9249 692 1.6237 - - - -
1.9305 694 2.3085 - - - -
1.9360 696 2.1461 - - - -
1.9416 698 1.7024 - - - -
1.9471 700 2.2181 - - - -
1.9527 702 2.4782 - - - -
1.9583 704 1.7378 - - - -
1.9638 706 2.0422 - - - -
1.9694 708 1.7577 - - - -
1.9750 710 2.0209 - - - -
1.9805 712 2.0372 - - - -
1.9861 714 2.0915 - - - -
1.9917 716 1.603 - - - -
1.9972 718 1.7111 2.5566 0.7705 0.5966 0.9293

Framework Versions

  • Python: 3.10.12
  • Sentence Transformers: 3.0.1
  • Transformers: 4.42.4
  • PyTorch: 2.4.0+cu121
  • Accelerate: 0.32.1
  • Datasets: 2.21.0
  • Tokenizers: 0.19.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

AnglELoss

@misc{li2023angleoptimized,
    title={AnglE-optimized Text Embeddings}, 
    author={Xianming Li and Jing Li},
    year={2023},
    eprint={2309.12871},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}