Omartificial-Intelligence-Space's picture
Add new SentenceTransformer model.
b581354 verified
|
raw
history blame
49.4 kB
metadata
base_model: aubmindlab/bert-base-arabertv02
datasets: []
language: []
library_name: sentence-transformers
metrics:
  - pearson_cosine
  - spearman_cosine
  - pearson_manhattan
  - spearman_manhattan
  - pearson_euclidean
  - spearman_euclidean
  - pearson_dot
  - spearman_dot
  - pearson_max
  - spearman_max
pipeline_tag: sentence-similarity
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:1000000
  - loss:MatryoshkaLoss
  - loss:MultipleNegativesRankingLoss
widget:
  - source_sentence: فتى يرتدي اللون الأحمر ينزلق على متن عربة نفخة
    sentences:
      - اثنان من الشباب الآسيويين يتسكعون
      - فتى يلعب على عربة نفخة
      - فتى يثقب سكيناً في عربة نفخة
  - source_sentence: عامل بناء يقف على رافعة يضع ذراعًا كبيرًا على قمة قمة قيد الإنشاء.
    sentences:
      - الاطفال يركبون عربة متعة
      - شخص يقف
      - لا أحد يقف
  - source_sentence: رجل مع حفرة طاقة كبيرة يقف بجانب ابنته مع خرطوم المكنسة الكهربائية.
    sentences:
      - جنديان يحملان أسلحة
      - رجل يحمل مثقاب يقف بجانب فتاة تحمل خرطوم كهربائي
      - الرجل والفتاة يرسمون الجدران
  - source_sentence: رجل يرتدي قميص أسود يعزف على الجيتار.
    sentences:
      - الرجل يرتدي الأسود.
      - هناك رجل يفرغ
      - الرجل يرتدي قميصاً أزرق.
  - source_sentence: رجل يرتدي قميص (فيجاس) الأحمر يجلس على طاولة ويلعب بالكاميرا
    sentences:
      - رجل يلعب بالكاميرا
      - فتى يقفز في الهواء
      - الرجل يقف ويأخذ الصور
model-index:
  - name: SentenceTransformer based on aubmindlab/bert-base-arabertv02
    results:
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 768
          type: sts-test-768
        metrics:
          - type: pearson_cosine
            value: 0.8137491067613172
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8139804248887779
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.805239691712325
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8071457719582591
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8053105962459932
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8078084689219578
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.8019135317246738
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7961388104098682
            name: Spearman Dot
          - type: pearson_max
            value: 0.8137491067613172
            name: Pearson Max
          - type: spearman_max
            value: 0.8139804248887779
            name: Spearman Max
          - type: pearson_cosine
            value: 0.8137491067613172
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8139804248887779
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.805239691712325
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8071457719582591
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8053105962459932
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8078084689219578
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.8019135317246738
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7961388104098682
            name: Spearman Dot
          - type: pearson_max
            value: 0.8137491067613172
            name: Pearson Max
          - type: spearman_max
            value: 0.8139804248887779
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 512
          type: sts-test-512
        metrics:
          - type: pearson_cosine
            value: 0.8127890716639393
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.813769735512917
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8045619532064516
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.806084784718251
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8047817340341926
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8067787363048019
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.7985706834990611
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7926669266198092
            name: Spearman Dot
          - type: pearson_max
            value: 0.8127890716639393
            name: Pearson Max
          - type: spearman_max
            value: 0.813769735512917
            name: Spearman Max
          - type: pearson_cosine
            value: 0.8127890716639393
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.813769735512917
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8045619532064516
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.806084784718251
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8047817340341926
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8067787363048019
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.7985706834990611
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7926669266198092
            name: Spearman Dot
          - type: pearson_max
            value: 0.8127890716639393
            name: Pearson Max
          - type: spearman_max
            value: 0.813769735512917
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 256
          type: sts-test-256
        metrics:
          - type: pearson_cosine
            value: 0.810388221021721
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8138356923403065
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8015100804443567
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8026219149891689
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8016089017435591
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8030480833628191
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.792265476718613
            name: Pearson Dot
          - type: spearman_dot
            value: 0.787067391010805
            name: Spearman Dot
          - type: pearson_max
            value: 0.810388221021721
            name: Pearson Max
          - type: spearman_max
            value: 0.8138356923403065
            name: Spearman Max
          - type: pearson_cosine
            value: 0.810388221021721
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8138356923403065
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8015100804443567
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8026219149891689
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8016089017435591
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8030480833628191
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.792265476718613
            name: Pearson Dot
          - type: spearman_dot
            value: 0.787067391010805
            name: Spearman Dot
          - type: pearson_max
            value: 0.810388221021721
            name: Pearson Max
          - type: spearman_max
            value: 0.8138356923403065
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 128
          type: sts-test-128
        metrics:
          - type: pearson_cosine
            value: 0.8071777671061434
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8128987608664245
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.7969339482985063
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.7972524285093451
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.7971979787664204
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.797866628579141
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.7752745908442699
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7685950685903284
            name: Spearman Dot
          - type: pearson_max
            value: 0.8071777671061434
            name: Pearson Max
          - type: spearman_max
            value: 0.8128987608664245
            name: Spearman Max
          - type: pearson_cosine
            value: 0.8071777671061434
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8128987608664245
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.7969339482985063
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.7972524285093451
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.7971979787664204
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.797866628579141
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.7752745908442699
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7685950685903284
            name: Spearman Dot
          - type: pearson_max
            value: 0.8071777671061434
            name: Pearson Max
          - type: spearman_max
            value: 0.8128987608664245
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 64
          type: sts-test-64
        metrics:
          - type: pearson_cosine
            value: 0.7992861493805723
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.809205854296297
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.7841737408240652
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.7848704254075567
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.7865782078684138
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.7874610680426495
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.7341564461014968
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7244607540987561
            name: Spearman Dot
          - type: pearson_max
            value: 0.7992861493805723
            name: Pearson Max
          - type: spearman_max
            value: 0.809205854296297
            name: Spearman Max
          - type: pearson_cosine
            value: 0.7992861493805723
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.809205854296297
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.7841737408240652
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.7848704254075567
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.7865782078684138
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.7874610680426495
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.7341564461014968
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7244607540987561
            name: Spearman Dot
          - type: pearson_max
            value: 0.7992861493805723
            name: Pearson Max
          - type: spearman_max
            value: 0.809205854296297
            name: Spearman Max

SentenceTransformer based on aubmindlab/bert-base-arabertv02

This is a sentence-transformers model finetuned from aubmindlab/bert-base-arabertv02. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

  • Model Type: Sentence Transformer
  • Base model: aubmindlab/bert-base-arabertv02
  • Maximum Sequence Length: 512 tokens
  • Output Dimensionality: 768 dimensions
  • Similarity Function: Cosine Similarity

Model Sources

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("Omartificial-Intelligence-Space/Arabert-matro-v4")
# Run inference
sentences = [
    'رجل يرتدي قميص (فيجاس) الأحمر يجلس على طاولة ويلعب بالكاميرا',
    'رجل يلعب بالكاميرا',
    'الرجل يقف ويأخذ الصور',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Semantic Similarity (dataset: sts-test-768)

Metric Value
pearson_cosine 0.8137
spearman_cosine 0.814
pearson_manhattan 0.8052
spearman_manhattan 0.8071
pearson_euclidean 0.8053
spearman_euclidean 0.8078
pearson_dot 0.8019
spearman_dot 0.7961
pearson_max 0.8137
spearman_max 0.814

Semantic Similarity (dataset: sts-test-512)

Metric Value
pearson_cosine 0.8128
spearman_cosine 0.8138
pearson_manhattan 0.8046
spearman_manhattan 0.8061
pearson_euclidean 0.8048
spearman_euclidean 0.8068
pearson_dot 0.7986
spearman_dot 0.7927
pearson_max 0.8128
spearman_max 0.8138

Semantic Similarity (dataset: sts-test-256)

Metric Value
pearson_cosine 0.8104
spearman_cosine 0.8138
pearson_manhattan 0.8015
spearman_manhattan 0.8026
pearson_euclidean 0.8016
spearman_euclidean 0.803
pearson_dot 0.7923
spearman_dot 0.7871
pearson_max 0.8104
spearman_max 0.8138

Semantic Similarity (dataset: sts-test-128)

Metric Value
pearson_cosine 0.8072
spearman_cosine 0.8129
pearson_manhattan 0.7969
spearman_manhattan 0.7973
pearson_euclidean 0.7972
spearman_euclidean 0.7979
pearson_dot 0.7753
spearman_dot 0.7686
pearson_max 0.8072
spearman_max 0.8129

Semantic Similarity (dataset: sts-test-64)

Metric Value
pearson_cosine 0.7993
spearman_cosine 0.8092
pearson_manhattan 0.7842
spearman_manhattan 0.7849
pearson_euclidean 0.7866
spearman_euclidean 0.7875
pearson_dot 0.7342
spearman_dot 0.7245
pearson_max 0.7993
spearman_max 0.8092

Semantic Similarity (dataset: sts-test-768)

Metric Value
pearson_cosine 0.8137
spearman_cosine 0.814
pearson_manhattan 0.8052
spearman_manhattan 0.8071
pearson_euclidean 0.8053
spearman_euclidean 0.8078
pearson_dot 0.8019
spearman_dot 0.7961
pearson_max 0.8137
spearman_max 0.814

Semantic Similarity (dataset: sts-test-512)

Metric Value
pearson_cosine 0.8128
spearman_cosine 0.8138
pearson_manhattan 0.8046
spearman_manhattan 0.8061
pearson_euclidean 0.8048
spearman_euclidean 0.8068
pearson_dot 0.7986
spearman_dot 0.7927
pearson_max 0.8128
spearman_max 0.8138

Semantic Similarity (dataset: sts-test-256)

Metric Value
pearson_cosine 0.8104
spearman_cosine 0.8138
pearson_manhattan 0.8015
spearman_manhattan 0.8026
pearson_euclidean 0.8016
spearman_euclidean 0.803
pearson_dot 0.7923
spearman_dot 0.7871
pearson_max 0.8104
spearman_max 0.8138

Semantic Similarity (dataset: sts-test-128)

Metric Value
pearson_cosine 0.8072
spearman_cosine 0.8129
pearson_manhattan 0.7969
spearman_manhattan 0.7973
pearson_euclidean 0.7972
spearman_euclidean 0.7979
pearson_dot 0.7753
spearman_dot 0.7686
pearson_max 0.8072
spearman_max 0.8129

Semantic Similarity (dataset: sts-test-64)

Metric Value
pearson_cosine 0.7993
spearman_cosine 0.8092
pearson_manhattan 0.7842
spearman_manhattan 0.7849
pearson_euclidean 0.7866
spearman_euclidean 0.7875
pearson_dot 0.7342
spearman_dot 0.7245
pearson_max 0.7993
spearman_max 0.8092

Training Details

Training Dataset

Unnamed Dataset

  • Size: 1,000,000 training samples
  • Columns: anchor, positive, and negative
  • Approximate statistics based on the first 1000 samples:
    |         | anchor | positive | negative |
    |:--------|:-------|:---------|:---------|
    | type    | string | string   | string   |
    | details | min: 4 tokens; mean: 12.0 tokens; max: 69 tokens | min: 4 tokens; mean: 31.78 tokens; max: 174 tokens | min: 4 tokens; mean: 30.79 tokens; max: 216 tokens |
  • Samples:
    anchor positive negative
    ما الذي تتجنبه؟ ما الذي تحاولين تجنبه دائماً؟ أنا في حالة اكتئاب ماذا يجب أن أفعل؟
    رجل يقف عند لافتة صفراء رجل يقترب من علامة رجل بجانب لافتة زرقاء
    لماذا قام (مودي) بحظر أوراق نقدية بقيمة 500 و 1000 روبية؟ لماذا قام مودي بإلغاء عملة الـ 500 و 1000 روبية؟ وما سبب إدخال عملة الـ 2000 روبية فجأة؟ ما هو أفضل خيار بعد الانتهاء من البكالوريوس في الهندسة الميكانيكية؟
  • Loss: MatryoshkaLoss with these parameters:
    {
        "loss": "MultipleNegativesRankingLoss",
        "matryoshka_dims": [
            768,
            512,
            256,
            128,
            64
        ],
        "matryoshka_weights": [
            1,
            1,
            1,
            1,
            1
        ],
        "n_dims_per_step": -1
    }
    

Evaluation Dataset

Omartificial-Intelligence-Space/arabic-n_li-triplet

  • Dataset: Omartificial-Intelligence-Space/arabic-n_li-triplet
  • Size: 6,584 evaluation samples
  • Columns: anchor, positive, and negative
  • Approximate statistics based on the first 1000 samples:
    |         | anchor | positive | negative |
    |:--------|:-------|:---------|:---------|
    | type    | string | string   | string   |
    | details | min: 4 tokens; mean: 14.87 tokens; max: 70 tokens | min: 4 tokens; mean: 7.54 tokens; max: 26 tokens | min: 4 tokens; mean: 8.14 tokens; max: 23 tokens |
  • Samples:
    anchor positive negative
    امرأتان يتعانقان بينما يحملان حزمة إمرأتان يحملان حزمة الرجال يتشاجرون خارج مطعم
    طفلين صغيرين يرتديان قميصاً أزرق، أحدهما يرتدي الرقم 9 والآخر يرتدي الرقم 2 يقفان على خطوات خشبية في الحمام ويغسلان أيديهما في المغسلة. طفلين يرتديان قميصاً مرقماً يغسلون أيديهم طفلين يرتديان سترة يذهبان إلى المدرسة
    رجل يبيع الدونات لعميل خلال معرض عالمي أقيم في مدينة أنجليس رجل يبيع الدونات لعميل امرأة تشرب قهوتها في مقهى صغير
  • Loss: MatryoshkaLoss with these parameters:
    {
        "loss": "MultipleNegativesRankingLoss",
        "matryoshka_dims": [
            768,
            512,
            256,
            128,
            64
        ],
        "matryoshka_weights": [
            1,
            1,
            1,
            1,
            1
        ],
        "n_dims_per_step": -1
    }
    

Training Hyperparameters

Non-Default Hyperparameters

  • per_device_train_batch_size: 64
  • per_device_eval_batch_size: 64
  • warmup_ratio: 0.1
  • fp16: True
  • batch_sampler: no_duplicates

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: no
  • prediction_loss_only: True
  • per_device_train_batch_size: 64
  • per_device_eval_batch_size: 64
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 1
  • eval_accumulation_steps: None
  • torch_empty_cache_steps: None
  • learning_rate: 5e-05
  • weight_decay: 0.0
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1.0
  • num_train_epochs: 3
  • max_steps: -1
  • lr_scheduler_type: linear
  • lr_scheduler_kwargs: {}
  • warmup_ratio: 0.1
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: False
  • fp16: True
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: False
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: False
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • dispatch_batches: None
  • split_batches: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • eval_on_start: False
  • eval_use_gather_object: False
  • batch_sampler: no_duplicates
  • multi_dataset_batch_sampler: proportional

Training Logs

Epoch Step Training Loss sts-test-128_spearman_cosine sts-test-256_spearman_cosine sts-test-512_spearman_cosine sts-test-64_spearman_cosine sts-test-768_spearman_cosine
0.0384 200 9.7813 - - - - -
0.0768 400 4.4771 - - - - -
0.1152 600 3.754 - - - - -
0.1536 800 3.4086 - - - - -
0.1920 1000 3.1323 - - - - -
0.2304 1200 2.9257 - - - - -
0.2688 1400 2.8363 - - - - -
0.3072 1600 2.6156 - - - - -
0.3456 1800 2.5428 - - - - -
0.3840 2000 2.4927 - - - - -
0.4223 2200 2.4 - - - - -
0.4607 2400 2.3193 - - - - -
0.4991 2600 2.2363 - - - - -
0.5375 2800 2.1929 - - - - -
0.5759 3000 2.1396 - - - - -
0.6143 3200 2.0481 - - - - -
0.6527 3400 2.0299 - - - - -
0.6911 3600 1.9895 - - - - -
0.7295 3800 1.9889 - - - - -
0.7679 4000 1.9319 - - - - -
0.8063 4200 1.8865 - - - - -
0.8447 4400 1.8349 - - - - -
0.8831 4600 1.8047 - - - - -
0.9215 4800 1.8009 - - - - -
0.9599 5000 1.7962 - - - - -
0.9983 5200 1.7231 - - - - -
1.0367 5400 0.0288 - - - - -
1.0751 5600 0.0 - - - - -
1.1135 5800 0.0 - - - - -
1.1519 6000 0.0 - - - - -
1.1902 6200 0.0 - - - - -
1.0056 6400 0.2935 - - - - -
1.0440 6600 1.7571 - - - - -
1.0824 6800 1.6487 - - - - -
1.1208 7000 1.6513 - - - - -
1.1591 7200 1.5466 - - - - -
1.1975 7400 1.4583 - - - - -
1.2359 7600 1.3805 - - - - -
1.2743 7800 1.3264 - - - - -
1.3127 8000 1.1898 - - - - -
1.3511 8200 1.1961 - - - - -
1.3895 8400 1.1749 - - - - -
1.4279 8600 1.1438 - - - - -
1.4663 8800 1.1481 - - - - -
1.5047 9000 1.089 - - - - -
1.5431 9200 1.1063 - - - - -
1.5815 9400 1.0759 - - - - -
1.6199 9600 1.0215 - - - - -
1.6583 9800 1.0244 - - - - -
1.6967 10000 1.0546 - - - - -
1.7351 10200 1.0355 - - - - -
1.7735 10400 1.0078 - - - - -
1.8119 10600 1.0102 - - - - -
1.8503 10800 0.9899 - - - - -
1.8887 11000 0.971 - - - - -
1.9270 11200 0.9676 - - - - -
1.9654 11400 0.9707 - - - - -
2.0038 11600 0.8222 - - - - -
2.0422 11800 0.0 - - - - -
2.0806 12000 0.0 - - - - -
2.1190 12200 0.0 - - - - -
2.1574 12400 0.0 - - - - -
2.1958 12600 0.0 - - - - -
2.0111 12800 0.2783 - - - - -
2.0495 13000 0.8261 - - - - -
2.0879 13200 0.868 - - - - -
2.1263 13400 0.8653 - - - - -
2.1647 13600 0.8647 - - - - -
2.2031 13800 0.8085 - - - - -
2.2415 14000 0.8122 - - - - -
2.2799 14200 0.7647 - - - - -
2.3183 14400 0.6959 - - - - -
2.3567 14600 0.7228 - - - - -
2.3951 14800 0.7303 - - - - -
2.4335 15000 0.7056 - - - - -
2.4719 15200 0.737 - - - - -
2.5103 15400 0.7016 - - - - -
2.5487 15600 0.7183 - - - - -
2.5538 15627 - 0.8129 0.8138 0.8138 0.8092 0.8140

Framework Versions

  • Python: 3.10.12
  • Sentence Transformers: 3.0.1
  • Transformers: 4.43.1
  • PyTorch: 2.2.2
  • Accelerate: 0.33.0
  • Datasets: 2.19.0
  • Tokenizers: 0.19.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MatryoshkaLoss

@misc{kusupati2024matryoshka,
    title={Matryoshka Representation Learning}, 
    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
    year={2024},
    eprint={2205.13147},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}

MultipleNegativesRankingLoss

@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply}, 
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}