karimouda's picture
Update README.md
a520977 verified
|
raw
history blame
25.4 kB
metadata
# Model-card metadata for this README.
# NOTE(review): this looks like front matter whose `---` delimiters were lost
# in extraction — confirm before parsing this file as YAML.
# Checkpoint the model was fine-tuned from (see "Training Details" below).
base_model: aubmindlab/bert-base-arabertv02
library_name: sentence-transformers
# Similarity metric identifiers declared for this model.
metrics:
  - pearson_cosine
  - spearman_cosine
  - pearson_manhattan
  - spearman_manhattan
  - pearson_euclidean
  - spearman_euclidean
  - pearson_dot
  - spearman_dot
  - pearson_max
  - spearman_max
pipeline_tag: sentence-similarity
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  # NOTE(review): the citation section of this card references MatryoshkaLoss
  # and MultipleNegativesRankingLoss, not CosineSimilarityLoss — confirm this
  # tag is still accurate.
  - loss:CosineSimilarityLoss
  - mteb
model-index:
  - name: silma-ai/silma-embeddding-matryoshka-v0.1
    results:
      - dataset:
          config: ar
          name: MTEB MassiveIntentClassification (ar)
          revision: 4672e20407010da34463acc759c162ca9734bca6
          split: test
          type: mteb/amazon_massive_intent
        metrics:
          - type: accuracy
            value: 56.445864156018835
          - type: f1
            value: 53.58282538318122
          - type: f1_weighted
            value: 56.821808211639315
          - type: main_score
            value: 56.445864156018835
        task:
          type: Classification
      - dataset:
          config: en
          name: MTEB MassiveIntentClassification (en)
          revision: 4672e20407010da34463acc759c162ca9734bca6
          split: test
          type: mteb/amazon_massive_intent
        metrics:
          - type: accuracy
            value: 47.40080699394754
          - type: f1
            value: 44.729286773524755
          - type: f1_weighted
            value: 47.83506683571795
          - type: main_score
            value: 47.40080699394754
        task:
          type: Classification
      - dataset:
          config: ar
          name: MTEB MassiveIntentClassification (ar)
          revision: 4672e20407010da34463acc759c162ca9734bca6
          split: validation
          type: mteb/amazon_massive_intent
        metrics:
          - type: accuracy
            value: 56.97983275946876
          - type: f1
            value: 53.809263807080086
          - type: f1_weighted
            value: 57.14993215193604
          - type: main_score
            value: 56.97983275946876
        task:
          type: Classification
      - dataset:
          config: en
          name: MTEB MassiveIntentClassification (en)
          revision: 4672e20407010da34463acc759c162ca9734bca6
          split: validation
          type: mteb/amazon_massive_intent
        metrics:
          - type: accuracy
            value: 47.683226758485006
          - type: f1
            value: 44.905317333393775
          - type: f1_weighted
            value: 48.051379514830195
          - type: main_score
            value: 47.683226758485006
        task:
          type: Classification
      - dataset:
          config: ar
          name: MTEB MassiveScenarioClassification (ar)
          revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8
          split: test
          type: mteb/amazon_massive_scenario
        metrics:
          - type: accuracy
            value: 63.31876260928042
          - type: f1
            value: 63.197056314678754
          - type: f1_weighted
            value: 62.7166315473092
          - type: main_score
            value: 63.31876260928042
        task:
          type: Classification
      - dataset:
          config: en
          name: MTEB MassiveScenarioClassification (en)
          revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8
          split: test
          type: mteb/amazon_massive_scenario
        metrics:
          - type: accuracy
            value: 53.35574983187627
          - type: f1
            value: 50.35837223252574
          - type: f1_weighted
            value: 54.11644042208904
          - type: main_score
            value: 53.35574983187627
        task:
          type: Classification
      - dataset:
          config: ar
          name: MTEB MassiveScenarioClassification (ar)
          revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8
          split: validation
          type: mteb/amazon_massive_scenario
        metrics:
          - type: accuracy
            value: 62.26758484997541
          - type: f1
            value: 62.477928166560325
          - type: f1_weighted
            value: 61.92238394647396
          - type: main_score
            value: 62.26758484997541
        task:
          type: Classification
      - dataset:
          config: en
          name: MTEB MassiveScenarioClassification (en)
          revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8
          split: validation
          type: mteb/amazon_massive_scenario
        metrics:
          - type: accuracy
            value: 52.62174126906049
          - type: f1
            value: 50.470501485026716
          - type: f1_weighted
            value: 53.16459392827557
          - type: main_score
            value: 52.62174126906049
        task:
          type: Classification
      - dataset:
          config: en-en
          name: MTEB STS17 (en-en)
          revision: faeb762787bd10488a50c8b5be4a3b82e411949c
          split: test
          type: mteb/sts17-crosslingual-sts
        metrics:
          - type: cosine_pearson
            value: 74.33941506827517
          - type: cosine_spearman
            value: 74.42197838273297
          - type: euclidean_pearson
            value: 75.33836191339782
          - type: euclidean_spearman
            value: 74.37385193453852
          - type: main_score
            value: 74.42197838273297
          - type: manhattan_pearson
            value: 75.41881517194568
          - type: manhattan_spearman
            value: 74.47237277057877
          - type: pearson
            value: 74.33941645999855
          - type: spearman
            value: 74.42197838273297
        task:
          type: STS
      - dataset:
          config: nl-en
          name: MTEB STS17 (nl-en)
          revision: faeb762787bd10488a50c8b5be4a3b82e411949c
          split: test
          type: mteb/sts17-crosslingual-sts
        metrics:
          - type: cosine_pearson
            value: 31.84872826199112
          - type: cosine_spearman
            value: 32.22496230755917
          - type: euclidean_pearson
            value: 21.830860533929688
          - type: euclidean_spearman
            value: 21.38205815348658
          - type: main_score
            value: 32.22496230755917
          - type: manhattan_pearson
            value: 21.852430479395576
          - type: manhattan_spearman
            value: 21.37848326556159
          - type: pearson
            value: 31.84872485436001
          - type: spearman
            value: 32.22496230755917
        task:
          type: STS
      - dataset:
          config: en-ar
          name: MTEB STS17 (en-ar)
          revision: faeb762787bd10488a50c8b5be4a3b82e411949c
          split: test
          type: mteb/sts17-crosslingual-sts
        metrics:
          - type: cosine_pearson
            value: 43.37529327788584
          - type: cosine_spearman
            value: 42.763149514327225
          - type: euclidean_pearson
            value: 39.625411905897394
          - type: euclidean_spearman
            value: 39.26727199746294
          - type: main_score
            value: 42.763149514327225
          - type: manhattan_pearson
            value: 40.49857681486655
          - type: manhattan_spearman
            value: 40.63669314166475
          - type: pearson
            value: 43.37529078998193
          - type: spearman
            value: 42.763149514327225
        task:
          type: STS
      - dataset:
          config: en-tr
          name: MTEB STS17 (en-tr)
          revision: faeb762787bd10488a50c8b5be4a3b82e411949c
          split: test
          type: mteb/sts17-crosslingual-sts
        metrics:
          - type: cosine_pearson
            value: 17.16722415938186
          - type: cosine_spearman
            value: 15.590330355526344
          - type: euclidean_pearson
            value: 4.430499555984906
          - type: euclidean_spearman
            value: 2.729050802084264
          - type: main_score
            value: 15.590330355526344
          - type: manhattan_pearson
            value: 2.805408490135879
          - type: manhattan_spearman
            value: 1.5237347692119627
          - type: pearson
            value: 17.167228709176676
          - type: spearman
            value: 15.590330355526344
        task:
          type: STS
      - dataset:
          config: fr-en
          name: MTEB STS17 (fr-en)
          revision: faeb762787bd10488a50c8b5be4a3b82e411949c
          split: test
          type: mteb/sts17-crosslingual-sts
        metrics:
          - type: cosine_pearson
            value: 36.093945717347395
          - type: cosine_spearman
            value: 37.33997345407934
          - type: euclidean_pearson
            value: 23.156103022485055
          - type: euclidean_spearman
            value: 20.62925594786342
          - type: main_score
            value: 37.33997345407934
          - type: manhattan_pearson
            value: 22.035024322719813
          - type: manhattan_spearman
            value: 19.147522562438795
          - type: pearson
            value: 36.09395175426761
          - type: spearman
            value: 37.33997345407934
        task:
          type: STS
      - dataset:
          config: en-de
          name: MTEB STS17 (en-de)
          revision: faeb762787bd10488a50c8b5be4a3b82e411949c
          split: test
          type: mteb/sts17-crosslingual-sts
        metrics:
          - type: cosine_pearson
            value: 29.064411455563
          - type: cosine_spearman
            value: 29.232781114344697
          - type: euclidean_pearson
            value: 16.90458086330736
          - type: euclidean_spearman
            value: 17.462020565289887
          - type: main_score
            value: 29.232781114344697
          - type: manhattan_pearson
            value: 16.882446230243286
          - type: manhattan_spearman
            value: 17.06144091941576
          - type: pearson
            value: 29.06441922605839
          - type: spearman
            value: 29.232781114344697
        task:
          type: STS
      - dataset:
          config: es-en
          name: MTEB STS17 (es-en)
          revision: faeb762787bd10488a50c8b5be4a3b82e411949c
          split: test
          type: mteb/sts17-crosslingual-sts
        metrics:
          - type: cosine_pearson
            value: 27.686316587339473
          - type: cosine_spearman
            value: 28.650995973102205
          - type: euclidean_pearson
            value: 12.954885279630565
          - type: euclidean_spearman
            value: 11.970815927480198
          - type: main_score
            value: 28.650995973102205
          - type: manhattan_pearson
            value: 12.079730127474948
          - type: manhattan_spearman
            value: 10.606967901984147
          - type: pearson
            value: 27.68631836666537
          - type: spearman
            value: 28.650995973102205
        task:
          type: STS
      - dataset:
          config: ar-ar
          name: MTEB STS17 (ar-ar)
          revision: faeb762787bd10488a50c8b5be4a3b82e411949c
          split: test
          type: mteb/sts17-crosslingual-sts
        metrics:
          - type: cosine_pearson
            value: 84.12612492708037
          - type: cosine_spearman
            value: 84.24703763883515
          - type: euclidean_pearson
            value: 81.38085140113648
          - type: euclidean_spearman
            value: 83.17403450502965
          - type: main_score
            value: 84.24703763883515
          - type: manhattan_pearson
            value: 81.18466522597414
          - type: manhattan_spearman
            value: 82.61184409962614
          - type: pearson
            value: 84.12612546419625
          - type: spearman
            value: 84.25077492152536
        task:
          type: STS
      - dataset:
          config: it-en
          name: MTEB STS17 (it-en)
          revision: faeb762787bd10488a50c8b5be4a3b82e411949c
          split: test
          type: mteb/sts17-crosslingual-sts
        metrics:
          - type: cosine_pearson
            value: 27.697680546701868
          - type: cosine_spearman
            value: 25.19277336255784
          - type: euclidean_pearson
            value: 13.964798090314115
          - type: euclidean_spearman
            value: 10.512169361528596
          - type: main_score
            value: 25.19277336255784
          - type: manhattan_pearson
            value: 13.537525485694433
          - type: manhattan_spearman
            value: 10.334001560105834
          - type: pearson
            value: 27.697681880242325
          - type: spearman
            value: 25.19277336255784
        task:
          type: STS
      - dataset:
          config: de-en
          name: MTEB STS22.v2 (de-en)
          revision: d31f33a128469b20e357535c39b82fb3c3f6f2bd
          split: test
          type: mteb/sts22-crosslingual-sts
        metrics:
          - type: cosine_pearson
            value: 32.87548760760924
          - type: cosine_spearman
            value: 30.69782036694315
          - type: euclidean_pearson
            value: 29.925045225262142
          - type: euclidean_spearman
            value: 34.076021250318334
          - type: main_score
            value: 30.69782036694315
          - type: manhattan_pearson
            value: 30.815090565180945
          - type: manhattan_spearman
            value: 34.91615861045259
          - type: pearson
            value: 32.8754813614174
          - type: spearman
            value: 30.69782036694315
        task:
          type: STS
      - dataset:
          config: zh-en
          name: MTEB STS22.v2 (zh-en)
          revision: d31f33a128469b20e357535c39b82fb3c3f6f2bd
          split: test
          type: mteb/sts22-crosslingual-sts
        metrics:
          - type: cosine_pearson
            value: 23.93269292232737
          - type: cosine_spearman
            value: 16.781461291066496
          - type: euclidean_pearson
            value: 20.87679825681155
          - type: euclidean_spearman
            value: 13.764510796592536
          - type: main_score
            value: 16.781461291066496
          - type: manhattan_pearson
            value: 23.416430850444588
          - type: manhattan_spearman
            value: 17.10405713909058
          - type: pearson
            value: 23.932682034899777
          - type: spearman
            value: 16.781461291066496
        task:
          type: STS
      - dataset:
          config: ar
          name: MTEB STS22.v2 (ar)
          revision: d31f33a128469b20e357535c39b82fb3c3f6f2bd
          split: test
          type: mteb/sts22-crosslingual-sts
        metrics:
          - type: cosine_pearson
            value: 51.73784691362425
          - type: cosine_spearman
            value: 60.01035490847343
          - type: euclidean_pearson
            value: 52.717195602630305
          - type: euclidean_spearman
            value: 60.22164097529916
          - type: main_score
            value: 60.01035490847343
          - type: manhattan_pearson
            value: 53.04979941729716
          - type: manhattan_spearman
            value: 60.393100473647706
          - type: pearson
            value: 51.73784381247053
          - type: spearman
            value: 60.020906672817276
        task:
          type: STS
      - dataset:
          config: es-en
          name: MTEB STS22.v2 (es-en)
          revision: d31f33a128469b20e357535c39b82fb3c3f6f2bd
          split: test
          type: mteb/sts22-crosslingual-sts
        metrics:
          - type: cosine_pearson
            value: 47.917244237624864
          - type: cosine_spearman
            value: 53.23173373821509
          - type: euclidean_pearson
            value: 48.172861539004636
          - type: euclidean_spearman
            value: 53.32970069145014
          - type: main_score
            value: 53.23173373821509
          - type: manhattan_pearson
            value: 48.163716825216646
          - type: manhattan_spearman
            value: 53.77963871495307
          - type: pearson
            value: 47.91724405724847
          - type: spearman
            value: 53.23173373821509
        task:
          type: STS
      - dataset:
          config: pl-en
          name: MTEB STS22.v2 (pl-en)
          revision: d31f33a128469b20e357535c39b82fb3c3f6f2bd
          split: test
          type: mteb/sts22-crosslingual-sts
        metrics:
          - type: cosine_pearson
            value: 43.66748993183993
          - type: cosine_spearman
            value: 38.518248671828594
          - type: euclidean_pearson
            value: 50.475058499541134
          - type: euclidean_spearman
            value: 44.76070858743843
          - type: main_score
            value: 38.518248671828594
          - type: manhattan_pearson
            value: 50.576185727010014
          - type: manhattan_spearman
            value: 45.5306304403841
          - type: pearson
            value: 43.66750472144702
          - type: spearman
            value: 38.518248671828594
        task:
          type: STS
      - dataset:
          config: en
          name: MTEB STS22.v2 (en)
          revision: d31f33a128469b20e357535c39b82fb3c3f6f2bd
          split: test
          type: mteb/sts22-crosslingual-sts
        metrics:
          - type: cosine_pearson
            value: 56.41373213565263
          - type: cosine_spearman
            value: 59.03774516602592
          - type: euclidean_pearson
            value: 54.173092638047294
          - type: euclidean_spearman
            value: 59.130444355085885
          - type: main_score
            value: 59.03774516602592
          - type: manhattan_pearson
            value: 54.18950361517434
          - type: manhattan_spearman
            value: 58.78927227383971
          - type: pearson
            value: 56.413733329868045
          - type: spearman
            value: 59.03774516602592
        task:
          type: STS
# License identifier declared for this model card.
license: apache-2.0
# Languages declared for the model; "Training Details" below describes the
# training data as Arabic/English samples.
language:
  - ar
  - en

SILMA Arabic Matryoshka Embedding Model 0.1

The SILMA Arabic Matryoshka Embedding Model 0.1 is an advanced Arabic text embedding model designed to produce powerful, contextually rich representations of text, facilitating a wide range of applications, from semantic search to document classification.

This model leverages the Matryoshka Embedding technique, which lets you use only the first (n) dimensions of each embedding to balance the trade-offs between speed, storage, and accuracy.

Usage

Direct Usage (Sentence Transformers)

First, install the Sentence Transformers library:

pip install -U sentence-transformers

Then load the model:

# SentenceTransformer loads the model; cos_sim and pandas are used by the
# sample snippets further down.
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim
import pandas as pd

# NOTE(review): the model-index metadata in this card names the model
# "silma-ai/silma-embeddding-matryoshka-v0.1" (with a "v"), while this id has
# no "v" — confirm which repo id actually resolves before copying the snippet.
model_name = "silma-ai/silma-embeddding-matryoshka-0.1"
# The resulting `model` object is reused by every sample below.
model = SentenceTransformer(model_name)

Samples

Using Matryoshka, you can specify the first (n) dimensions to represent each text.

In the following samples, you can check how each dimension affects the cosine similarity between a query and the two inputs.

Notice that in most cases, even a very low dimension (e.g. 8) can produce acceptable semantic similarity scores.

[+] Short Sentence Similarity

# Short-sentence sample: compare one query against two candidate sentences
# at several truncated embedding sizes.
query = "الطقس اليوم مشمس"
sentence_1 = "الجو اليوم كان مشمسًا ورائعًا"
sentence_2 = "الطقس اليوم غائم"

# Encode each text once. The full embeddings do not depend on `dim`, so
# encoding inside the loop would repeat the same model calls for every size.
query_full = model.encode(query)
sent1_full = model.encode(sentence_1)
sent2_full = model.encode(sentence_2)

scores = []
for dim in [768, 256, 48, 16, 8]:
    # Keep only the first `dim` components of each embedding.
    query_embedding = query_full[:dim]

    # cos_sim returns a matrix; [0][0] is the single query/sentence score.
    sent1_score = cos_sim(query_embedding, sent1_full[:dim])[0][0].item()
    sent2_score = cos_sim(query_embedding, sent2_full[:dim])[0][0].item()

    scores.append({
        "dim": dim,
        # True when sentence_1 scores higher than sentence_2 at this size.
        "valid_top": sent1_score > sent2_score,
        "sent1_score": sent1_score,
        "sent2_score": sent2_score,
    })

# One row per dimension, printed as a Markdown table.
scores_df = pd.DataFrame(scores)
print(scores_df.to_markdown(index=False))

# |   dim | valid_top   |   sent1_score |   sent2_score |
# |------:|:------------|--------------:|--------------:|
# |   768 | True        |      0.479942 |      0.233572 |
# |   256 | True        |      0.509289 |      0.208452 |
# |    48 | True        |      0.598825 |      0.191677 |
# |    16 | True        |      0.917707 |      0.458854 |
# |     8 | True        |      0.948563 |      0.675662 |

[+] Long Sentence Similarity

# Long-sentence sample: compare one query against two candidate sentences
# at several truncated embedding sizes.
query = "الكتاب يتحدث عن أهمية الذكاء الاصطناعي في تطوير المجتمعات الحديثة"
sentence_1 = "في هذا الكتاب، يناقش الكاتب كيف يمكن للتكنولوجيا أن تغير العالم"
sentence_2 = "الكاتب يتحدث عن أساليب الطبخ التقليدية في دول البحر الأبيض المتوسط"

# Encode each text once. The full embeddings do not depend on `dim`, so
# encoding inside the loop would repeat the same model calls for every size.
query_full = model.encode(query)
sent1_full = model.encode(sentence_1)
sent2_full = model.encode(sentence_2)

scores = []
for dim in [768, 256, 48, 16, 8]:
    # Keep only the first `dim` components of each embedding.
    query_embedding = query_full[:dim]

    # cos_sim returns a matrix; [0][0] is the single query/sentence score.
    sent1_score = cos_sim(query_embedding, sent1_full[:dim])[0][0].item()
    sent2_score = cos_sim(query_embedding, sent2_full[:dim])[0][0].item()

    scores.append({
        "dim": dim,
        # True when sentence_1 scores higher than sentence_2 at this size.
        "valid_top": sent1_score > sent2_score,
        "sent1_score": sent1_score,
        "sent2_score": sent2_score,
    })

# One row per dimension, printed as a Markdown table.
scores_df = pd.DataFrame(scores)
print(scores_df.to_markdown(index=False))

# |   dim | valid_top   |   sent1_score |   sent2_score |
# |------:|:------------|--------------:|--------------:|
# |   768 | True        |      0.637418 |      0.262693 |
# |   256 | True        |      0.614761 |      0.268267 |
# |    48 | True        |      0.758887 |      0.384649 |
# |    16 | True        |      0.885737 |      0.204213 |
# |     8 | True        |      0.918684 |      0.146478 |

[+] Question to Paragraph Matching

# Question-to-paragraph sample: compare one question against two candidate
# paragraphs at several truncated embedding sizes.
query = "ما هي فوائد ممارسة الرياضة؟"
sentence_1 = "ممارسة الرياضة بشكل منتظم تساعد على تحسين الصحة العامة واللياقة البدنية"
sentence_2 = "تعليم الأطفال في سن مبكرة يساعدهم على تطوير المهارات العقلية بسرعة"

# Encode each text once. The full embeddings do not depend on `dim`, so
# encoding inside the loop would repeat the same model calls for every size.
query_full = model.encode(query)
sent1_full = model.encode(sentence_1)
sent2_full = model.encode(sentence_2)

scores = []
for dim in [768, 256, 48, 16, 8]:
    # Keep only the first `dim` components of each embedding.
    query_embedding = query_full[:dim]

    # cos_sim returns a matrix; [0][0] is the single query/sentence score.
    sent1_score = cos_sim(query_embedding, sent1_full[:dim])[0][0].item()
    sent2_score = cos_sim(query_embedding, sent2_full[:dim])[0][0].item()

    scores.append({
        "dim": dim,
        # True when sentence_1 scores higher than sentence_2 at this size.
        "valid_top": sent1_score > sent2_score,
        "sent1_score": sent1_score,
        "sent2_score": sent2_score,
    })

# One row per dimension, printed as a Markdown table.
scores_df = pd.DataFrame(scores)
print(scores_df.to_markdown(index=False))

# |   dim | valid_top   |   sent1_score |   sent2_score |
# |------:|:------------|--------------:|--------------:|
# |   768 | True        |      0.520329 |    0.00295128 |
# |   256 | True        |      0.556088 |   -0.017764   |
# |    48 | True        |      0.586194 |   -0.110691   |
# |    16 | True        |      0.606462 |   -0.331682   |
# |     8 | True        |      0.689649 |   -0.359202   |

[+] Message to Intent-Name Mapping

# Message-to-intent sample: compare one user message against two candidate
# intent names at several truncated embedding sizes.
query = "أرغب في حجز تذكرة طيران من دبي الى القاهرة يوم الثلاثاء القادم"
sentence_1 = "حجز رحلة"
sentence_2 = "إلغاء حجز"

# Encode each text once. The full embeddings do not depend on `dim`, so
# encoding inside the loop would repeat the same model calls for every size.
query_full = model.encode(query)
sent1_full = model.encode(sentence_1)
sent2_full = model.encode(sentence_2)

scores = []
for dim in [768, 256, 48, 16, 8]:
    # Keep only the first `dim` components of each embedding.
    query_embedding = query_full[:dim]

    # cos_sim returns a matrix; [0][0] is the single query/sentence score.
    sent1_score = cos_sim(query_embedding, sent1_full[:dim])[0][0].item()
    sent2_score = cos_sim(query_embedding, sent2_full[:dim])[0][0].item()

    scores.append({
        "dim": dim,
        # True when sentence_1 scores higher than sentence_2 at this size.
        "valid_top": sent1_score > sent2_score,
        "sent1_score": sent1_score,
        "sent2_score": sent2_score,
    })

# One row per dimension, printed as a Markdown table.
scores_df = pd.DataFrame(scores)
print(scores_df.to_markdown(index=False))

# |   dim | valid_top   |   sent1_score |   sent2_score |
# |------:|:------------|--------------:|--------------:|
# |   768 | True        |     0.476535  |     0.221451  |
# |   256 | True        |     0.392701  |     0.224967  |
# |    48 | True        |     0.316223  |     0.0210683 |
# |    16 | False       |    -0.0242871 |     0.0250766 |
# |     8 | True        |    -0.215241  |    -0.258904  |

Training Details

We curated a dataset, silma-ai/silma-arabic-triplets-dataset-v1.0, which contains more than 2.25M (anchor, positive, negative) Arabic/English records. Only the first 600 samples were held out as the evaluation dataset, while the rest were used for fine-tuning.

This produced a finetuned Matryoshka model based on aubmindlab/bert-base-arabertv02 with the following hyperparameters:

  • per_device_train_batch_size: 250
  • per_device_eval_batch_size: 10
  • learning_rate: 1e-05
  • num_train_epochs: 3
  • bf16: True
  • dataloader_drop_last: True
  • optim: adamw_torch_fused
  • batch_sampler: no_duplicates

training script

Framework Versions

  • Python: 3.10.14
  • Sentence Transformers: 3.2.0
  • Transformers: 4.45.2
  • PyTorch: 2.3.1
  • Accelerate: 1.0.1
  • Datasets: 3.0.1
  • Tokenizers: 0.20.1

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

Citation:

BibTeX:

@misc{silma2024embedding,
  author = {Abu Bakr Soliman and Karim Ouda and {SILMA AI}},
  title = {SILMA Embedding Matryoshka 0.1},
  year = {2024},
  publisher = {Hugging Face},
  howpublished = {\url{https://huggingface.co/silma-ai/silma-embeddding-matryoshka-0.1}},
}

APA:

Abu Bakr Soliman, Karim Ouda, SILMA AI. (2024). SILMA Embedding Matryoshka 0.1 [Model]. Hugging Face. https://huggingface.co/silma-ai/silma-embeddding-matryoshka-0.1

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MatryoshkaLoss

@misc{kusupati2024matryoshka,
    title={Matryoshka Representation Learning},
    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
    year={2024},
    eprint={2205.13147},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}

MultipleNegativesRankingLoss

@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}