File size: 25,375 Bytes

---
base_model: aubmindlab/bert-base-arabertv02
library_name: sentence-transformers
metrics:
- pearson_cosine
- spearman_cosine
- pearson_manhattan
- spearman_manhattan
- pearson_euclidean
- spearman_euclidean
- pearson_dot
- spearman_dot
- pearson_max
- spearman_max
pipeline_tag: sentence-similarity
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- loss:CosineSimilarityLoss
- mteb
model-index:
- name: silma-ai/silma-embeddding-matryoshka-v0.1
  results:
  - dataset:
      config: ar
      name: MTEB MassiveIntentClassification (ar)
      revision: 4672e20407010da34463acc759c162ca9734bca6
      split: test
      type: mteb/amazon_massive_intent
    metrics:
    - type: accuracy
      value: 56.445864156018835
    - type: f1
      value: 53.58282538318122
    - type: f1_weighted
      value: 56.821808211639315
    - type: main_score
      value: 56.445864156018835
    task:
      type: Classification
  - dataset:
      config: en
      name: MTEB MassiveIntentClassification (en)
      revision: 4672e20407010da34463acc759c162ca9734bca6
      split: test
      type: mteb/amazon_massive_intent
    metrics:
    - type: accuracy
      value: 47.40080699394754
    - type: f1
      value: 44.729286773524755
    - type: f1_weighted
      value: 47.83506683571795
    - type: main_score
      value: 47.40080699394754
    task:
      type: Classification
  - dataset:
      config: ar
      name: MTEB MassiveIntentClassification (ar)
      revision: 4672e20407010da34463acc759c162ca9734bca6
      split: validation
      type: mteb/amazon_massive_intent
    metrics:
    - type: accuracy
      value: 56.97983275946876
    - type: f1
      value: 53.809263807080086
    - type: f1_weighted
      value: 57.14993215193604
    - type: main_score
      value: 56.97983275946876
    task:
      type: Classification
  - dataset:
      config: en
      name: MTEB MassiveIntentClassification (en)
      revision: 4672e20407010da34463acc759c162ca9734bca6
      split: validation
      type: mteb/amazon_massive_intent
    metrics:
    - type: accuracy
      value: 47.683226758485006
    - type: f1
      value: 44.905317333393775
    - type: f1_weighted
      value: 48.051379514830195
    - type: main_score
      value: 47.683226758485006
    task:
      type: Classification
  - dataset:
      config: ar
      name: MTEB MassiveScenarioClassification (ar)
      revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8
      split: test
      type: mteb/amazon_massive_scenario
    metrics:
    - type: accuracy
      value: 63.31876260928042
    - type: f1
      value: 63.197056314678754
    - type: f1_weighted
      value: 62.7166315473092
    - type: main_score
      value: 63.31876260928042
    task:
      type: Classification
  - dataset:
      config: en
      name: MTEB MassiveScenarioClassification (en)
      revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8
      split: test
      type: mteb/amazon_massive_scenario
    metrics:
    - type: accuracy
      value: 53.35574983187627
    - type: f1
      value: 50.35837223252574
    - type: f1_weighted
      value: 54.11644042208904
    - type: main_score
      value: 53.35574983187627
    task:
      type: Classification
  - dataset:
      config: ar
      name: MTEB MassiveScenarioClassification (ar)
      revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8
      split: validation
      type: mteb/amazon_massive_scenario
    metrics:
    - type: accuracy
      value: 62.26758484997541
    - type: f1
      value: 62.477928166560325
    - type: f1_weighted
      value: 61.92238394647396
    - type: main_score
      value: 62.26758484997541
    task:
      type: Classification
  - dataset:
      config: en
      name: MTEB MassiveScenarioClassification (en)
      revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8
      split: validation
      type: mteb/amazon_massive_scenario
    metrics:
    - type: accuracy
      value: 52.62174126906049
    - type: f1
      value: 50.470501485026716
    - type: f1_weighted
      value: 53.16459392827557
    - type: main_score
      value: 52.62174126906049
    task:
      type: Classification
  - dataset:
      config: en-en
      name: MTEB STS17 (en-en)
      revision: faeb762787bd10488a50c8b5be4a3b82e411949c
      split: test
      type: mteb/sts17-crosslingual-sts
    metrics:
    - type: cosine_pearson
      value: 74.33941506827517
    - type: cosine_spearman
      value: 74.42197838273297
    - type: euclidean_pearson
      value: 75.33836191339782
    - type: euclidean_spearman
      value: 74.37385193453852
    - type: main_score
      value: 74.42197838273297
    - type: manhattan_pearson
      value: 75.41881517194568
    - type: manhattan_spearman
      value: 74.47237277057877
    - type: pearson
      value: 74.33941645999855
    - type: spearman
      value: 74.42197838273297
    task:
      type: STS
  - dataset:
      config: nl-en
      name: MTEB STS17 (nl-en)
      revision: faeb762787bd10488a50c8b5be4a3b82e411949c
      split: test
      type: mteb/sts17-crosslingual-sts
    metrics:
    - type: cosine_pearson
      value: 31.84872826199112
    - type: cosine_spearman
      value: 32.22496230755917
    - type: euclidean_pearson
      value: 21.830860533929688
    - type: euclidean_spearman
      value: 21.38205815348658
    - type: main_score
      value: 32.22496230755917
    - type: manhattan_pearson
      value: 21.852430479395576
    - type: manhattan_spearman
      value: 21.37848326556159
    - type: pearson
      value: 31.84872485436001
    - type: spearman
      value: 32.22496230755917
    task:
      type: STS
  - dataset:
      config: en-ar
      name: MTEB STS17 (en-ar)
      revision: faeb762787bd10488a50c8b5be4a3b82e411949c
      split: test
      type: mteb/sts17-crosslingual-sts
    metrics:
    - type: cosine_pearson
      value: 43.37529327788584
    - type: cosine_spearman
      value: 42.763149514327225
    - type: euclidean_pearson
      value: 39.625411905897394
    - type: euclidean_spearman
      value: 39.26727199746294
    - type: main_score
      value: 42.763149514327225
    - type: manhattan_pearson
      value: 40.49857681486655
    - type: manhattan_spearman
      value: 40.63669314166475
    - type: pearson
      value: 43.37529078998193
    - type: spearman
      value: 42.763149514327225
    task:
      type: STS
  - dataset:
      config: en-tr
      name: MTEB STS17 (en-tr)
      revision: faeb762787bd10488a50c8b5be4a3b82e411949c
      split: test
      type: mteb/sts17-crosslingual-sts
    metrics:
    - type: cosine_pearson
      value: 17.16722415938186
    - type: cosine_spearman
      value: 15.590330355526344
    - type: euclidean_pearson
      value: 4.430499555984906
    - type: euclidean_spearman
      value: 2.729050802084264
    - type: main_score
      value: 15.590330355526344
    - type: manhattan_pearson
      value: 2.805408490135879
    - type: manhattan_spearman
      value: 1.5237347692119627
    - type: pearson
      value: 17.167228709176676
    - type: spearman
      value: 15.590330355526344
    task:
      type: STS
  - dataset:
      config: fr-en
      name: MTEB STS17 (fr-en)
      revision: faeb762787bd10488a50c8b5be4a3b82e411949c
      split: test
      type: mteb/sts17-crosslingual-sts
    metrics:
    - type: cosine_pearson
      value: 36.093945717347395
    - type: cosine_spearman
      value: 37.33997345407934
    - type: euclidean_pearson
      value: 23.156103022485055
    - type: euclidean_spearman
      value: 20.62925594786342
    - type: main_score
      value: 37.33997345407934
    - type: manhattan_pearson
      value: 22.035024322719813
    - type: manhattan_spearman
      value: 19.147522562438795
    - type: pearson
      value: 36.09395175426761
    - type: spearman
      value: 37.33997345407934
    task:
      type: STS
  - dataset:
      config: en-de
      name: MTEB STS17 (en-de)
      revision: faeb762787bd10488a50c8b5be4a3b82e411949c
      split: test
      type: mteb/sts17-crosslingual-sts
    metrics:
    - type: cosine_pearson
      value: 29.064411455563
    - type: cosine_spearman
      value: 29.232781114344697
    - type: euclidean_pearson
      value: 16.90458086330736
    - type: euclidean_spearman
      value: 17.462020565289887
    - type: main_score
      value: 29.232781114344697
    - type: manhattan_pearson
      value: 16.882446230243286
    - type: manhattan_spearman
      value: 17.06144091941576
    - type: pearson
      value: 29.06441922605839
    - type: spearman
      value: 29.232781114344697
    task:
      type: STS
  - dataset:
      config: es-en
      name: MTEB STS17 (es-en)
      revision: faeb762787bd10488a50c8b5be4a3b82e411949c
      split: test
      type: mteb/sts17-crosslingual-sts
    metrics:
    - type: cosine_pearson
      value: 27.686316587339473
    - type: cosine_spearman
      value: 28.650995973102205
    - type: euclidean_pearson
      value: 12.954885279630565
    - type: euclidean_spearman
      value: 11.970815927480198
    - type: main_score
      value: 28.650995973102205
    - type: manhattan_pearson
      value: 12.079730127474948
    - type: manhattan_spearman
      value: 10.606967901984147
    - type: pearson
      value: 27.68631836666537
    - type: spearman
      value: 28.650995973102205
    task:
      type: STS
  - dataset:
      config: ar-ar
      name: MTEB STS17 (ar-ar)
      revision: faeb762787bd10488a50c8b5be4a3b82e411949c
      split: test
      type: mteb/sts17-crosslingual-sts
    metrics:
    - type: cosine_pearson
      value: 84.12612492708037
    - type: cosine_spearman
      value: 84.24703763883515
    - type: euclidean_pearson
      value: 81.38085140113648
    - type: euclidean_spearman
      value: 83.17403450502965
    - type: main_score
      value: 84.24703763883515
    - type: manhattan_pearson
      value: 81.18466522597414
    - type: manhattan_spearman
      value: 82.61184409962614
    - type: pearson
      value: 84.12612546419625
    - type: spearman
      value: 84.25077492152536
    task:
      type: STS
  - dataset:
      config: it-en
      name: MTEB STS17 (it-en)
      revision: faeb762787bd10488a50c8b5be4a3b82e411949c
      split: test
      type: mteb/sts17-crosslingual-sts
    metrics:
    - type: cosine_pearson
      value: 27.697680546701868
    - type: cosine_spearman
      value: 25.19277336255784
    - type: euclidean_pearson
      value: 13.964798090314115
    - type: euclidean_spearman
      value: 10.512169361528596
    - type: main_score
      value: 25.19277336255784
    - type: manhattan_pearson
      value: 13.537525485694433
    - type: manhattan_spearman
      value: 10.334001560105834
    - type: pearson
      value: 27.697681880242325
    - type: spearman
      value: 25.19277336255784
    task:
      type: STS
  - dataset:
      config: de-en
      name: MTEB STS22.v2 (de-en)
      revision: d31f33a128469b20e357535c39b82fb3c3f6f2bd
      split: test
      type: mteb/sts22-crosslingual-sts
    metrics:
    - type: cosine_pearson
      value: 32.87548760760924
    - type: cosine_spearman
      value: 30.69782036694315
    - type: euclidean_pearson
      value: 29.925045225262142
    - type: euclidean_spearman
      value: 34.076021250318334
    - type: main_score
      value: 30.69782036694315
    - type: manhattan_pearson
      value: 30.815090565180945
    - type: manhattan_spearman
      value: 34.91615861045259
    - type: pearson
      value: 32.8754813614174
    - type: spearman
      value: 30.69782036694315
    task:
      type: STS
  - dataset:
      config: zh-en
      name: MTEB STS22.v2 (zh-en)
      revision: d31f33a128469b20e357535c39b82fb3c3f6f2bd
      split: test
      type: mteb/sts22-crosslingual-sts
    metrics:
    - type: cosine_pearson
      value: 23.93269292232737
    - type: cosine_spearman
      value: 16.781461291066496
    - type: euclidean_pearson
      value: 20.87679825681155
    - type: euclidean_spearman
      value: 13.764510796592536
    - type: main_score
      value: 16.781461291066496
    - type: manhattan_pearson
      value: 23.416430850444588
    - type: manhattan_spearman
      value: 17.10405713909058
    - type: pearson
      value: 23.932682034899777
    - type: spearman
      value: 16.781461291066496
    task:
      type: STS
  - dataset:
      config: ar
      name: MTEB STS22.v2 (ar)
      revision: d31f33a128469b20e357535c39b82fb3c3f6f2bd
      split: test
      type: mteb/sts22-crosslingual-sts
    metrics:
    - type: cosine_pearson
      value: 51.73784691362425
    - type: cosine_spearman
      value: 60.01035490847343
    - type: euclidean_pearson
      value: 52.717195602630305
    - type: euclidean_spearman
      value: 60.22164097529916
    - type: main_score
      value: 60.01035490847343
    - type: manhattan_pearson
      value: 53.04979941729716
    - type: manhattan_spearman
      value: 60.393100473647706
    - type: pearson
      value: 51.73784381247053
    - type: spearman
      value: 60.020906672817276
    task:
      type: STS
  - dataset:
      config: es-en
      name: MTEB STS22.v2 (es-en)
      revision: d31f33a128469b20e357535c39b82fb3c3f6f2bd
      split: test
      type: mteb/sts22-crosslingual-sts
    metrics:
    - type: cosine_pearson
      value: 47.917244237624864
    - type: cosine_spearman
      value: 53.23173373821509
    - type: euclidean_pearson
      value: 48.172861539004636
    - type: euclidean_spearman
      value: 53.32970069145014
    - type: main_score
      value: 53.23173373821509
    - type: manhattan_pearson
      value: 48.163716825216646
    - type: manhattan_spearman
      value: 53.77963871495307
    - type: pearson
      value: 47.91724405724847
    - type: spearman
      value: 53.23173373821509
    task:
      type: STS
  - dataset:
      config: pl-en
      name: MTEB STS22.v2 (pl-en)
      revision: d31f33a128469b20e357535c39b82fb3c3f6f2bd
      split: test
      type: mteb/sts22-crosslingual-sts
    metrics:
    - type: cosine_pearson
      value: 43.66748993183993
    - type: cosine_spearman
      value: 38.518248671828594
    - type: euclidean_pearson
      value: 50.475058499541134
    - type: euclidean_spearman
      value: 44.76070858743843
    - type: main_score
      value: 38.518248671828594
    - type: manhattan_pearson
      value: 50.576185727010014
    - type: manhattan_spearman
      value: 45.5306304403841
    - type: pearson
      value: 43.66750472144702
    - type: spearman
      value: 38.518248671828594
    task:
      type: STS
  - dataset:
      config: en
      name: MTEB STS22.v2 (en)
      revision: d31f33a128469b20e357535c39b82fb3c3f6f2bd
      split: test
      type: mteb/sts22-crosslingual-sts
    metrics:
    - type: cosine_pearson
      value: 56.41373213565263
    - type: cosine_spearman
      value: 59.03774516602592
    - type: euclidean_pearson
      value: 54.173092638047294
    - type: euclidean_spearman
      value: 59.130444355085885
    - type: main_score
      value: 59.03774516602592
    - type: manhattan_pearson
      value: 54.18950361517434
    - type: manhattan_spearman
      value: 58.78927227383971
    - type: pearson
      value: 56.413733329868045
    - type: spearman
      value: 59.03774516602592
    task:
      type: STS
license: apache-2.0
language:
- ar
- en
---

# SILMA Arabic Matryoshka Embedding Model 0.1

The **SILMA Arabic Matryoshka Embedding Model 0.1** is an advanced Arabic text embedding model designed to produce powerful, contextually rich representations of text, 
facilitating a wide range of applications, from semantic search to document classification.

This model leverages the innovative **Matryoshka** Embedding technique, which lets you truncate each embedding to a smaller number of dimensions in order to balance speed, storage, and accuracy.

## Usage

### Direct Usage (Sentence Transformers)

First, install the Sentence Transformers library:

```bash
# pandas and tabulate are only needed to print the sample score tables
# (DataFrame.to_markdown relies on the tabulate package).
pip install -U sentence-transformers pandas tabulate
```

Then load the model:

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim
import pandas as pd

# Hugging Face Hub repository id; the "v0.1" suffix matches the `model-index`
# name declared in this card's metadata.
model_name = "silma-ai/silma-embeddding-matryoshka-v0.1"
model = SentenceTransformer(model_name)
```

### Samples

Using Matryoshka, you can specify the first `(n)` dimensions to represent each text.

In the following samples, you can check how each dimension affects the `cosine similarity` between a query and the two inputs.

You can see that in most cases even a very low dimension (e.g. 8) can produce acceptable semantic similarity scores.

#### [+] Short Sentence Similarity

```python
query = "الطقس اليوم مشمس"
sentence_1 = "الجو اليوم كان مشمسًا ورائعًا"
sentence_2 = "الطقس اليوم غائم"

# Encode each text once at full size. A Matryoshka embedding is shortened by
# keeping only its first `dim` values, so nothing needs re-encoding per dim.
query_full = model.encode(query)
sent1_full = model.encode(sentence_1)
sent2_full = model.encode(sentence_2)

scores = []
for dim in [768, 256, 48, 16, 8]:

    query_embedding = query_full[:dim]

    # cos_sim returns a matrix of pairwise scores; [0][0] is the single pair.
    sent1_score = cos_sim(query_embedding, sent1_full[:dim])[0][0].tolist()
    sent2_score = cos_sim(query_embedding, sent2_full[:dim])[0][0].tolist()

    scores.append({
        "dim": dim,
        # True when the semantically closer sentence is ranked first.
        "valid_top": sent1_score > sent2_score,
        "sent1_score": sent1_score,
        "sent2_score": sent2_score,
    })

scores_df = pd.DataFrame(scores)
print(scores_df.to_markdown(index=False))

# |   dim | valid_top   |   sent1_score |   sent2_score |
# |------:|:------------|--------------:|--------------:|
# |   768 | True        |      0.479942 |      0.233572 |
# |   256 | True        |      0.509289 |      0.208452 |
# |    48 | True        |      0.598825 |      0.191677 |
# |    16 | True        |      0.917707 |      0.458854 |
# |     8 | True        |      0.948563 |      0.675662 |

```

#### [+] Long Sentence Similarity

```python
query = "الكتاب يتحدث عن أهمية الذكاء الاصطناعي في تطوير المجتمعات الحديثة"
sentence_1 = "في هذا الكتاب، يناقش الكاتب كيف يمكن للتكنولوجيا أن تغير العالم"
sentence_2 = "الكاتب يتحدث عن أساليب الطبخ التقليدية في دول البحر الأبيض المتوسط"

# Encode each text once at full size. A Matryoshka embedding is shortened by
# keeping only its first `dim` values, so nothing needs re-encoding per dim.
query_full = model.encode(query)
sent1_full = model.encode(sentence_1)
sent2_full = model.encode(sentence_2)

scores = []
for dim in [768, 256, 48, 16, 8]:

    query_embedding = query_full[:dim]

    # cos_sim returns a matrix of pairwise scores; [0][0] is the single pair.
    sent1_score = cos_sim(query_embedding, sent1_full[:dim])[0][0].tolist()
    sent2_score = cos_sim(query_embedding, sent2_full[:dim])[0][0].tolist()

    scores.append({
        "dim": dim,
        # True when the semantically closer sentence is ranked first.
        "valid_top": sent1_score > sent2_score,
        "sent1_score": sent1_score,
        "sent2_score": sent2_score,
    })

scores_df = pd.DataFrame(scores)
print(scores_df.to_markdown(index=False))

# |   dim | valid_top   |   sent1_score |   sent2_score |
# |------:|:------------|--------------:|--------------:|
# |   768 | True        |      0.637418 |      0.262693 |
# |   256 | True        |      0.614761 |      0.268267 |
# |    48 | True        |      0.758887 |      0.384649 |
# |    16 | True        |      0.885737 |      0.204213 |
# |     8 | True        |      0.918684 |      0.146478 |
```

#### [+] Question to Paragraph Matching

```python
query = "ما هي فوائد ممارسة الرياضة؟"
sentence_1 = "ممارسة الرياضة بشكل منتظم تساعد على تحسين الصحة العامة واللياقة البدنية"
sentence_2 = "تعليم الأطفال في سن مبكرة يساعدهم على تطوير المهارات العقلية بسرعة"

# Encode each text once at full size. A Matryoshka embedding is shortened by
# keeping only its first `dim` values, so nothing needs re-encoding per dim.
query_full = model.encode(query)
sent1_full = model.encode(sentence_1)
sent2_full = model.encode(sentence_2)

scores = []
for dim in [768, 256, 48, 16, 8]:

    query_embedding = query_full[:dim]

    # cos_sim returns a matrix of pairwise scores; [0][0] is the single pair.
    sent1_score = cos_sim(query_embedding, sent1_full[:dim])[0][0].tolist()
    sent2_score = cos_sim(query_embedding, sent2_full[:dim])[0][0].tolist()

    scores.append({
        "dim": dim,
        # True when the semantically closer sentence is ranked first.
        "valid_top": sent1_score > sent2_score,
        "sent1_score": sent1_score,
        "sent2_score": sent2_score,
    })

scores_df = pd.DataFrame(scores)
print(scores_df.to_markdown(index=False))

# |   dim | valid_top   |   sent1_score |   sent2_score |
# |------:|:------------|--------------:|--------------:|
# |   768 | True        |      0.520329 |    0.00295128 |
# |   256 | True        |      0.556088 |   -0.017764   |
# |    48 | True        |      0.586194 |   -0.110691   |
# |    16 | True        |      0.606462 |   -0.331682   |
# |     8 | True        |      0.689649 |   -0.359202   |
```

#### [+] Message to Intent-Name Mapping

```python
query = "أرغب في حجز تذكرة طيران من دبي الى القاهرة يوم الثلاثاء القادم"
sentence_1 = "حجز رحلة"
sentence_2 = "إلغاء حجز"

# Encode each text once at full size. A Matryoshka embedding is shortened by
# keeping only its first `dim` values, so nothing needs re-encoding per dim.
query_full = model.encode(query)
sent1_full = model.encode(sentence_1)
sent2_full = model.encode(sentence_2)

scores = []
for dim in [768, 256, 48, 16, 8]:

    query_embedding = query_full[:dim]

    # cos_sim returns a matrix of pairwise scores; [0][0] is the single pair.
    sent1_score = cos_sim(query_embedding, sent1_full[:dim])[0][0].tolist()
    sent2_score = cos_sim(query_embedding, sent2_full[:dim])[0][0].tolist()

    scores.append({
        "dim": dim,
        # True when the semantically closer sentence is ranked first.
        "valid_top": sent1_score > sent2_score,
        "sent1_score": sent1_score,
        "sent2_score": sent2_score,
    })

scores_df = pd.DataFrame(scores)
print(scores_df.to_markdown(index=False))

# |   dim | valid_top   |   sent1_score |   sent2_score |
# |------:|:------------|--------------:|--------------:|
# |   768 | True        |     0.476535  |     0.221451  |
# |   256 | True        |     0.392701  |     0.224967  |
# |    48 | True        |     0.316223  |     0.0210683 |
# |    16 | False       |    -0.0242871 |     0.0250766 |
# |     8 | True        |    -0.215241  |    -0.258904  |
```

## Training Details

We curated a dataset [silma-ai/silma-arabic-triplets-dataset-v1.0](https://huggingface.co/datasets/silma-ai/silma-arabic-triplets-dataset-v1.0) which
contains more than `2.25M` records of (anchor, positive and negative) Arabic/English samples. 
Only the first `600` samples were taken to be the `eval` dataset, while the rest were used for fine-tuning.

This produced a fine-tuned `Matryoshka` model based on [aubmindlab/bert-base-arabertv02](https://huggingface.co/aubmindlab/bert-base-arabertv02) with the following hyperparameters:

- `per_device_train_batch_size`: 250
- `per_device_eval_batch_size`: 10
- `learning_rate`: 1e-05
- `num_train_epochs`: 3
- `bf16`: True
- `dataloader_drop_last`: True
- `optim`: adamw_torch_fused
- `batch_sampler`: no_duplicates

**[training script](https://github.com/UKPLab/sentence-transformers/blob/master/examples/training/matryoshka/matryoshka_sts.py)**

### Framework Versions
- Python: 3.10.14
- Sentence Transformers: 3.2.0
- Transformers: 4.45.2
- PyTorch: 2.3.1
- Accelerate: 1.0.1
- Datasets: 3.0.1
- Tokenizers: 0.20.1

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
```

### Citation:

#### BibTeX:

```bibtex
@misc{silma2024embedding,
  author = {Abu Bakr Soliman and Karim Ouda and {SILMA AI}},
  title = {SILMA Embedding Matryoshka 0.1},
  year = {2024},
  publisher = {Hugging Face},
  howpublished = {\url{https://huggingface.co/silma-ai/silma-embeddding-matryoshka-v0.1}},
}
```

#### APA:

```apa
Abu Bakr Soliman, Karim Ouda, SILMA AI. (2024). SILMA Embedding Matryoshka 0.1 [Model]. Hugging Face. https://huggingface.co/silma-ai/silma-embeddding-matryoshka-v0.1
```

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### MatryoshkaLoss
```bibtex
@misc{kusupati2024matryoshka,
    title={Matryoshka Representation Learning},
    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
    year={2024},
    eprint={2205.13147},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}
```

#### MultipleNegativesRankingLoss
```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->