XYZ-embedding-zh-v2 / README.md
fangxq's picture
上传xyz模型
527bc01 verified
metadata
model-index:
  - name: XYZ-embedding-zh-v2
    results:
      - dataset:
          config: default
          name: MTEB CMedQAv1
          revision: None
          split: test
          type: C-MTEB/CMedQAv1
        metrics:
          - type: map
            value: 89.9766367822762
          - type: mrr
            value: 91.88896825396824
          - type: main_score
            value: 89.9766367822762
        task:
          type: Reranking
      - dataset:
          config: default
          name: MTEB CMedQAv2
          revision: None
          split: test
          type: C-MTEB/CMedQAv2
        metrics:
          - type: map
            value: 89.04628340075982
          - type: mrr
            value: 91.21702380952381
          - type: main_score
            value: 89.04628340075982
        task:
          type: Reranking
      - dataset:
          config: default
          name: MTEB CmedqaRetrieval
          revision: None
          split: dev
          type: C-MTEB/CmedqaRetrieval
        metrics:
          - type: map_at_1
            value: 27.796
          - type: map_at_10
            value: 41.498000000000005
          - type: map_at_100
            value: 43.332
          - type: map_at_1000
            value: 43.429
          - type: map_at_3
            value: 37.172
          - type: map_at_5
            value: 39.617000000000004
          - type: mrr_at_1
            value: 42.111
          - type: mrr_at_10
            value: 50.726000000000006
          - type: mrr_at_100
            value: 51.632
          - type: mrr_at_1000
            value: 51.67
          - type: mrr_at_3
            value: 48.429
          - type: mrr_at_5
            value: 49.662
          - type: ndcg_at_1
            value: 42.111
          - type: ndcg_at_10
            value: 48.294
          - type: ndcg_at_100
            value: 55.135999999999996
          - type: ndcg_at_1000
            value: 56.818000000000005
          - type: ndcg_at_3
            value: 43.185
          - type: ndcg_at_5
            value: 45.266
          - type: precision_at_1
            value: 42.111
          - type: precision_at_10
            value: 10.635
          - type: precision_at_100
            value: 1.619
          - type: precision_at_1000
            value: 0.183
          - type: precision_at_3
            value: 24.539
          - type: precision_at_5
            value: 17.644000000000002
          - type: recall_at_1
            value: 27.796
          - type: recall_at_10
            value: 59.034
          - type: recall_at_100
            value: 86.991
          - type: recall_at_1000
            value: 98.304
          - type: recall_at_3
            value: 43.356
          - type: recall_at_5
            value: 49.998
          - type: main_score
            value: 48.294
        task:
          type: Retrieval
      - dataset:
          config: default
          name: MTEB CovidRetrieval
          revision: None
          split: dev
          type: C-MTEB/CovidRetrieval
        metrics:
          - type: map_at_1
            value: 80.479
          - type: map_at_10
            value: 87.984
          - type: map_at_100
            value: 88.036
          - type: map_at_1000
            value: 88.03699999999999
          - type: map_at_3
            value: 87.083
          - type: map_at_5
            value: 87.694
          - type: mrr_at_1
            value: 80.927
          - type: mrr_at_10
            value: 88.046
          - type: mrr_at_100
            value: 88.099
          - type: mrr_at_1000
            value: 88.1
          - type: mrr_at_3
            value: 87.215
          - type: mrr_at_5
            value: 87.768
          - type: ndcg_at_1
            value: 80.927
          - type: ndcg_at_10
            value: 90.756
          - type: ndcg_at_100
            value: 90.96
          - type: ndcg_at_1000
            value: 90.975
          - type: ndcg_at_3
            value: 89.032
          - type: ndcg_at_5
            value: 90.106
          - type: precision_at_1
            value: 80.927
          - type: precision_at_10
            value: 10.011000000000001
          - type: precision_at_100
            value: 1.009
          - type: precision_at_1000
            value: 0.101
          - type: precision_at_3
            value: 31.752999999999997
          - type: precision_at_5
            value: 19.6
          - type: recall_at_1
            value: 80.479
          - type: recall_at_10
            value: 99.05199999999999
          - type: recall_at_100
            value: 99.895
          - type: recall_at_1000
            value: 100
          - type: recall_at_3
            value: 94.494
          - type: recall_at_5
            value: 97.102
          - type: main_score
            value: 90.756
        task:
          type: Retrieval
      - dataset:
          config: default
          name: MTEB DuRetrieval
          revision: None
          split: dev
          type: C-MTEB/DuRetrieval
        metrics:
          - type: map_at_1
            value: 27.853
          - type: map_at_10
            value: 85.13199999999999
          - type: map_at_100
            value: 87.688
          - type: map_at_1000
            value: 87.712
          - type: map_at_3
            value: 59.705
          - type: map_at_5
            value: 75.139
          - type: mrr_at_1
            value: 93.65
          - type: mrr_at_10
            value: 95.682
          - type: mrr_at_100
            value: 95.722
          - type: mrr_at_1000
            value: 95.724
          - type: mrr_at_3
            value: 95.467
          - type: mrr_at_5
            value: 95.612
          - type: ndcg_at_1
            value: 93.65
          - type: ndcg_at_10
            value: 91.155
          - type: ndcg_at_100
            value: 93.183
          - type: ndcg_at_1000
            value: 93.38499999999999
          - type: ndcg_at_3
            value: 90.648
          - type: ndcg_at_5
            value: 89.47699999999999
          - type: precision_at_1
            value: 93.65
          - type: precision_at_10
            value: 43.11
          - type: precision_at_100
            value: 4.854
          - type: precision_at_1000
            value: 0.49100000000000005
          - type: precision_at_3
            value: 81.11699999999999
          - type: precision_at_5
            value: 68.28999999999999
          - type: recall_at_1
            value: 27.853
          - type: recall_at_10
            value: 91.678
          - type: recall_at_100
            value: 98.553
          - type: recall_at_1000
            value: 99.553
          - type: recall_at_3
            value: 61.381
          - type: recall_at_5
            value: 78.605
          - type: main_score
            value: 91.155
        task:
          type: Retrieval
      - dataset:
          config: default
          name: MTEB EcomRetrieval
          revision: None
          split: dev
          type: C-MTEB/EcomRetrieval
        metrics:
          - type: map_at_1
            value: 54.50000000000001
          - type: map_at_10
            value: 65.167
          - type: map_at_100
            value: 65.664
          - type: map_at_1000
            value: 65.67399999999999
          - type: map_at_3
            value: 62.633
          - type: map_at_5
            value: 64.208
          - type: mrr_at_1
            value: 54.50000000000001
          - type: mrr_at_10
            value: 65.167
          - type: mrr_at_100
            value: 65.664
          - type: mrr_at_1000
            value: 65.67399999999999
          - type: mrr_at_3
            value: 62.633
          - type: mrr_at_5
            value: 64.208
          - type: ndcg_at_1
            value: 54.50000000000001
          - type: ndcg_at_10
            value: 70.294
          - type: ndcg_at_100
            value: 72.564
          - type: ndcg_at_1000
            value: 72.841
          - type: ndcg_at_3
            value: 65.128
          - type: ndcg_at_5
            value: 67.96799999999999
          - type: precision_at_1
            value: 54.50000000000001
          - type: precision_at_10
            value: 8.64
          - type: precision_at_100
            value: 0.967
          - type: precision_at_1000
            value: 0.099
          - type: precision_at_3
            value: 24.099999999999998
          - type: precision_at_5
            value: 15.840000000000002
          - type: recall_at_1
            value: 54.50000000000001
          - type: recall_at_10
            value: 86.4
          - type: recall_at_100
            value: 96.7
          - type: recall_at_1000
            value: 98.9
          - type: recall_at_3
            value: 72.3
          - type: recall_at_5
            value: 79.2
          - type: main_score
            value: 70.294
        task:
          type: Retrieval
      - dataset:
          config: default
          name: MTEB MMarcoReranking
          revision: None
          split: dev
          type: C-MTEB/Mmarco-reranking
        metrics:
          - type: map
            value: 37.68251937316638
          - type: mrr
            value: 36.61746031746032
          - type: main_score
            value: 37.68251937316638
        task:
          type: Reranking
      - dataset:
          config: default
          name: MTEB MMarcoRetrieval
          revision: None
          split: dev
          type: C-MTEB/MMarcoRetrieval
        metrics:
          - type: map_at_1
            value: 69.401
          - type: map_at_10
            value: 78.8
          - type: map_at_100
            value: 79.077
          - type: map_at_1000
            value: 79.081
          - type: map_at_3
            value: 76.97
          - type: map_at_5
            value: 78.185
          - type: mrr_at_1
            value: 71.719
          - type: mrr_at_10
            value: 79.327
          - type: mrr_at_100
            value: 79.56400000000001
          - type: mrr_at_1000
            value: 79.56800000000001
          - type: mrr_at_3
            value: 77.736
          - type: mrr_at_5
            value: 78.782
          - type: ndcg_at_1
            value: 71.719
          - type: ndcg_at_10
            value: 82.505
          - type: ndcg_at_100
            value: 83.673
          - type: ndcg_at_1000
            value: 83.786
          - type: ndcg_at_3
            value: 79.07600000000001
          - type: ndcg_at_5
            value: 81.122
          - type: precision_at_1
            value: 71.719
          - type: precision_at_10
            value: 9.924
          - type: precision_at_100
            value: 1.049
          - type: precision_at_1000
            value: 0.106
          - type: precision_at_3
            value: 29.742
          - type: precision_at_5
            value: 18.937
          - type: recall_at_1
            value: 69.401
          - type: recall_at_10
            value: 93.349
          - type: recall_at_100
            value: 98.492
          - type: recall_at_1000
            value: 99.384
          - type: recall_at_3
            value: 84.385
          - type: recall_at_5
            value: 89.237
          - type: main_score
            value: 82.505
        task:
          type: Retrieval
      - dataset:
          config: default
          name: MTEB MedicalRetrieval
          revision: None
          split: dev
          type: C-MTEB/MedicalRetrieval
        metrics:
          - type: map_at_1
            value: 57.8
          - type: map_at_10
            value: 64.696
          - type: map_at_100
            value: 65.294
          - type: map_at_1000
            value: 65.328
          - type: map_at_3
            value: 62.949999999999996
          - type: map_at_5
            value: 64.095
          - type: mrr_at_1
            value: 58.099999999999994
          - type: mrr_at_10
            value: 64.85
          - type: mrr_at_100
            value: 65.448
          - type: mrr_at_1000
            value: 65.482
          - type: mrr_at_3
            value: 63.1
          - type: mrr_at_5
            value: 64.23
          - type: ndcg_at_1
            value: 57.8
          - type: ndcg_at_10
            value: 68.041
          - type: ndcg_at_100
            value: 71.074
          - type: ndcg_at_1000
            value: 71.919
          - type: ndcg_at_3
            value: 64.584
          - type: ndcg_at_5
            value: 66.625
          - type: precision_at_1
            value: 57.8
          - type: precision_at_10
            value: 7.85
          - type: precision_at_100
            value: 0.9289999999999999
          - type: precision_at_1000
            value: 0.099
          - type: precision_at_3
            value: 23.1
          - type: precision_at_5
            value: 14.84
          - type: recall_at_1
            value: 57.8
          - type: recall_at_10
            value: 78.5
          - type: recall_at_100
            value: 92.9
          - type: recall_at_1000
            value: 99.4
          - type: recall_at_3
            value: 69.3
          - type: recall_at_5
            value: 74.2
          - type: main_score
            value: 68.041
        task:
          type: Retrieval
      - dataset:
          config: default
          name: MTEB T2Reranking
          revision: None
          split: dev
          type: C-MTEB/T2Reranking
        metrics:
          - type: map
            value: 69.13287570713865
          - type: mrr
            value: 79.95326487625066
          - type: main_score
            value: 69.13287570713865
        task:
          type: Reranking
      - dataset:
          config: default
          name: MTEB T2Retrieval
          revision: None
          split: dev
          type: C-MTEB/T2Retrieval
        metrics:
          - type: map_at_1
            value: 28.041
          - type: map_at_10
            value: 78.509
          - type: map_at_100
            value: 82.083
          - type: map_at_1000
            value: 82.143
          - type: map_at_3
            value: 55.345
          - type: map_at_5
            value: 67.899
          - type: mrr_at_1
            value: 90.86
          - type: mrr_at_10
            value: 93.31
          - type: mrr_at_100
            value: 93.388
          - type: mrr_at_1000
            value: 93.391
          - type: mrr_at_3
            value: 92.92200000000001
          - type: mrr_at_5
            value: 93.167
          - type: ndcg_at_1
            value: 90.86
          - type: ndcg_at_10
            value: 85.875
          - type: ndcg_at_100
            value: 89.269
          - type: ndcg_at_1000
            value: 89.827
          - type: ndcg_at_3
            value: 87.254
          - type: ndcg_at_5
            value: 85.855
          - type: precision_at_1
            value: 90.86
          - type: precision_at_10
            value: 42.488
          - type: precision_at_100
            value: 5.029
          - type: precision_at_1000
            value: 0.516
          - type: precision_at_3
            value: 76.172
          - type: precision_at_5
            value: 63.759
          - type: recall_at_1
            value: 28.041
          - type: recall_at_10
            value: 84.829
          - type: recall_at_100
            value: 95.89999999999999
          - type: recall_at_1000
            value: 98.665
          - type: recall_at_3
            value: 57.009
          - type: recall_at_5
            value: 71.188
          - type: main_score
            value: 85.875
        task:
          type: Retrieval
      - dataset:
          config: default
          name: MTEB VideoRetrieval
          revision: None
          split: dev
          type: C-MTEB/VideoRetrieval
        metrics:
          - type: map_at_1
            value: 67.30000000000001
          - type: map_at_10
            value: 76.819
          - type: map_at_100
            value: 77.141
          - type: map_at_1000
            value: 77.142
          - type: map_at_3
            value: 75.233
          - type: map_at_5
            value: 76.163
          - type: mrr_at_1
            value: 67.30000000000001
          - type: mrr_at_10
            value: 76.819
          - type: mrr_at_100
            value: 77.141
          - type: mrr_at_1000
            value: 77.142
          - type: mrr_at_3
            value: 75.233
          - type: mrr_at_5
            value: 76.163
          - type: ndcg_at_1
            value: 67.30000000000001
          - type: ndcg_at_10
            value: 80.93599999999999
          - type: ndcg_at_100
            value: 82.311
          - type: ndcg_at_1000
            value: 82.349
          - type: ndcg_at_3
            value: 77.724
          - type: ndcg_at_5
            value: 79.406
          - type: precision_at_1
            value: 67.30000000000001
          - type: precision_at_10
            value: 9.36
          - type: precision_at_100
            value: 0.996
          - type: precision_at_1000
            value: 0.1
          - type: precision_at_3
            value: 28.299999999999997
          - type: precision_at_5
            value: 17.8
          - type: recall_at_1
            value: 67.30000000000001
          - type: recall_at_10
            value: 93.60000000000001
          - type: recall_at_100
            value: 99.6
          - type: recall_at_1000
            value: 99.9
          - type: recall_at_3
            value: 84.89999999999999
          - type: recall_at_5
            value: 89
          - type: main_score
            value: 80.93599999999999
        task:
          type: Retrieval
tags:
  - mteb
language:
  - zh

# XYZ-embedding-zh-v2

## Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.

```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("fangxq/XYZ-embedding-zh-v2")
# Run inference
sentences = [
    'The weather is lovely today.',
    "It's so sunny outside!",
    'He drove to the stadium.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 1792]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```