Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

README.md +1950 -0
config.json +7 -0
model.bin +3 -0
special_tokens_map.json +51 -0
tokenizer.json +0 -0
tokenizer_config.json +61 -0
vocabulary.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,1950 @@

+---
+language:
+- en
+license: apache-2.0
+library_name: transformers
+tags:
+- language
+- granite
+- embeddings
+model-index:
+- name: ibm-granite/granite-embedding-125m-english
+  results:
+  - dataset:
+      type: mteb/arguana
+      name: MTEB ArguAna
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.33642
+    - type: map_at_10
+      value: 0.49716
+    - type: map_at_100
+      value: 0.50519
+    - type: map_at_1000
+      value: 0.50521
+    - type: map_at_3
+      value: 0.45057
+    - type: map_at_5
+      value: 0.47774
+    - type: mrr_at_1
+      value: 0.34922
+    - type: mrr_at_10
+      value: 0.50197
+    - type: mrr_at_100
+      value: 0.50992
+    - type: mrr_at_1000
+      value: 0.50994
+    - type: mrr_at_3
+      value: 0.45484
+    - type: mrr_at_5
+      value: 0.48272
+    - type: ndcg_at_1
+      value: 0.33642
+    - type: ndcg_at_10
+      value: 0.58401
+    - type: ndcg_at_100
+      value: 0.6157
+    - type: ndcg_at_1000
+      value: 0.61608
+    - type: ndcg_at_3
+      value: 0.48825
+    - type: ndcg_at_5
+      value: 0.53689
+    - type: precision_at_1
+      value: 0.33642
+    - type: precision_at_10
+      value: 0.08606
+    - type: precision_at_100
+      value: 0.00994
+    - type: precision_at_1000
+      value: 0.001
+    - type: precision_at_3
+      value: 0.19915
+    - type: precision_at_5
+      value: 0.14296
+    - type: recall_at_1
+      value: 0.33642
+    - type: recall_at_10
+      value: 0.8606
+    - type: recall_at_100
+      value: 0.9936
+    - type: recall_at_1000
+      value: 0.99644
+    - type: recall_at_3
+      value: 0.59744
+    - type: recall_at_5
+      value: 0.71479
+  - dataset:
+      type: mteb/climate-fever
+      name: MTEB ClimateFEVER
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.1457
+    - type: map_at_10
+      value: 0.24102
+    - type: map_at_100
+      value: 0.25826
+    - type: map_at_1000
+      value: 0.26021
+    - type: map_at_3
+      value: 0.20346
+    - type: map_at_5
+      value: 0.22228
+    - type: mrr_at_1
+      value: 0.32573
+    - type: mrr_at_10
+      value: 0.44411
+    - type: mrr_at_100
+      value: 0.45176
+    - type: mrr_at_1000
+      value: 0.45209
+    - type: mrr_at_3
+      value: 0.4126
+    - type: mrr_at_5
+      value: 0.43312
+    - type: ndcg_at_1
+      value: 0.32573
+    - type: ndcg_at_10
+      value: 0.3315
+    - type: ndcg_at_100
+      value: 0.39898
+    - type: ndcg_at_1000
+      value: 0.43151
+    - type: ndcg_at_3
+      value: 0.27683
+    - type: ndcg_at_5
+      value: 0.29538
+    - type: precision_at_1
+      value: 0.32573
+    - type: precision_at_10
+      value: 0.10176
+    - type: precision_at_100
+      value: 0.01754
+    - type: precision_at_1000
+      value: 0.00236
+    - type: precision_at_3
+      value: 0.20347
+    - type: precision_at_5
+      value: 0.15505
+    - type: recall_at_1
+      value: 0.1457
+    - type: recall_at_10
+      value: 0.38825
+    - type: recall_at_100
+      value: 0.62237
+    - type: recall_at_1000
+      value: 0.8022
+    - type: recall_at_3
+      value: 0.25245
+    - type: recall_at_5
+      value: 0.30821
+  - dataset:
+      type: mteb/cqadupstack-android
+      name: MTEB CQADupstackAndroidRetrieval
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.36964
+    - type: map_at_10
+      value: 0.5043
+    - type: map_at_100
+      value: 0.52066
+    - type: map_at_1000
+      value: 0.52175
+    - type: map_at_3
+      value: 0.46001
+    - type: map_at_5
+      value: 0.48312
+    - type: mrr_at_1
+      value: 0.45923
+    - type: mrr_at_10
+      value: 0.56733
+    - type: mrr_at_100
+      value: 0.57292
+    - type: mrr_at_1000
+      value: 0.57321
+    - type: mrr_at_3
+      value: 0.54053
+    - type: mrr_at_5
+      value: 0.55556
+    - type: ndcg_at_1
+      value: 0.45923
+    - type: ndcg_at_10
+      value: 0.57667
+    - type: ndcg_at_100
+      value: 0.62373
+    - type: ndcg_at_1000
+      value: 0.6368
+    - type: ndcg_at_3
+      value: 0.51843
+    - type: ndcg_at_5
+      value: 0.54257
+    - type: precision_at_1
+      value: 0.45923
+    - type: precision_at_10
+      value: 0.11316
+    - type: precision_at_100
+      value: 0.01705
+    - type: precision_at_1000
+      value: 0.00216
+    - type: precision_at_3
+      value: 0.2537
+    - type: precision_at_5
+      value: 0.1814
+    - type: recall_at_1
+      value: 0.36964
+    - type: recall_at_10
+      value: 0.71234
+    - type: recall_at_100
+      value: 0.90421
+    - type: recall_at_1000
+      value: 0.98296
+    - type: recall_at_3
+      value: 0.53655
+    - type: recall_at_5
+      value: 0.60996
+  - dataset:
+      type: mteb/cqadupstack-english
+      name: MTEB CQADupstackEnglishRetrieval
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.36198
+    - type: map_at_10
+      value: 0.49199
+    - type: map_at_100
+      value: 0.50602
+    - type: map_at_1000
+      value: 0.50736
+    - type: map_at_3
+      value: 0.45678
+    - type: map_at_5
+      value: 0.47605
+    - type: mrr_at_1
+      value: 0.45478
+    - type: mrr_at_10
+      value: 0.55075
+    - type: mrr_at_100
+      value: 0.55656
+    - type: mrr_at_1000
+      value: 0.55688
+    - type: mrr_at_3
+      value: 0.52887
+    - type: mrr_at_5
+      value: 0.54282
+    - type: ndcg_at_1
+      value: 0.45478
+    - type: ndcg_at_10
+      value: 0.55505
+    - type: ndcg_at_100
+      value: 0.59606
+    - type: ndcg_at_1000
+      value: 0.61255
+    - type: ndcg_at_3
+      value: 0.51124
+    - type: ndcg_at_5
+      value: 0.53166
+    - type: precision_at_1
+      value: 0.45478
+    - type: precision_at_10
+      value: 0.10752
+    - type: precision_at_100
+      value: 0.01666
+    - type: precision_at_1000
+      value: 0.00211
+    - type: precision_at_3
+      value: 0.25053
+    - type: precision_at_5
+      value: 0.17694
+    - type: recall_at_1
+      value: 0.36198
+    - type: recall_at_10
+      value: 0.66465
+    - type: recall_at_100
+      value: 0.83632
+    - type: recall_at_1000
+      value: 0.93276
+    - type: recall_at_3
+      value: 0.53207
+    - type: recall_at_5
+      value: 0.59169
+  - dataset:
+      type: mteb/cqadupstack-gaming
+      name: MTEB CQADupstackGamingRetrieval
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.44157
+    - type: map_at_10
+      value: 0.57753
+    - type: map_at_100
+      value: 0.58698
+    - type: map_at_1000
+      value: 0.5874
+    - type: map_at_3
+      value: 0.54223
+    - type: map_at_5
+      value: 0.56307
+    - type: mrr_at_1
+      value: 0.50094
+    - type: mrr_at_10
+      value: 0.607
+    - type: mrr_at_100
+      value: 0.6126
+    - type: mrr_at_1000
+      value: 0.6128
+    - type: mrr_at_3
+      value: 0.58265
+    - type: mrr_at_5
+      value: 0.59817
+    - type: ndcg_at_1
+      value: 0.50094
+    - type: ndcg_at_10
+      value: 0.63641
+    - type: ndcg_at_100
+      value: 0.67055
+    - type: ndcg_at_1000
+      value: 0.67855
+    - type: ndcg_at_3
+      value: 0.58022
+    - type: ndcg_at_5
+      value: 0.6097
+    - type: precision_at_1
+      value: 0.50094
+    - type: precision_at_10
+      value: 0.10182
+    - type: precision_at_100
+      value: 0.01278
+    - type: precision_at_1000
+      value: 0.00138
+    - type: precision_at_3
+      value: 0.2581
+    - type: precision_at_5
+      value: 0.17755
+    - type: recall_at_1
+      value: 0.44157
+    - type: recall_at_10
+      value: 0.7778
+    - type: recall_at_100
+      value: 0.92244
+    - type: recall_at_1000
+      value: 0.9781
+    - type: recall_at_3
+      value: 0.63087
+    - type: recall_at_5
+      value: 0.70172
+  - dataset:
+      type: mteb/cqadupstack-gis
+      name: MTEB CQADupstackGisRetrieval
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.29532
+    - type: map_at_10
+      value: 0.40214
+    - type: map_at_100
+      value: 0.41289
+    - type: map_at_1000
+      value: 0.41359
+    - type: map_at_3
+      value: 0.37086
+    - type: map_at_5
+      value: 0.38889
+    - type: mrr_at_1
+      value: 0.3209
+    - type: mrr_at_10
+      value: 0.42423
+    - type: mrr_at_100
+      value: 0.43342
+    - type: mrr_at_1000
+      value: 0.43395
+    - type: mrr_at_3
+      value: 0.39736
+    - type: mrr_at_5
+      value: 0.41307
+    - type: ndcg_at_1
+      value: 0.3209
+    - type: ndcg_at_10
+      value: 0.46075
+    - type: ndcg_at_100
+      value: 0.5103
+    - type: ndcg_at_1000
+      value: 0.52668
+    - type: ndcg_at_3
+      value: 0.40149
+    - type: ndcg_at_5
+      value: 0.43111
+    - type: precision_at_1
+      value: 0.3209
+    - type: precision_at_10
+      value: 0.07141
+    - type: precision_at_100
+      value: 0.01018
+    - type: precision_at_1000
+      value: 0.00118
+    - type: precision_at_3
+      value: 0.17175
+    - type: precision_at_5
+      value: 0.12068
+    - type: recall_at_1
+      value: 0.29532
+    - type: recall_at_10
+      value: 0.62025
+    - type: recall_at_100
+      value: 0.83829
+    - type: recall_at_1000
+      value: 0.95995
+    - type: recall_at_3
+      value: 0.4603
+    - type: recall_at_5
+      value: 0.53089
+  - dataset:
+      type: mteb/cqadupstack-mathematica
+      name: MTEB CQADupstackMathematicaRetrieval
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.18944
+    - type: map_at_10
+      value: 0.29611
+    - type: map_at_100
+      value: 0.31063
+    - type: map_at_1000
+      value: 0.31174
+    - type: map_at_3
+      value: 0.26098
+    - type: map_at_5
+      value: 0.28151
+    - type: mrr_at_1
+      value: 0.23756
+    - type: mrr_at_10
+      value: 0.34491
+    - type: mrr_at_100
+      value: 0.35457
+    - type: mrr_at_1000
+      value: 0.35512
+    - type: mrr_at_3
+      value: 0.3126
+    - type: mrr_at_5
+      value: 0.3317
+    - type: ndcg_at_1
+      value: 0.23756
+    - type: ndcg_at_10
+      value: 0.36015
+    - type: ndcg_at_100
+      value: 0.42175
+    - type: ndcg_at_1000
+      value: 0.44607
+    - type: ndcg_at_3
+      value: 0.29725
+    - type: ndcg_at_5
+      value: 0.32879
+    - type: precision_at_1
+      value: 0.23756
+    - type: precision_at_10
+      value: 0.06928
+    - type: precision_at_100
+      value: 0.01153
+    - type: precision_at_1000
+      value: 0.00149
+    - type: precision_at_3
+      value: 0.14635
+    - type: precision_at_5
+      value: 0.1107
+    - type: recall_at_1
+      value: 0.18944
+    - type: recall_at_10
+      value: 0.50691
+    - type: recall_at_100
+      value: 0.76503
+    - type: recall_at_1000
+      value: 0.93624
+    - type: recall_at_3
+      value: 0.33611
+    - type: recall_at_5
+      value: 0.41427
+  - dataset:
+      type: mteb/cqadupstack-physics
+      name: MTEB CQADupstackPhysicsRetrieval
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.33824
+    - type: map_at_10
+      value: 0.46868
+    - type: map_at_100
+      value: 0.48306
+    - type: map_at_1000
+      value: 0.48406
+    - type: map_at_3
+      value: 0.43335
+    - type: map_at_5
+      value: 0.45279
+    - type: mrr_at_1
+      value: 0.42348
+    - type: mrr_at_10
+      value: 0.52972
+    - type: mrr_at_100
+      value: 0.53707
+    - type: mrr_at_1000
+      value: 0.53734
+    - type: mrr_at_3
+      value: 0.50722
+    - type: mrr_at_5
+      value: 0.52012
+    - type: ndcg_at_1
+      value: 0.42348
+    - type: ndcg_at_10
+      value: 0.53504
+    - type: ndcg_at_100
+      value: 0.58899
+    - type: ndcg_at_1000
+      value: 0.60323
+    - type: ndcg_at_3
+      value: 0.48478
+    - type: ndcg_at_5
+      value: 0.5079
+    - type: precision_at_1
+      value: 0.42348
+    - type: precision_at_10
+      value: 0.0975
+    - type: precision_at_100
+      value: 0.01466
+    - type: precision_at_1000
+      value: 0.00177
+    - type: precision_at_3
+      value: 0.23741
+    - type: precision_at_5
+      value: 0.16439
+    - type: recall_at_1
+      value: 0.33824
+    - type: recall_at_10
+      value: 0.67142
+    - type: recall_at_100
+      value: 0.89134
+    - type: recall_at_1000
+      value: 0.97816
+    - type: recall_at_3
+      value: 0.52305
+    - type: recall_at_5
+      value: 0.58804
+  - dataset:
+      type: mteb/cqadupstack-programmers
+      name: MTEB CQADupstackProgrammersRetrieval
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.30125
+    - type: map_at_10
+      value: 0.42119
+    - type: map_at_100
+      value: 0.43599
+    - type: map_at_1000
+      value: 0.4369
+    - type: map_at_3
+      value: 0.38018
+    - type: map_at_5
+      value: 0.40368
+    - type: mrr_at_1
+      value: 0.37557
+    - type: mrr_at_10
+      value: 0.47573
+    - type: mrr_at_100
+      value: 0.4846
+    - type: mrr_at_1000
+      value: 0.48499
+    - type: mrr_at_3
+      value: 0.44654
+    - type: mrr_at_5
+      value: 0.4644
+    - type: ndcg_at_1
+      value: 0.37557
+    - type: ndcg_at_10
+      value: 0.48743
+    - type: ndcg_at_100
+      value: 0.54458
+    - type: ndcg_at_1000
+      value: 0.56076
+    - type: ndcg_at_3
+      value: 0.42573
+    - type: ndcg_at_5
+      value: 0.45528
+    - type: precision_at_1
+      value: 0.37557
+    - type: precision_at_10
+      value: 0.09269
+    - type: precision_at_100
+      value: 0.01401
+    - type: precision_at_1000
+      value: 0.0017
+    - type: precision_at_3
+      value: 0.20624
+    - type: precision_at_5
+      value: 0.15068
+    - type: recall_at_1
+      value: 0.30125
+    - type: recall_at_10
+      value: 0.62619
+    - type: recall_at_100
+      value: 0.86574
+    - type: recall_at_1000
+      value: 0.97102
+    - type: recall_at_3
+      value: 0.45437
+    - type: recall_at_5
+      value: 0.53197
+  - dataset:
+      type: mteb/cqadupstack-stats
+      name: MTEB CQADupstackStatsRetrieval
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.29193
+    - type: map_at_10
+      value: 0.37529
+    - type: map_at_100
+      value: 0.38614
+    - type: map_at_1000
+      value: 0.38714
+    - type: map_at_3
+      value: 0.34897
+    - type: map_at_5
+      value: 0.36273
+    - type: mrr_at_1
+      value: 0.32669
+    - type: mrr_at_10
+      value: 0.40288
+    - type: mrr_at_100
+      value: 0.41177
+    - type: mrr_at_1000
+      value: 0.41241
+    - type: mrr_at_3
+      value: 0.38037
+    - type: mrr_at_5
+      value: 0.39195
+    - type: ndcg_at_1
+      value: 0.32669
+    - type: ndcg_at_10
+      value: 0.42353
+    - type: ndcg_at_100
+      value: 0.47424
+    - type: ndcg_at_1000
+      value: 0.4959
+    - type: ndcg_at_3
+      value: 0.37604
+    - type: ndcg_at_5
+      value: 0.39682
+    - type: precision_at_1
+      value: 0.32669
+    - type: precision_at_10
+      value: 0.06871
+    - type: precision_at_100
+      value: 0.01008
+    - type: precision_at_1000
+      value: 0.00126
+    - type: precision_at_3
+      value: 0.16309
+    - type: precision_at_5
+      value: 0.11288
+    - type: recall_at_1
+      value: 0.29193
+    - type: recall_at_10
+      value: 0.54159
+    - type: recall_at_100
+      value: 0.77267
+    - type: recall_at_1000
+      value: 0.92805
+    - type: recall_at_3
+      value: 0.41014
+    - type: recall_at_5
+      value: 0.46248
+  - dataset:
+      type: mteb/cqadupstack-tex
+      name: MTEB CQADupstackTexRetrieval
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.21217
+    - type: map_at_10
+      value: 0.30848
+    - type: map_at_100
+      value: 0.32173
+    - type: map_at_1000
+      value: 0.32296
+    - type: map_at_3
+      value: 0.27882
+    - type: map_at_5
+      value: 0.29537
+    - type: mrr_at_1
+      value: 0.25946
+    - type: mrr_at_10
+      value: 0.35091
+    - type: mrr_at_100
+      value: 0.36047
+    - type: mrr_at_1000
+      value: 0.36111
+    - type: mrr_at_3
+      value: 0.32485
+    - type: mrr_at_5
+      value: 0.33964
+    - type: ndcg_at_1
+      value: 0.25946
+    - type: ndcg_at_10
+      value: 0.3655
+    - type: ndcg_at_100
+      value: 0.42328
+    - type: ndcg_at_1000
+      value: 0.44783
+    - type: ndcg_at_3
+      value: 0.31463
+    - type: ndcg_at_5
+      value: 0.33803
+    - type: precision_at_1
+      value: 0.25946
+    - type: precision_at_10
+      value: 0.06793
+    - type: precision_at_100
+      value: 0.01138
+    - type: precision_at_1000
+      value: 0.00155
+    - type: precision_at_3
+      value: 0.1513
+    - type: precision_at_5
+      value: 0.10991
+    - type: recall_at_1
+      value: 0.21217
+    - type: recall_at_10
+      value: 0.49327
+    - type: recall_at_100
+      value: 0.7472
+    - type: recall_at_1000
+      value: 0.91637
+    - type: recall_at_3
+      value: 0.34993
+    - type: recall_at_5
+      value: 0.41029
+  - dataset:
+      type: mteb/cqadupstack-unix
+      name: MTEB CQADupstackUnixRetrieval
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.34303
+    - type: map_at_10
+      value: 0.45312
+    - type: map_at_100
+      value: 0.46563
+    - type: map_at_1000
+      value: 0.4664
+    - type: map_at_3
+      value: 0.4143
+    - type: map_at_5
+      value: 0.43633
+    - type: mrr_at_1
+      value: 0.40112
+    - type: mrr_at_10
+      value: 0.49097
+    - type: mrr_at_100
+      value: 0.49966
+    - type: mrr_at_1000
+      value: 0.50006
+    - type: mrr_at_3
+      value: 0.46129
+    - type: mrr_at_5
+      value: 0.47901
+    - type: ndcg_at_1
+      value: 0.40112
+    - type: ndcg_at_10
+      value: 0.513
+    - type: ndcg_at_100
+      value: 0.56534
+    - type: ndcg_at_1000
+      value: 0.58048
+    - type: ndcg_at_3
+      value: 0.4491
+    - type: ndcg_at_5
+      value: 0.48048
+    - type: precision_at_1
+      value: 0.40112
+    - type: precision_at_10
+      value: 0.08806
+    - type: precision_at_100
+      value: 0.01266
+    - type: precision_at_1000
+      value: 0.00149
+    - type: precision_at_3
+      value: 0.20211
+    - type: precision_at_5
+      value: 0.14496
+    - type: recall_at_1
+      value: 0.34303
+    - type: recall_at_10
+      value: 0.65508
+    - type: recall_at_100
+      value: 0.8753
+    - type: recall_at_1000
+      value: 0.9742
+    - type: recall_at_3
+      value: 0.48465
+    - type: recall_at_5
+      value: 0.56374
+  - dataset:
+      type: mteb/cqadupstack-webmasters
+      name: MTEB CQADupstackWebmastersRetrieval
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.30312
+    - type: map_at_10
+      value: 0.40931
+    - type: map_at_100
+      value: 0.42893
+    - type: map_at_1000
+      value: 0.4312
+    - type: map_at_3
+      value: 0.37527
+    - type: map_at_5
+      value: 0.3936
+    - type: mrr_at_1
+      value: 0.36364
+    - type: mrr_at_10
+      value: 0.45677
+    - type: mrr_at_100
+      value: 0.46753
+    - type: mrr_at_1000
+      value: 0.46787
+    - type: mrr_at_3
+      value: 0.42918
+    - type: mrr_at_5
+      value: 0.4443
+    - type: ndcg_at_1
+      value: 0.36364
+    - type: ndcg_at_10
+      value: 0.47301
+    - type: ndcg_at_100
+      value: 0.53698
+    - type: ndcg_at_1000
+      value: 0.55503
+    - type: ndcg_at_3
+      value: 0.41875
+    - type: ndcg_at_5
+      value: 0.44316
+    - type: precision_at_1
+      value: 0.36364
+    - type: precision_at_10
+      value: 0.09032
+    - type: precision_at_100
+      value: 0.01806
+    - type: precision_at_1000
+      value: 0.00258
+    - type: precision_at_3
+      value: 0.19499
+    - type: precision_at_5
+      value: 0.1415
+    - type: recall_at_1
+      value: 0.30312
+    - type: recall_at_10
+      value: 0.59418
+    - type: recall_at_100
+      value: 0.8656
+    - type: recall_at_1000
+      value: 0.97412
+    - type: recall_at_3
+      value: 0.44251
+    - type: recall_at_5
+      value: 0.50457
+  - dataset:
+      type: mteb/cqadupstack-wordpress
+      name: MTEB CQADupstackWordpressRetrieval
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.23851
+    - type: map_at_10
+      value: 0.33429
+    - type: map_at_100
+      value: 0.34482
+    - type: map_at_1000
+      value: 0.3457
+    - type: map_at_3
+      value: 0.30271
+    - type: map_at_5
+      value: 0.31905
+    - type: mrr_at_1
+      value: 0.25693
+    - type: mrr_at_10
+      value: 0.35383
+    - type: mrr_at_100
+      value: 0.36295
+    - type: mrr_at_1000
+      value: 0.36346
+    - type: mrr_at_3
+      value: 0.32532
+    - type: mrr_at_5
+      value: 0.3402
+    - type: ndcg_at_1
+      value: 0.25693
+    - type: ndcg_at_10
+      value: 0.39196
+    - type: ndcg_at_100
+      value: 0.44501
+    - type: ndcg_at_1000
+      value: 0.46482
+    - type: ndcg_at_3
+      value: 0.33
+    - type: ndcg_at_5
+      value: 0.35736
+    - type: precision_at_1
+      value: 0.25693
+    - type: precision_at_10
+      value: 0.06433
+    - type: precision_at_100
+      value: 0.00989
+    - type: precision_at_1000
+      value: 0.00128
+    - type: precision_at_3
+      value: 0.14295
+    - type: precision_at_5
+      value: 0.10277
+    - type: recall_at_1
+      value: 0.23851
+    - type: recall_at_10
+      value: 0.55036
+    - type: recall_at_100
+      value: 0.79592
+    - type: recall_at_1000
+      value: 0.94283
+    - type: recall_at_3
+      value: 0.38435
+    - type: recall_at_5
+      value: 0.44872
+  - dataset:
+      type: mteb/dbpedia
+      name: MTEB DBPedia
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.0871
+    - type: map_at_10
+      value: 0.19218
+    - type: map_at_100
+      value: 0.26291
+    - type: map_at_1000
+      value: 0.27985
+    - type: map_at_3
+      value: 0.13974
+    - type: map_at_5
+      value: 0.16104
+    - type: mrr_at_1
+      value: 0.6725
+    - type: mrr_at_10
+      value: 0.75037
+    - type: mrr_at_100
+      value: 0.75318
+    - type: mrr_at_1000
+      value: 0.75325
+    - type: mrr_at_3
+      value: 0.73833
+    - type: mrr_at_5
+      value: 0.74308
+    - type: ndcg_at_1
+      value: 0.54375
+    - type: ndcg_at_10
+      value: 0.39409
+    - type: ndcg_at_100
+      value: 0.44382
+    - type: ndcg_at_1000
+      value: 0.52485
+    - type: ndcg_at_3
+      value: 0.44463
+    - type: ndcg_at_5
+      value: 0.41276
+    - type: precision_at_1
+      value: 0.6725
+    - type: precision_at_10
+      value: 0.3055
+    - type: precision_at_100
+      value: 0.09588
+    - type: precision_at_1000
+      value: 0.02118
+    - type: precision_at_3
+      value: 0.48167
+    - type: precision_at_5
+      value: 0.394
+    - type: recall_at_1
+      value: 0.0871
+    - type: recall_at_10
+      value: 0.2527
+    - type: recall_at_100
+      value: 0.5185
+    - type: recall_at_1000
+      value: 0.76491
+    - type: recall_at_3
+      value: 0.15516
+    - type: recall_at_5
+      value: 0.18907
+  - dataset:
+      type: mteb/fever
+      name: MTEB FEVER
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.78993
+    - type: map_at_10
+      value: 0.8502
+    - type: map_at_100
+      value: 0.85186
+    - type: map_at_1000
+      value: 0.852
+    - type: map_at_3
+      value: 0.8437
+    - type: map_at_5
+      value: 0.84812
+    - type: mrr_at_1
+      value: 0.85179
+    - type: mrr_at_10
+      value: 0.90744
+    - type: mrr_at_100
+      value: 0.90799
+    - type: mrr_at_1000
+      value: 0.90801
+    - type: mrr_at_3
+      value: 0.90322
+    - type: mrr_at_5
+      value: 0.90622
+    - type: ndcg_at_1
+      value: 0.85179
+    - type: ndcg_at_10
+      value: 0.88229
+    - type: ndcg_at_100
+      value: 0.8884
+    - type: ndcg_at_1000
+      value: 0.89116
+    - type: ndcg_at_3
+      value: 0.87304
+    - type: ndcg_at_5
+      value: 0.87862
+    - type: precision_at_1
+      value: 0.85179
+    - type: precision_at_10
+      value: 0.10129
+    - type: precision_at_100
+      value: 0.0106
+    - type: precision_at_1000
+      value: 0.0011
+    - type: precision_at_3
+      value: 0.32543
+    - type: precision_at_5
+      value: 0.19931
+    - type: recall_at_1
+      value: 0.78993
+    - type: recall_at_10
+      value: 0.92685
+    - type: recall_at_100
+      value: 0.9516
+    - type: recall_at_1000
+      value: 0.96943
+    - type: recall_at_3
+      value: 0.89965
+    - type: recall_at_5
+      value: 0.91562
+  - dataset:
+      type: mteb/fiqa
+      name: MTEB FiQA2018
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.22586
+    - type: map_at_10
+      value: 0.36836
+    - type: map_at_100
+      value: 0.38863
+    - type: map_at_1000
+      value: 0.39041
+    - type: map_at_3
+      value: 0.32445
+    - type: map_at_5
+      value: 0.34951
+    - type: mrr_at_1
+      value: 0.44599
+    - type: mrr_at_10
+      value: 0.53471
+    - type: mrr_at_100
+      value: 0.54186
+    - type: mrr_at_1000
+      value: 0.54223
+    - type: mrr_at_3
+      value: 0.51157
+    - type: mrr_at_5
+      value: 0.52423
+    - type: ndcg_at_1
+      value: 0.44599
+    - type: ndcg_at_10
+      value: 0.44931
+    - type: ndcg_at_100
+      value: 0.51914
+    - type: ndcg_at_1000
+      value: 0.54674
+    - type: ndcg_at_3
+      value: 0.41597
+    - type: ndcg_at_5
+      value: 0.42611
+    - type: precision_at_1
+      value: 0.44599
+    - type: precision_at_10
+      value: 0.12346
+    - type: precision_at_100
+      value: 0.01951
+    - type: precision_at_1000
+      value: 0.00244
+    - type: precision_at_3
+      value: 0.27623
+    - type: precision_at_5
+      value: 0.20093
+    - type: recall_at_1
+      value: 0.22586
+    - type: recall_at_10
+      value: 0.5152
+    - type: recall_at_100
+      value: 0.77251
+    - type: recall_at_1000
+      value: 0.93503
+    - type: recall_at_3
+      value: 0.37802
+    - type: recall_at_5
+      value: 0.4386
+  - dataset:
+      type: mteb/hotpotqa
+      name: MTEB HotpotQA
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.38177
+    - type: map_at_10
+      value: 0.59021
+    - type: map_at_100
+      value: 0.59924
+    - type: map_at_1000
+      value: 0.59989
+    - type: map_at_3
+      value: 0.55553
+    - type: map_at_5
+      value: 0.57773
+    - type: mrr_at_1
+      value: 0.76354
+    - type: mrr_at_10
+      value: 0.827
+    - type: mrr_at_100
+      value: 0.82887
+    - type: mrr_at_1000
+      value: 0.82896
+    - type: mrr_at_3
+      value: 0.8172
+    - type: mrr_at_5
+      value: 0.82338
+    - type: ndcg_at_1
+      value: 0.76354
+    - type: ndcg_at_10
+      value: 0.67775
+    - type: ndcg_at_100
+      value: 0.70849
+    - type: ndcg_at_1000
+      value: 0.7215
+    - type: ndcg_at_3
+      value: 0.629
+    - type: ndcg_at_5
+      value: 0.65679
+    - type: precision_at_1
+      value: 0.76354
+    - type: precision_at_10
+      value: 0.14176
+    - type: precision_at_100
+      value: 0.01656
+    - type: precision_at_1000
+      value: 0.00183
+    - type: precision_at_3
+      value: 0.40113
+    - type: precision_at_5
+      value: 0.26255
+    - type: recall_at_1
+      value: 0.38177
+    - type: recall_at_10
+      value: 0.70878
+    - type: recall_at_100
+      value: 0.82822
+    - type: recall_at_1000
+      value: 0.91472
+    - type: recall_at_3
+      value: 0.60169
+    - type: recall_at_5
+      value: 0.65638
+  - dataset:
+      type: mteb/msmarco
+      name: MTEB MSMARCO
+      config: default
+      split: dev
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.15062
+    - type: map_at_10
+      value: 0.26008
+    - type: map_at_100
+      value: 0.27305
+    - type: map_at_1000
+      value: 0.27373
+    - type: map_at_3
+      value: 0.22236
+    - type: map_at_5
+      value: 0.24362
+    - type: mrr_at_1
+      value: 0.15444
+    - type: mrr_at_10
+      value: 0.26458
+    - type: mrr_at_100
+      value: 0.27718
+    - type: mrr_at_1000
+      value: 0.2778
+    - type: mrr_at_3
+      value: 0.22701
+    - type: mrr_at_5
+      value: 0.24844
+    - type: ndcg_at_1
+      value: 0.15444
+    - type: ndcg_at_10
+      value: 0.32495
+    - type: ndcg_at_100
+      value: 0.38957
+    - type: ndcg_at_1000
+      value: 0.40684
+    - type: ndcg_at_3
+      value: 0.24745
+    - type: ndcg_at_5
+      value: 0.2856
+    - type: precision_at_1
+      value: 0.15444
+    - type: precision_at_10
+      value: 0.05486
+    - type: precision_at_100
+      value: 0.00875
+    - type: precision_at_1000
+      value: 0.00102
+    - type: precision_at_3
+      value: 0.1086
+    - type: precision_at_5
+      value: 0.08441
+    - type: recall_at_1
+      value: 0.15062
+    - type: recall_at_10
+      value: 0.5272
+    - type: recall_at_100
+      value: 0.83006
+    - type: recall_at_1000
+      value: 0.96263
+    - type: recall_at_3
+      value: 0.31556
+    - type: recall_at_5
+      value: 0.40706
+  - dataset:
+      type: mteb/nfcorpus
+      name: MTEB NFCorpus
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.06126
+    - type: map_at_10
+      value: 0.14152
+    - type: map_at_100
+      value: 0.1827
+    - type: map_at_1000
+      value: 0.1988
+    - type: map_at_3
+      value: 0.10301
+    - type: map_at_5
+      value: 0.12085
+    - type: mrr_at_1
+      value: 0.47988
+    - type: mrr_at_10
+      value: 0.5692
+    - type: mrr_at_100
+      value: 0.57428
+    - type: mrr_at_1000
+      value: 0.57482
+    - type: mrr_at_3
+      value: 0.55315
+    - type: mrr_at_5
+      value: 0.56352
+    - type: ndcg_at_1
+      value: 0.45356
+    - type: ndcg_at_10
+      value: 0.3725
+    - type: ndcg_at_100
+      value: 0.34496
+    - type: ndcg_at_1000
+      value: 0.43374
+    - type: ndcg_at_3
+      value: 0.42643
+    - type: ndcg_at_5
+      value: 0.40882
+    - type: precision_at_1
+      value: 0.47368
+    - type: precision_at_10
+      value: 0.2774
+    - type: precision_at_100
+      value: 0.09071
+    - type: precision_at_1000
+      value: 0.02226
+    - type: precision_at_3
+      value: 0.40144
+    - type: precision_at_5
+      value: 0.35913
+    - type: recall_at_1
+      value: 0.06126
+    - type: recall_at_10
+      value: 0.18427
+    - type: recall_at_100
+      value: 0.35018
+    - type: recall_at_1000
+      value: 0.6766
+    - type: recall_at_3
+      value: 0.11706
+    - type: recall_at_5
+      value: 0.14419
+  - dataset:
+      type: mteb/nq
+      name: MTEB NQ
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.33053
+    - type: map_at_10
+      value: 0.49739
+    - type: map_at_100
+      value: 0.50626
+    - type: map_at_1000
+      value: 0.50647
+    - type: map_at_3
+      value: 0.4491
+    - type: map_at_5
+      value: 0.4783
+    - type: mrr_at_1
+      value: 0.37254
+    - type: mrr_at_10
+      value: 0.52222
+    - type: mrr_at_100
+      value: 0.52855
+    - type: mrr_at_1000
+      value: 0.52869
+    - type: mrr_at_3
+      value: 0.48445
+    - type: mrr_at_5
+      value: 0.50834
+    - type: ndcg_at_1
+      value: 0.37254
+    - type: ndcg_at_10
+      value: 0.58044
+    - type: ndcg_at_100
+      value: 0.61613
+    - type: ndcg_at_1000
+      value: 0.62046
+    - type: ndcg_at_3
+      value: 0.49219
+    - type: ndcg_at_5
+      value: 0.54037
+    - type: precision_at_1
+      value: 0.37254
+    - type: precision_at_10
+      value: 0.09655
+    - type: precision_at_100
+      value: 0.01167
+    - type: precision_at_1000
+      value: 0.00121
+    - type: precision_at_3
+      value: 0.22538
+    - type: precision_at_5
+      value: 0.16344
+    - type: recall_at_1
+      value: 0.33053
+    - type: recall_at_10
+      value: 0.8076
+    - type: recall_at_100
+      value: 0.95862
+    - type: recall_at_1000
+      value: 0.99044
+    - type: recall_at_3
+      value: 0.58157
+    - type: recall_at_5
+      value: 0.69235
+  - dataset:
+      type: mteb/quora
+      name: MTEB QuoraRetrieval
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.70056
+    - type: map_at_10
+      value: 0.84009
+    - type: map_at_100
+      value: 0.84661
+    - type: map_at_1000
+      value: 0.84678
+    - type: map_at_3
+      value: 0.81036
+    - type: map_at_5
+      value: 0.82923
+    - type: mrr_at_1
+      value: 0.8062
+    - type: mrr_at_10
+      value: 0.86971
+    - type: mrr_at_100
+      value: 0.87079
+    - type: mrr_at_1000
+      value: 0.8708
+    - type: mrr_at_3
+      value: 0.85943
+    - type: mrr_at_5
+      value: 0.86664
+    - type: ndcg_at_1
+      value: 0.8064
+    - type: ndcg_at_10
+      value: 0.87821
+    - type: ndcg_at_100
+      value: 0.89091
+    - type: ndcg_at_1000
+      value: 0.89202
+    - type: ndcg_at_3
+      value: 0.849
+    - type: ndcg_at_5
+      value: 0.86544
+    - type: precision_at_1
+      value: 0.8064
+    - type: precision_at_10
+      value: 0.13347
+    - type: precision_at_100
+      value: 0.01527
+    - type: precision_at_1000
+      value: 0.00157
+    - type: precision_at_3
+      value: 0.37153
+    - type: precision_at_5
+      value: 0.2448
+    - type: recall_at_1
+      value: 0.70056
+    - type: recall_at_10
+      value: 0.95148
+    - type: recall_at_100
+      value: 0.99474
+    - type: recall_at_1000
+      value: 0.99977
+    - type: recall_at_3
+      value: 0.86773
+    - type: recall_at_5
+      value: 0.91396
+  - dataset:
+      type: mteb/scidocs
+      name: MTEB SCIDOCS
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.05737
+    - type: map_at_10
+      value: 0.14896
+    - type: map_at_100
+      value: 0.17646
+    - type: map_at_1000
+      value: 0.1803
+    - type: map_at_3
+      value: 0.10474
+    - type: map_at_5
+      value: 0.12656
+    - type: mrr_at_1
+      value: 0.281
+    - type: mrr_at_10
+      value: 0.39579
+    - type: mrr_at_100
+      value: 0.40687
+    - type: mrr_at_1000
+      value: 0.40722
+    - type: mrr_at_3
+      value: 0.35917
+    - type: mrr_at_5
+      value: 0.38097
+    - type: ndcg_at_1
+      value: 0.281
+    - type: ndcg_at_10
+      value: 0.24146
+    - type: ndcg_at_100
+      value: 0.339
+    - type: ndcg_at_1000
+      value: 0.39728
+    - type: ndcg_at_3
+      value: 0.22721
+    - type: ndcg_at_5
+      value: 0.20015
+    - type: precision_at_1
+      value: 0.281
+    - type: precision_at_10
+      value: 0.1254
+    - type: precision_at_100
+      value: 0.02651
+    - type: precision_at_1000
+      value: 0.00404
+    - type: precision_at_3
+      value: 0.212
+    - type: precision_at_5
+      value: 0.176
+    - type: recall_at_1
+      value: 0.05737
+    - type: recall_at_10
+      value: 0.254
+    - type: recall_at_100
+      value: 0.53772
+    - type: recall_at_1000
+      value: 0.82013
+    - type: recall_at_3
+      value: 0.12897
+    - type: recall_at_5
+      value: 0.17855
+  - dataset:
+      type: mteb/scifact
+      name: MTEB SciFact
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.60011
+    - type: map_at_10
+      value: 0.70101
+    - type: map_at_100
+      value: 0.70687
+    - type: map_at_1000
+      value: 0.70699
+    - type: map_at_3
+      value: 0.67135
+    - type: map_at_5
+      value: 0.6878
+    - type: mrr_at_1
+      value: 0.62667
+    - type: mrr_at_10
+      value: 0.71022
+    - type: mrr_at_100
+      value: 0.71484
+    - type: mrr_at_1000
+      value: 0.71496
+    - type: mrr_at_3
+      value: 0.68944
+    - type: mrr_at_5
+      value: 0.69961
+    - type: ndcg_at_1
+      value: 0.62667
+    - type: ndcg_at_10
+      value: 0.7472
+    - type: ndcg_at_100
+      value: 0.76961
+    - type: ndcg_at_1000
+      value: 0.77294
+    - type: ndcg_at_3
+      value: 0.69776
+    - type: ndcg_at_5
+      value: 0.71964
+    - type: precision_at_1
+      value: 0.62667
+    - type: precision_at_10
+      value: 0.09933
+    - type: precision_at_100
+      value: 0.01103
+    - type: precision_at_1000
+      value: 0.00113
+    - type: precision_at_3
+      value: 0.27
+    - type: precision_at_5
+      value: 0.178
+    - type: recall_at_1
+      value: 0.60011
+    - type: recall_at_10
+      value: 0.878
+    - type: recall_at_100
+      value: 0.97333
+    - type: recall_at_1000
+      value: 1
+    - type: recall_at_3
+      value: 0.74839
+    - type: recall_at_5
+      value: 0.80028
+  - dataset:
+      type: mteb/touche2020
+      name: MTEB Touche2020
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.02152
+    - type: map_at_10
+      value: 0.07747
+    - type: map_at_100
+      value: 0.1364
+    - type: map_at_1000
+      value: 0.15235
+    - type: map_at_3
+      value: 0.04103
+    - type: map_at_5
+      value: 0.05482
+    - type: mrr_at_1
+      value: 0.26531
+    - type: mrr_at_10
+      value: 0.41399
+    - type: mrr_at_100
+      value: 0.43047
+    - type: mrr_at_1000
+      value: 0.43047
+    - type: mrr_at_3
+      value: 0.38776
+    - type: mrr_at_5
+      value: 0.40612
+    - type: ndcg_at_1
+      value: 0.23469
+    - type: ndcg_at_10
+      value: 0.20147
+    - type: ndcg_at_100
+      value: 0.3279
+    - type: ndcg_at_1000
+      value: 0.45324
+    - type: ndcg_at_3
+      value: 0.22555
+    - type: ndcg_at_5
+      value: 0.2097
+    - type: precision_at_1
+      value: 0.26531
+    - type: precision_at_10
+      value: 0.17755
+    - type: precision_at_100
+      value: 0.07082
+    - type: precision_at_1000
+      value: 0.01547
+    - type: precision_at_3
+      value: 0.2449
+    - type: precision_at_5
+      value: 0.21633
+    - type: recall_at_1
+      value: 0.02152
+    - type: recall_at_10
+      value: 0.13331
+    - type: recall_at_100
+      value: 0.4535
+    - type: recall_at_1000
+      value: 0.83447
+    - type: recall_at_3
+      value: 0.05531
+    - type: recall_at_5
+      value: 0.08029
+  - dataset:
+      type: mteb/trec-covid
+      name: MTEB TRECCOVID
+      config: default
+      split: test
+    task:
+      type: Retrieval
+    metrics:
+    - type: map_at_1
+      value: 0.00202
+    - type: map_at_10
+      value: 0.01727
+    - type: map_at_100
+      value: 0.10906
+    - type: map_at_1000
+      value: 0.2894
+    - type: map_at_3
+      value: 0.00553
+    - type: map_at_5
+      value: 0.00924
+    - type: mrr_at_1
+      value: 0.74
+    - type: mrr_at_10
+      value: 0.85667
+    - type: mrr_at_100
+      value: 0.85667
+    - type: mrr_at_1000
+      value: 0.85667
+    - type: mrr_at_3
+      value: 0.85667
+    - type: mrr_at_5
+      value: 0.85667
+    - type: ndcg_at_1
+      value: 0.66
+    - type: ndcg_at_10
+      value: 0.69259
+    - type: ndcg_at_100
+      value: 0.57274
+    - type: ndcg_at_1000
+      value: 0.55462
+    - type: ndcg_at_3
+      value: 0.70654
+    - type: ndcg_at_5
+      value: 0.71611
+    - type: precision_at_1
+      value: 0.74
+    - type: precision_at_10
+      value: 0.748
+    - type: precision_at_100
+      value: 0.5962
+    - type: precision_at_1000
+      value: 0.24842
+    - type: precision_at_3
+      value: 0.77333
+    - type: precision_at_5
+      value: 0.788
+    - type: recall_at_1
+      value: 0.00202
+    - type: recall_at_10
+      value: 0.02001
+    - type: recall_at_100
+      value: 0.14801
+    - type: recall_at_1000
+      value: 0.53939
+    - type: recall_at_3
+      value: 0.00609
+    - type: recall_at_5
+      value: 0.01048
+pipeline_tag: sentence-similarity
+---
+# Granite-Embedding-125m-English
+**Model Summary:**
+Granite-Embedding-125m-English is a 125M-parameter dense bi-encoder embedding model from the Granite Embeddings suite that can be used to generate high-quality text embeddings. This model produces embedding vectors of size 768. Unlike most other open-source models, this model was trained only on open-source relevance-pair datasets with permissive, enterprise-friendly licenses, plus IBM-collected and IBM-generated datasets. While maintaining competitive scores on academic benchmarks such as BEIR, this model also performs well on many enterprise use cases. This model was developed using retrieval-oriented pretraining, contrastive finetuning, and knowledge distillation.
+- **Developers:** Granite Embedding Team, IBM
+- **GitHub Repository:** [ibm-granite/granite-embedding-models](https://github.com/ibm-granite/granite-embedding-models)
+- **Website**: [Granite Docs](https://www.ibm.com/granite/docs/)
+- **Paper:** Coming Soon
+- **Release Date**: December 18th, 2024
+- **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)
+**Supported Languages:**
+English.
+**Intended use:**
+The model is designed to produce fixed length vector representations for a given text, which can be used for text similarity, retrieval, and search applications.
+**Usage with Sentence Transformers:**
+The model is compatible with the Sentence Transformers library and is very easy to use:
+First, install the sentence transformers library
+```shell
+pip install sentence_transformers
+```
+The model can then be used to encode pairs of text and find the similarity between their representations
+```python
+from sentence_transformers import SentenceTransformer, util
+model_path = "ibm-granite/granite-embedding-125m-english"
+# Load the Sentence Transformer model
+model = SentenceTransformer(model_path)
+input_queries = [
+    ' Who made the song My achy breaky heart? ',
+    'summit define'
+    ]
+input_passages = [
+    "Achy Breaky Heart is a country song written by Don Von Tress. Originally titled Don't Tell My Heart and performed by The Marcy Brothers in 1991. ",
+    "Definition of summit for English Language Learners. : 1 the highest point of a mountain : the top of a mountain. : 2 the highest level. : 3 a meeting or series of meetings between the leaders of two or more governments."
+    ]
+# encode queries and passages
+query_embeddings = model.encode(input_queries)
+passage_embeddings = model.encode(input_passages)
+# calculate cosine similarity
+print(util.cos_sim(query_embeddings, passage_embeddings))
+```
+**Usage with Hugging Face Transformers:**
+This is a simple example of how to use the Granite-Embedding-125m-English model with the Transformers library and PyTorch.
+First, install the required libraries
+```shell
+pip install transformers torch
+```
+The model can then be used to encode pairs of text
+```python
+import torch
+from transformers import AutoModel, AutoTokenizer
+model_path = "ibm-granite/granite-embedding-125m-english"
+# Load the model and tokenizer
+model = AutoModel.from_pretrained(model_path)
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+model.eval()
+input_queries = [
+    ' Who made the song My achy breaky heart? ',
+    'summit define'
+    ]
+# tokenize inputs
+tokenized_queries = tokenizer(input_queries, padding=True, truncation=True, return_tensors='pt')
+# encode queries
+with torch.no_grad():
+    # Queries
+    model_output = model(**tokenized_queries)
+    # Perform pooling. granite-embedding-125m-english uses CLS Pooling
+    query_embeddings = model_output[0][:, 0]
+# normalize the embeddings
+query_embeddings = torch.nn.functional.normalize(query_embeddings, dim=1)
+```
+**Evaluation:**
+The performance of the Granite-Embedding-125M-English model on MTEB Retrieval (i.e., BEIR) and code retrieval (CoIR) benchmarks is reported below.
+| Model                           | Parameters (M)| Embedding Dimension |  MTEB Retrieval (15) |  CoIR (10) |
+|---------------------------------|:------------:|:-------------------:|:-------------------: |:----------:|
+|granite-embedding-125m-english   |125           |768                  |52.3                  |50.3        |
+**Model Architecture:**
+Granite-Embedding-125m-English is based on an encoder-only, RoBERTa-like transformer architecture, trained internally at IBM Research.
+| Model                     | granite-embedding-30m-english | granite-embedding-125m-english    | granite-embedding-107m-multilingual | granite-embedding-278m-multilingual |
+| :---------                | :-------:| :--------:   | :-----:| :-----:|
+| Embedding size            | 384  | **768**        | 384    | 768    |
+| Number of layers          | 6    | **12**           | 6      | 12     |
+| Number of attention heads | 12   | **12**          | 12     | 12     |
+| Intermediate size         | 1536 | **3072**         | 1536   | 3072   |
+| Activation Function       | GeLU | **GeLU**         | GeLU   | GeLU   |
+| Vocabulary Size           | 50265| **50265**        | 250002 | 250002 |
+| Max. Sequence Length      | 512  | **512**          | 512    | 512    |
+| # Parameters              | 30M  | **125M**         | 107M   | 278M   |
+**Training Data:**
+Overall, the training data consists of four key sources: (1) unsupervised title-body paired data scraped from the web, (2) publicly available paired data with permissive, enterprise-friendly licenses, (3) IBM-internal paired data targeting specific technical domains, and (4) IBM-generated synthetic data. The data is listed below:
+| **Dataset**                                        | **Num. Pairs** |
+|----------------------------------------------------|:---------------:|
+| SPECTER citation triplets                          | 684,100         |
+| Stack Exchange Duplicate questions (titles)        | 304,525         |
+| Stack Exchange Duplicate questions (bodies)        | 250,519         |
+| Stack Exchange Duplicate questions (titles+bodies) | 250,460         |
+| Natural Questions (NQ)                             | 100,231         |
+| SQuAD2.0                                           | 87,599          |
+| PAQ (Question, Answer) pairs                       | 64,371,441       |
+| Stack Exchange (Title, Answer) pairs               | 4,067,139        |
+| Stack Exchange (Title, Body) pairs                 | 23,978,013       |
+| Stack Exchange (Title+Body, Answer) pairs          | 187,195         |
+| S2ORC Citation pairs (Titles)                      | 52,603,982       |
+| S2ORC (Title, Abstract)                            | 41,769,185       |
+| S2ORC (Citations, abstracts)                       | 52,603,982       |
+| WikiAnswers Duplicate question pairs               | 77,427,422       |
+| SearchQA                                           | 582,261         |
+| HotpotQA                                           | 85,000          |
+| Fever                                              | 109,810         |
+| Arxiv                                              | 2,358,545        |
+| Wikipedia                                          | 20,745,403       |
+| PubMed                                             | 20,000,000       |
+| Miracl En Pairs                                    | 9,016           |
+| DBPedia Title-Body Pairs                           | 4,635,922        |
+| Synthetic: Query-Wikipedia Passage                 | 1,879,093        |
+| Synthetic: Fact Verification                       | 9,888           |
+| IBM Internal Triples                               | 40,290          |
+| IBM Internal Title-Body Pairs                      | 1,524,586        |
+Notably, we do not use the popular MS-MARCO retrieval dataset in our training corpus due to its non-commercial license, while other open-source models train on this dataset due to its high quality.
+**Infrastructure:**
+We train Granite Embedding Models using IBM's computing cluster, Cognitive Compute Cluster, which is outfitted with NVIDIA A100 80GB GPUs. This cluster provides a scalable and efficient infrastructure for training our models over multiple GPUs.
+**Ethical Considerations and Limitations:**
+The data used to train the base language model was filtered to remove text containing hate, abuse, and profanity. Granite-Embedding-125m-English is trained only for English texts, and has a context length of 512 tokens (longer texts will be truncated to this size).
+<!-- ## Citation
+```
+@misc{granite-embedding-models,
+  author = {author 1, author2, ...},
+  title = {},
+  journal = {},
+  volume = {},
+  year = {2024},
+  url = {https://arxiv.org/abs/0000.00000},
+}
+``` -->

config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "layer_norm_epsilon": 1e-05,
+  "multi_query_attention": false,
+  "unk_token": "<unk>"
+}

model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5540cde121e37f10ba85db71ff244844ddd7eb50f7d3d1e515faa020a8baa24c
+size 125709805

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,61 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50264": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "mask_token": "<mask>",
+  "max_length": 512,
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "stride": 0,
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "<unk>"
+}

vocabulary.json ADDED Viewed

The diff for this file is too large to render. See raw diff