fangxq committed on
Commit
527bc01
·
verified ·
1 Parent(s): 7784df4

上传xyz模型

Browse files
README.md CHANGED
@@ -2,130 +2,12 @@
2
  model-index:
3
  - name: XYZ-embedding-zh-v2
4
  results:
5
- - dataset:
6
- config: default
7
- name: MTEB AFQMC
8
- revision: None
9
- split: validation
10
- type: C-MTEB/AFQMC
11
- metrics:
12
- - type: cos_sim_pearson
13
- value: 55.51799059309076
14
- - type: cos_sim_spearman
15
- value: 58.407433584137806
16
- - type: manhattan_pearson
17
- value: 57.17473672145622
18
- - type: manhattan_spearman
19
- value: 58.389018054159955
20
- - type: euclidean_pearson
21
- value: 57.19483956761451
22
- - type: euclidean_spearman
23
- value: 58.407433584137806
24
- - type: main_score
25
- value: 58.407433584137806
26
- task:
27
- type: STS
28
- - dataset:
29
- config: default
30
- name: MTEB ATEC
31
- revision: None
32
- split: test
33
- type: C-MTEB/ATEC
34
- metrics:
35
- - type: cos_sim_pearson
36
- value: 57.31078155367183
37
- - type: cos_sim_spearman
38
- value: 57.59782762324478
39
- - type: manhattan_pearson
40
- value: 62.525487007985035
41
- - type: manhattan_spearman
42
- value: 57.591139966303615
43
- - type: euclidean_pearson
44
- value: 62.53702437760052
45
- - type: euclidean_spearman
46
- value: 57.597828749091384
47
- - type: main_score
48
- value: 57.59782762324478
49
- task:
50
- type: STS
51
- - dataset:
52
- config: zh
53
- name: MTEB AmazonReviewsClassification (zh)
54
- revision: 1399c76144fd37290681b995c656ef9b2e06e26d
55
- split: test
56
- type: mteb/amazon_reviews_multi
57
- metrics:
58
- - type: accuracy
59
- value: 49.374
60
- - type: accuracy_stderr
61
- value: 1.436636349254743
62
- - type: f1
63
- value: 47.115240601017774
64
- - type: f1_stderr
65
- value: 1.5642799356594534
66
- - type: main_score
67
- value: 49.374
68
- task:
69
- type: Classification
70
- - dataset:
71
- config: default
72
- name: MTEB BQ
73
- revision: None
74
- split: test
75
- type: C-MTEB/BQ
76
- metrics:
77
- - type: cos_sim_pearson
78
- value: 71.49514309404829
79
- - type: cos_sim_spearman
80
- value: 72.66161713021279
81
- - type: manhattan_pearson
82
- value: 71.03443640254005
83
- - type: manhattan_spearman
84
- value: 72.63439621980275
85
- - type: euclidean_pearson
86
- value: 71.06830370642658
87
- - type: euclidean_spearman
88
- value: 72.66161713043078
89
- - type: main_score
90
- value: 72.66161713021279
91
- task:
92
- type: STS
93
- - dataset:
94
- config: default
95
- name: MTEB CLSClusteringP2P
96
- revision: None
97
- split: test
98
- type: C-MTEB/CLSClusteringP2P
99
- metrics:
100
- - type: v_measure
101
- value: 57.237692641281
102
- - type: v_measure_std
103
- value: 1.2777768354339174
104
- - type: main_score
105
- value: 57.237692641281
106
- task:
107
- type: Clustering
108
- - dataset:
109
- config: default
110
- name: MTEB CLSClusteringS2S
111
- revision: None
112
- split: test
113
- type: C-MTEB/CLSClusteringS2S
114
- metrics:
115
- - type: v_measure
116
- value: 48.41686666939331
117
- - type: v_measure_std
118
- value: 1.7663118461900793
119
- - type: main_score
120
- value: 48.41686666939331
121
- task:
122
- type: Clustering
123
  - dataset:
124
  config: default
125
  name: MTEB CMedQAv1
126
  revision: None
127
  split: test
128
- type: C-MTEB/CMedQAv1-reranking
129
  metrics:
130
  - type: map
131
  value: 89.9766367822762
@@ -140,7 +22,7 @@ model-index:
140
  name: MTEB CMedQAv2
141
  revision: None
142
  split: test
143
- type: C-MTEB/CMedQAv2-reranking
144
  metrics:
145
  - type: map
146
  value: 89.04628340075982
@@ -221,77 +103,6 @@ model-index:
221
  value: 48.294
222
  task:
223
  type: Retrieval
224
- - dataset:
225
- config: default
226
- name: MTEB Cmnli
227
- revision: None
228
- split: validation
229
- type: C-MTEB/CMNLI
230
- metrics:
231
- - type: cos_sim_accuracy
232
- value: 82.8983764281419
233
- - type: cos_sim_accuracy_threshold
234
- value: 56.05731010437012
235
- - type: cos_sim_ap
236
- value: 90.23156362696572
237
- - type: cos_sim_f1
238
- value: 83.83207278307574
239
- - type: cos_sim_f1_threshold
240
- value: 52.05453634262085
241
- - type: cos_sim_precision
242
- value: 78.91044160132068
243
- - type: cos_sim_recall
244
- value: 89.40846387654898
245
- - type: dot_accuracy
246
- value: 82.8983764281419
247
- - type: dot_accuracy_threshold
248
- value: 56.05730414390564
249
- - type: dot_ap
250
- value: 90.20952356258861
251
- - type: dot_f1
252
- value: 83.83207278307574
253
- - type: dot_f1_threshold
254
- value: 52.054524421691895
255
- - type: dot_precision
256
- value: 78.91044160132068
257
- - type: dot_recall
258
- value: 89.40846387654898
259
- - type: euclidean_accuracy
260
- value: 82.8983764281419
261
- - type: euclidean_accuracy_threshold
262
- value: 93.74719858169556
263
- - type: euclidean_ap
264
- value: 90.23156283510565
265
- - type: euclidean_f1
266
- value: 83.83207278307574
267
- - type: euclidean_f1_threshold
268
- value: 97.92392253875732
269
- - type: euclidean_precision
270
- value: 78.91044160132068
271
- - type: euclidean_recall
272
- value: 89.40846387654898
273
- - type: manhattan_accuracy
274
- value: 82.85027059530968
275
- - type: manhattan_accuracy_threshold
276
- value: 3164.584159851074
277
- - type: manhattan_ap
278
- value: 90.23178004516869
279
- - type: manhattan_f1
280
- value: 83.82157123834887
281
- - type: manhattan_f1_threshold
282
- value: 3273.5992431640625
283
- - type: manhattan_precision
284
- value: 79.76768743400211
285
- - type: manhattan_recall
286
- value: 88.30956277764788
287
- - type: max_accuracy
288
- value: 82.8983764281419
289
- - type: max_ap
290
- value: 90.23178004516869
291
- - type: max_f1
292
- value: 83.83207278307574
293
- task:
294
- type: PairClassification
295
  - dataset:
296
  config: default
297
  name: MTEB CovidRetrieval
@@ -505,71 +316,6 @@ model-index:
505
  value: 70.294
506
  task:
507
  type: Retrieval
508
- - dataset:
509
- config: default
510
- name: MTEB IFlyTek
511
- revision: None
512
- split: validation
513
- type: C-MTEB/IFlyTek-classification
514
- metrics:
515
- - type: accuracy
516
- value: 52.743362831858406
517
- - type: accuracy_stderr
518
- value: 0.23768288128480788
519
- - type: f1
520
- value: 41.1548855278405
521
- - type: f1_stderr
522
- value: 0.4088759842813554
523
- - type: main_score
524
- value: 52.743362831858406
525
- task:
526
- type: Classification
527
- - dataset:
528
- config: default
529
- name: MTEB JDReview
530
- revision: None
531
- split: test
532
- type: C-MTEB/JDReview-classification
533
- metrics:
534
- - type: accuracy
535
- value: 89.08067542213884
536
- - type: accuracy_stderr
537
- value: 0.9559278951487445
538
- - type: ap
539
- value: 60.875320104586564
540
- - type: ap_stderr
541
- value: 2.137806661565934
542
- - type: f1
543
- value: 84.39314192399665
544
- - type: f1_stderr
545
- value: 1.132407155321657
546
- - type: main_score
547
- value: 89.08067542213884
548
- task:
549
- type: Classification
550
- - dataset:
551
- config: default
552
- name: MTEB LCQMC
553
- revision: None
554
- split: test
555
- type: C-MTEB/LCQMC
556
- metrics:
557
- - type: cos_sim_pearson
558
- value: 73.3633875566899
559
- - type: cos_sim_spearman
560
- value: 79.27679599527615
561
- - type: manhattan_pearson
562
- value: 79.12061667088273
563
- - type: manhattan_spearman
564
- value: 79.26989882781706
565
- - type: euclidean_pearson
566
- value: 79.12871362068391
567
- - type: euclidean_spearman
568
- value: 79.27679377557219
569
- - type: main_score
570
- value: 79.27679599527615
571
- task:
572
- type: STS
573
  - dataset:
574
  config: default
575
  name: MTEB MMarcoReranking
@@ -656,44 +402,6 @@ model-index:
656
  value: 82.505
657
  task:
658
  type: Retrieval
659
- - dataset:
660
- config: zh-CN
661
- name: MTEB MassiveIntentClassification (zh-CN)
662
- revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
663
- split: test
664
- type: mteb/amazon_massive_intent
665
- metrics:
666
- - type: accuracy
667
- value: 77.9388029589778
668
- - type: accuracy_stderr
669
- value: 1.416192788478398
670
- - type: f1
671
- value: 74.77765701086211
672
- - type: f1_stderr
673
- value: 1.254859698486085
674
- - type: main_score
675
- value: 77.9388029589778
676
- task:
677
- type: Classification
678
- - dataset:
679
- config: zh-CN
680
- name: MTEB MassiveScenarioClassification (zh-CN)
681
- revision: 7d571f92784cd94a019292a1f45445077d0ef634
682
- split: test
683
- type: mteb/amazon_massive_scenario
684
- metrics:
685
- - type: accuracy
686
- value: 83.8231338264963
687
- - type: accuracy_stderr
688
- value: 0.6973305760755886
689
- - type: f1
690
- value: 83.13105322628088
691
- - type: f1_stderr
692
- value: 0.600506118139685
693
- - type: main_score
694
- value: 83.8231338264963
695
- task:
696
- type: Classification
697
  - dataset:
698
  config: default
699
  name: MTEB MedicalRetrieval
@@ -765,211 +473,6 @@ model-index:
765
  value: 68.041
766
  task:
767
  type: Retrieval
768
- - dataset:
769
- config: default
770
- name: MTEB MultilingualSentiment
771
- revision: None
772
- split: validation
773
- type: C-MTEB/MultilingualSentiment-classification
774
- metrics:
775
- - type: accuracy
776
- value: 78.60333333333334
777
- - type: accuracy_stderr
778
- value: 0.3331499495555859
779
- - type: f1
780
- value: 78.4814340961856
781
- - type: f1_stderr
782
- value: 0.45721454672060496
783
- - type: main_score
784
- value: 78.60333333333334
785
- task:
786
- type: Classification
787
- - dataset:
788
- config: default
789
- name: MTEB Ocnli
790
- revision: None
791
- split: validation
792
- type: C-MTEB/OCNLI
793
- metrics:
794
- - type: cos_sim_accuracy
795
- value: 80.5630752571738
796
- - type: cos_sim_accuracy_threshold
797
- value: 53.72971296310425
798
- - type: cos_sim_ap
799
- value: 85.61885910463258
800
- - type: cos_sim_f1
801
- value: 82.40469208211144
802
- - type: cos_sim_f1_threshold
803
- value: 50.07883310317993
804
- - type: cos_sim_precision
805
- value: 76.70609645131938
806
- - type: cos_sim_recall
807
- value: 89.01795142555439
808
- - type: dot_accuracy
809
- value: 80.5630752571738
810
- - type: dot_accuracy_threshold
811
- value: 53.7297248840332
812
- - type: dot_ap
813
- value: 85.61885910463258
814
- - type: dot_f1
815
- value: 82.40469208211144
816
- - type: dot_f1_threshold
817
- value: 50.07884502410889
818
- - type: dot_precision
819
- value: 76.70609645131938
820
- - type: dot_recall
821
- value: 89.01795142555439
822
- - type: euclidean_accuracy
823
- value: 80.5630752571738
824
- - type: euclidean_accuracy_threshold
825
- value: 96.19801044464111
826
- - type: euclidean_ap
827
- value: 85.61885910463258
828
- - type: euclidean_f1
829
- value: 82.40469208211144
830
- - type: euclidean_f1_threshold
831
- value: 99.92111921310425
832
- - type: euclidean_precision
833
- value: 76.70609645131938
834
- - type: euclidean_recall
835
- value: 89.01795142555439
836
- - type: manhattan_accuracy
837
- value: 80.67135896047645
838
- - type: manhattan_accuracy_threshold
839
- value: 3323.1739044189453
840
- - type: manhattan_ap
841
- value: 85.55348220886658
842
- - type: manhattan_f1
843
- value: 82.26744186046511
844
- - type: manhattan_f1_threshold
845
- value: 3389.273452758789
846
- - type: manhattan_precision
847
- value: 76.00716204118174
848
- - type: manhattan_recall
849
- value: 89.65153115100317
850
- - type: max_accuracy
851
- value: 80.67135896047645
852
- - type: max_ap
853
- value: 85.61885910463258
854
- - type: max_f1
855
- value: 82.40469208211144
856
- task:
857
- type: PairClassification
858
- - dataset:
859
- config: default
860
- name: MTEB OnlineShopping
861
- revision: None
862
- split: test
863
- type: C-MTEB/OnlineShopping-classification
864
- metrics:
865
- - type: accuracy
866
- value: 94.94
867
- - type: accuracy_stderr
868
- value: 0.49030602688525093
869
- - type: ap
870
- value: 93.0785841977823
871
- - type: ap_stderr
872
- value: 0.5447383082750599
873
- - type: f1
874
- value: 94.92765777406245
875
- - type: f1_stderr
876
- value: 0.4891510966106189
877
- - type: main_score
878
- value: 94.94
879
- task:
880
- type: Classification
881
- - dataset:
882
- config: default
883
- name: MTEB PAWSX
884
- revision: None
885
- split: test
886
- type: C-MTEB/PAWSX
887
- metrics:
888
- - type: cos_sim_pearson
889
- value: 36.564307811370654
890
- - type: cos_sim_spearman
891
- value: 42.44208208349051
892
- - type: manhattan_pearson
893
- value: 42.099358471578306
894
- - type: manhattan_spearman
895
- value: 42.50283181486304
896
- - type: euclidean_pearson
897
- value: 42.07954956675317
898
- - type: euclidean_spearman
899
- value: 42.453014115018554
900
- - type: main_score
901
- value: 42.44208208349051
902
- task:
903
- type: STS
904
- - dataset:
905
- config: default
906
- name: MTEB QBQTC
907
- revision: None
908
- split: test
909
- type: C-MTEB/QBQTC
910
- metrics:
911
- - type: cos_sim_pearson
912
- value: 39.19092968089104
913
- - type: cos_sim_spearman
914
- value: 41.5174661348832
915
- - type: manhattan_pearson
916
- value: 37.91587646684523
917
- - type: manhattan_spearman
918
- value: 41.536668677987194
919
- - type: euclidean_pearson
920
- value: 37.91079973901135
921
- - type: euclidean_spearman
922
- value: 41.51833855501128
923
- - type: main_score
924
- value: 41.5174661348832
925
- task:
926
- type: STS
927
- - dataset:
928
- config: zh
929
- name: MTEB STS22 (zh)
930
- revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
931
- split: test
932
- type: mteb/sts22-crosslingual-sts
933
- metrics:
934
- - type: cos_sim_pearson
935
- value: 62.029449510721605
936
- - type: cos_sim_spearman
937
- value: 66.31935471251364
938
- - type: manhattan_pearson
939
- value: 63.63179975157496
940
- - type: manhattan_spearman
941
- value: 66.3007950466125
942
- - type: euclidean_pearson
943
- value: 63.59752734041086
944
- - type: euclidean_spearman
945
- value: 66.31935471251364
946
- - type: main_score
947
- value: 66.31935471251364
948
- task:
949
- type: STS
950
- - dataset:
951
- config: default
952
- name: MTEB STSB
953
- revision: None
954
- split: test
955
- type: C-MTEB/STSB
956
- metrics:
957
- - type: cos_sim_pearson
958
- value: 81.81459862563769
959
- - type: cos_sim_spearman
960
- value: 82.15323953301453
961
- - type: manhattan_pearson
962
- value: 81.61904305126016
963
- - type: manhattan_spearman
964
- value: 82.1361073852468
965
- - type: euclidean_pearson
966
- value: 81.60799063723992
967
- - type: euclidean_spearman
968
- value: 82.15405405083231
969
- - type: main_score
970
- value: 82.15323953301453
971
- task:
972
- type: STS
973
  - dataset:
974
  config: default
975
  name: MTEB T2Reranking
@@ -978,11 +481,11 @@ model-index:
978
  type: C-MTEB/T2Reranking
979
  metrics:
980
  - type: map
981
- value: 69.13560834260383
982
  - type: mrr
983
- value: 79.95749642669074
984
  - type: main_score
985
- value: 69.13560834260383
986
  task:
987
  type: Reranking
988
  - dataset:
@@ -1056,55 +559,6 @@ model-index:
1056
  value: 85.875
1057
  task:
1058
  type: Retrieval
1059
- - dataset:
1060
- config: default
1061
- name: MTEB TNews
1062
- revision: None
1063
- split: validation
1064
- type: C-MTEB/TNews-classification
1065
- metrics:
1066
- - type: accuracy
1067
- value: 54.309000000000005
1068
- - type: accuracy_stderr
1069
- value: 0.4694347665011627
1070
- - type: f1
1071
- value: 52.598803987889255
1072
- - type: f1_stderr
1073
- value: 0.5191189533227434
1074
- - type: main_score
1075
- value: 54.309000000000005
1076
- task:
1077
- type: Classification
1078
- - dataset:
1079
- config: default
1080
- name: MTEB ThuNewsClusteringP2P
1081
- revision: None
1082
- split: test
1083
- type: C-MTEB/ThuNewsClusteringP2P
1084
- metrics:
1085
- - type: v_measure
1086
- value: 76.64191229011249
1087
- - type: v_measure_std
1088
- value: 2.807206940615986
1089
- - type: main_score
1090
- value: 76.64191229011249
1091
- task:
1092
- type: Clustering
1093
- - dataset:
1094
- config: default
1095
- name: MTEB ThuNewsClusteringS2S
1096
- revision: None
1097
- split: test
1098
- type: C-MTEB/ThuNewsClusteringS2S
1099
- metrics:
1100
- - type: v_measure
1101
- value: 71.02529199411326
1102
- - type: v_measure_std
1103
- value: 2.0547855888165945
1104
- - type: main_score
1105
- value: 71.02529199411326
1106
- task:
1107
- type: Clustering
1108
  - dataset:
1109
  config: default
1110
  name: MTEB VideoRetrieval
@@ -1176,32 +630,13 @@ model-index:
1176
  value: 80.93599999999999
1177
  task:
1178
  type: Retrieval
1179
- - dataset:
1180
- config: default
1181
- name: MTEB Waimai
1182
- revision: None
1183
- split: test
1184
- type: C-MTEB/waimai-classification
1185
- metrics:
1186
- - type: accuracy
1187
- value: 89.47
1188
- - type: accuracy_stderr
1189
- value: 0.26476404589747476
1190
- - type: ap
1191
- value: 75.49555223825388
1192
- - type: ap_stderr
1193
- value: 0.596040511982105
1194
- - type: f1
1195
- value: 88.01797939221065
1196
- - type: f1_stderr
1197
- value: 0.27168216797281214
1198
- - type: main_score
1199
- value: 89.47
1200
- task:
1201
- type: Classification
1202
  tags:
1203
  - mteb
 
 
 
1204
  ---
 
1205
  <h2 align="left">XYZ-embedding-zh-v2</h2>
1206
 
1207
  ## Usage (Sentence Transformers)
@@ -1231,4 +666,4 @@ print(embeddings.shape)
1231
  similarities = model.similarity(embeddings, embeddings)
1232
  print(similarities.shape)
1233
  # [3, 3]
1234
- ```
 
2
  model-index:
3
  - name: XYZ-embedding-zh-v2
4
  results:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  - dataset:
6
  config: default
7
  name: MTEB CMedQAv1
8
  revision: None
9
  split: test
10
+ type: C-MTEB/CMedQAv1
11
  metrics:
12
  - type: map
13
  value: 89.9766367822762
 
22
  name: MTEB CMedQAv2
23
  revision: None
24
  split: test
25
+ type: C-MTEB/CMedQAv2
26
  metrics:
27
  - type: map
28
  value: 89.04628340075982
 
103
  value: 48.294
104
  task:
105
  type: Retrieval
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  - dataset:
107
  config: default
108
  name: MTEB CovidRetrieval
 
316
  value: 70.294
317
  task:
318
  type: Retrieval
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  - dataset:
320
  config: default
321
  name: MTEB MMarcoReranking
 
402
  value: 82.505
403
  task:
404
  type: Retrieval
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  - dataset:
406
  config: default
407
  name: MTEB MedicalRetrieval
 
473
  value: 68.041
474
  task:
475
  type: Retrieval
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
476
  - dataset:
477
  config: default
478
  name: MTEB T2Reranking
 
481
  type: C-MTEB/T2Reranking
482
  metrics:
483
  - type: map
484
+ value: 69.13287570713865
485
  - type: mrr
486
+ value: 79.95326487625066
487
  - type: main_score
488
+ value: 69.13287570713865
489
  task:
490
  type: Reranking
491
  - dataset:
 
559
  value: 85.875
560
  task:
561
  type: Retrieval
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
562
  - dataset:
563
  config: default
564
  name: MTEB VideoRetrieval
 
630
  value: 80.93599999999999
631
  task:
632
  type: Retrieval
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
633
  tags:
634
  - mteb
635
+ language:
636
+ - zh
637
+
638
  ---
639
+
640
  <h2 align="left">XYZ-embedding-zh-v2</h2>
641
 
642
  ## Usage (Sentence Transformers)
 
666
  similarities = model.similarity(embeddings, embeddings)
667
  print(similarities.shape)
668
  # [3, 3]
669
+ ```
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "directionality": "bidi",
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 1024,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 4096,
15
+ "layer_norm_eps": 1e-12,
16
+ "max_position_embeddings": 512,
17
+ "model_type": "bert",
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 24,
20
+ "pad_token_id": 0,
21
+ "pooler_fc_size": 768,
22
+ "pooler_num_attention_heads": 12,
23
+ "pooler_num_fc_layers": 3,
24
+ "pooler_size_per_head": 128,
25
+ "pooler_type": "first_token_transform",
26
+ "position_embedding_type": "absolute",
27
+ "torch_dtype": "float32",
28
+ "transformers_version": "4.41.0",
29
+ "type_vocab_size": 2,
30
+ "use_cache": true,
31
+ "vocab_size": 21128
32
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.0.1",
4
+ "transformers": "4.41.0",
5
+ "pytorch": "2.2.2+cu118"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Dense",
18
+ "type": "sentence_transformers.models.Dense"
19
+ }
20
+ ]
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8090436280027987a24ffb67f66976b4069d4812c580f271ef7fe4720a037bcf
3
+ size 1302216550
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "max_length": 512,
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "never_split": null,
52
+ "pad_to_multiple_of": null,
53
+ "pad_token": "[PAD]",
54
+ "pad_token_type_id": 0,
55
+ "padding_side": "right",
56
+ "sep_token": "[SEP]",
57
+ "stride": 0,
58
+ "strip_accents": null,
59
+ "tokenize_chinese_chars": true,
60
+ "tokenizer_class": "BertTokenizer",
61
+ "truncation_side": "right",
62
+ "truncation_strategy": "longest_first",
63
+ "unk_token": "[UNK]"
64
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff