panuthept committed on
Commit
a1f2fd3
·
1 Parent(s): 4fb8a8d

add more results

Browse files
Files changed (1) hide show
  1. app.py +148 -40
app.py CHANGED
@@ -116,10 +116,10 @@ results = [
116
  'Model Size (Million Parameters)': 278,
117
  'Embedding Dimensions': 768,
118
  'Average (8 datasets)': 60.02,
119
- 'STS Average (1 datasets)': None,
120
- 'Classification (3 datasets)': None,
121
- 'PairClassification (1 datasets)': None,
122
- 'Retrieval (3 datasets)': None,
123
  },
124
  {
125
  'T': '🟒',
@@ -127,10 +127,10 @@ results = [
127
  'Model Size (Million Parameters)': 279,
128
  'Embedding Dimensions': 768,
129
  'Average (8 datasets)': 57.69,
130
- 'STS Average (1 datasets)': None,
131
- 'Classification (3 datasets)': None,
132
- 'PairClassification (1 datasets)': None,
133
- 'Retrieval (3 datasets)': None,
134
  },
135
  {
136
  'T': '🟒',
@@ -138,10 +138,10 @@ results = [
138
  'Model Size (Million Parameters)': 106,
139
  'Embedding Dimensions': 768,
140
  'Average (8 datasets)': 62.22,
141
- 'STS Average (1 datasets)': None,
142
- 'Classification (3 datasets)': None,
143
- 'PairClassification (1 datasets)': None,
144
- 'Retrieval (3 datasets)': None,
145
  },
146
  {
147
  'T': '🟒',
@@ -149,10 +149,10 @@ results = [
149
  'Model Size (Million Parameters)': 278,
150
  'Embedding Dimensions': 768,
151
  'Average (8 datasets)': 63.28,
152
- 'STS Average (1 datasets)': None,
153
- 'Classification (3 datasets)': None,
154
- 'PairClassification (1 datasets)': None,
155
- 'Retrieval (3 datasets)': None,
156
  },
157
  {
158
  'T': '🟒',
@@ -160,10 +160,10 @@ results = [
160
  'Model Size (Million Parameters)': 279,
161
  'Embedding Dimensions': 768,
162
  'Average (8 datasets)': 65.37,
163
- 'STS Average (1 datasets)': None,
164
- 'Classification (3 datasets)': None,
165
- 'PairClassification (1 datasets)': None,
166
- 'Retrieval (3 datasets)': None,
167
  },
168
  {
169
  'T': '🟒',
@@ -171,10 +171,10 @@ results = [
171
  'Model Size (Million Parameters)': 106,
172
  'Embedding Dimensions': 768,
173
  'Average (8 datasets)': 63.55,
174
- 'STS Average (1 datasets)': None,
175
- 'Classification (3 datasets)': None,
176
- 'PairClassification (1 datasets)': None,
177
- 'Retrieval (3 datasets)': None,
178
  },
179
  {
180
  'T': '🟒',
@@ -182,10 +182,10 @@ results = [
182
  'Model Size (Million Parameters)': 278,
183
  'Embedding Dimensions': 768,
184
  'Average (8 datasets)': 66.00,
185
- 'STS Average (1 datasets)': None,
186
- 'Classification (3 datasets)': None,
187
- 'PairClassification (1 datasets)': None,
188
- 'Retrieval (3 datasets)': None,
189
  },
190
  {
191
  'T': '🟒',
@@ -193,10 +193,10 @@ results = [
193
  'Model Size (Million Parameters)': 279,
194
  'Embedding Dimensions': 768,
195
  'Average (8 datasets)': 66.84,
196
- 'STS Average (1 datasets)': None,
197
- 'Classification (3 datasets)': None,
198
- 'PairClassification (1 datasets)': None,
199
- 'Retrieval (3 datasets)': None,
200
  },
201
  {
202
  'T': '🟒',
@@ -204,10 +204,10 @@ results = [
204
  'Model Size (Million Parameters)': 106,
205
  'Embedding Dimensions': 768,
206
  'Average (8 datasets)': 67.17,
207
- 'STS Average (1 datasets)': None,
208
- 'Classification (3 datasets)': None,
209
- 'PairClassification (1 datasets)': None,
210
- 'Retrieval (3 datasets)': None,
211
  },
212
  {
213
  'T': '🟒',
@@ -215,10 +215,108 @@ results = [
215
  'Model Size (Million Parameters)': 278,
216
  'Embedding Dimensions': 768,
217
  'Average (8 datasets)': 66.94,
218
- 'STS Average (1 datasets)': None,
219
- 'Classification (3 datasets)': None,
220
- 'PairClassification (1 datasets)': None,
221
- 'Retrieval (3 datasets)': None,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  },
223
  {
224
  'T': 'πŸ“¦',
@@ -230,6 +328,16 @@ results = [
230
  'PairClassification (1 datasets)': 73.28,
231
  'Retrieval (3 datasets)': 91.43,
232
  },
 
 
 
 
 
 
 
 
 
 
233
  ]
234
 
235
  # Sort by average
 
116
  'Model Size (Million Parameters)': 278,
117
  'Embedding Dimensions': 768,
118
  'Average (8 datasets)': 60.02,
119
+ 'STS Average (1 datasets)': 67.82,
120
+ 'Classification (3 datasets)': 53.50,
121
+ 'PairClassification (1 datasets)': 63.35,
122
+ 'Retrieval (3 datasets)': 66.05,
123
  },
124
  {
125
  'T': '🟒',
 
127
  'Model Size (Million Parameters)': 279,
128
  'Embedding Dimensions': 768,
129
  'Average (8 datasets)': 57.69,
130
+ 'STS Average (1 datasets)': 68.91,
131
+ 'Classification (3 datasets)': 55.93,
132
+ 'PairClassification (1 datasets)': 66.49,
133
+ 'Retrieval (3 datasets)': 54.90,
134
  },
135
  {
136
  'T': '🟒',
 
138
  'Model Size (Million Parameters)': 106,
139
  'Embedding Dimensions': 768,
140
  'Average (8 datasets)': 62.22,
141
+ 'STS Average (1 datasets)': 71.35,
142
+ 'Classification (3 datasets)': 59.19,
143
+ 'PairClassification (1 datasets)': 67.04,
144
+ 'Retrieval (3 datasets)': 63.83,
145
  },
146
  {
147
  'T': '🟒',
 
149
  'Model Size (Million Parameters)': 278,
150
  'Embedding Dimensions': 768,
151
  'Average (8 datasets)': 63.28,
152
+ 'STS Average (1 datasets)': 74.08,
153
+ 'Classification (3 datasets)': 58.77,
154
+ 'PairClassification (1 datasets)': 65.87,
155
+ 'Retrieval (3 datasets)': 66.20,
156
  },
157
  {
158
  'T': '🟒',
 
160
  'Model Size (Million Parameters)': 279,
161
  'Embedding Dimensions': 768,
162
  'Average (8 datasets)': 65.37,
163
+ 'STS Average (1 datasets)': 78.78,
164
+ 'Classification (3 datasets)': 56.87,
165
+ 'PairClassification (1 datasets)': 79.78,
166
+ 'Retrieval (3 datasets)': 65.02,
167
  },
168
  {
169
  'T': '🟒',
 
171
  'Model Size (Million Parameters)': 106,
172
  'Embedding Dimensions': 768,
173
  'Average (8 datasets)': 63.55,
174
+ 'STS Average (1 datasets)': 77.77,
175
+ 'Classification (3 datasets)': 56.33,
176
+ 'PairClassification (1 datasets)': 77.04,
177
+ 'Retrieval (3 datasets)': 62.38,
178
  },
179
  {
180
  'T': '🟒',
 
182
  'Model Size (Million Parameters)': 278,
183
  'Embedding Dimensions': 768,
184
  'Average (8 datasets)': 66.00,
185
+ 'STS Average (1 datasets)': 77.80,
186
+ 'Classification (3 datasets)': 57.27,
187
+ 'PairClassification (1 datasets)': 77.84,
188
+ 'Retrieval (3 datasets)': 67.94,
189
  },
190
  {
191
  'T': '🟒',
 
193
  'Model Size (Million Parameters)': 279,
194
  'Embedding Dimensions': 768,
195
  'Average (8 datasets)': 66.84,
196
+ 'STS Average (1 datasets)': 79.69,
197
+ 'Classification (3 datasets)': 56.90,
198
+ 'PairClassification (1 datasets)': 81.47,
199
+ 'Retrieval (3 datasets)': 68.03,
200
  },
201
  {
202
  'T': '🟒',
 
204
  'Model Size (Million Parameters)': 106,
205
  'Embedding Dimensions': 768,
206
  'Average (8 datasets)': 67.17,
207
+ 'STS Average (1 datasets)': 78.78,
208
+ 'Classification (3 datasets)': 58.16,
209
+ 'PairClassification (1 datasets)': 82.43,
210
+ 'Retrieval (3 datasets)': 67.66,
211
  },
212
  {
213
  'T': '🟒',
 
215
  'Model Size (Million Parameters)': 278,
216
  'Embedding Dimensions': 768,
217
  'Average (8 datasets)': 66.94,
218
+ 'STS Average (1 datasets)': 78.90,
219
+ 'Classification (3 datasets)': 57.63,
220
+ 'PairClassification (1 datasets)': 81.01,
221
+ 'Retrieval (3 datasets)': 68.04,
222
+ },
223
+ {
224
+ 'T': '🟒',
225
+ 'Model Name': '[E5-Mistral-7B-Instruct](https://huggingface.co/intfloat/e5-mistral-7b-instruct)',
226
+ 'Model Size (Million Parameters)': 7110,
227
+ 'Embedding Dimensions': 4096,
228
+ 'Average (8 datasets)': 71.94,
229
+ 'STS Average (1 datasets)': 75.52,
230
+ 'Classification (3 datasets)': 60.46,
231
+ 'PairClassification (1 datasets)': 68.04,
232
+ 'Retrieval (3 datasets)': 86.80,
233
+ },
234
+ {
235
+ 'T': '🟒',
236
+ 'Model Name': '[gte-Qwen2-7B-Instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct)',
237
+ 'Model Size (Million Parameters)': 7610,
238
+ 'Embedding Dimensions': 3584,
239
+ 'Average (8 datasets)': 49.31,
240
+ 'STS Average (1 datasets)': 51.60,
241
+ 'Classification (3 datasets)': 57.55,
242
+ 'PairClassification (1 datasets)': 61.73,
243
+ 'Retrieval (3 datasets)': 38.31,
244
+ },
245
+ {
246
+ 'T': '🟒',
247
+ 'Model Name': '[GritLM-7B](https://huggingface.co/GritLM/GritLM-7B)',
248
+ 'Model Size (Million Parameters)': 7240,
249
+ 'Embedding Dimensions': 4096,
250
+ 'Average (8 datasets)': 42.38,
251
+ 'STS Average (1 datasets)': 45.50,
252
+ 'Classification (3 datasets)': 56.83,
253
+ 'PairClassification (1 datasets)': 56.40,
254
+ 'Retrieval (3 datasets)': 22.79,
255
+ },
256
+ {
257
+ 'T': '🟒',
258
+ 'Model Name': '[Llama3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B)',
259
+ 'Model Size (Million Parameters)': 8030,
260
+ 'Embedding Dimensions': 4096,
261
+ 'Average (8 datasets)': 51.63,
262
+ 'STS Average (1 datasets)': 49.48,
263
+ 'Classification (3 datasets)': 58.54,
264
+ 'PairClassification (1 datasets)': 57.76,
265
+ 'Retrieval (3 datasets)': 47.93,
266
+ },
267
+ {
268
+ 'T': '🟒',
269
+ 'Model Name': '[Llama3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct)',
270
+ 'Model Size (Million Parameters)': 8030,
271
+ 'Embedding Dimensions': 4096,
272
+ 'Average (8 datasets)': 52.81,
273
+ 'STS Average (1 datasets)': 50.63,
274
+ 'Classification (3 datasets)': 58.85,
275
+ 'PairClassification (1 datasets)': 58.04,
276
+ 'Retrieval (3 datasets)': 50.38,
277
+ },
278
+ {
279
+ 'T': '🟒',
280
+ 'Model Name': '[Llama3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B)',
281
+ 'Model Size (Million Parameters)': 8030,
282
+ 'Embedding Dimensions': 4096,
283
+ 'Average (8 datasets)': 50.36,
284
+ 'STS Average (1 datasets)': 49.98,
285
+ 'Classification (3 datasets)': 58.18,
286
+ 'PairClassification (1 datasets)': 58.12,
287
+ 'Retrieval (3 datasets)': 43.64,
288
+ },
289
+ {
290
+ 'T': '🟒',
291
+ 'Model Name': '[Llama3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct)',
292
+ 'Model Size (Million Parameters)': 8030,
293
+ 'Embedding Dimensions': 4096,
294
+ 'Average (8 datasets)': 50.06,
295
+ 'STS Average (1 datasets)': 49.76,
296
+ 'Classification (3 datasets)': 57.90,
297
+ 'PairClassification (1 datasets)': 57.47,
298
+ 'Retrieval (3 datasets)': 43.63,
299
+ },
300
+ {
301
+ 'T': '🟒',
302
+ 'Model Name': '[Typhoon-8B-Instruct](https://huggingface.co/scb10x/llama-3-typhoon-v1.5-8b-instruct)',
303
+ 'Model Size (Million Parameters)': 8030,
304
+ 'Embedding Dimensions': 4096,
305
+ 'Average (8 datasets)': 53.51,
306
+ 'STS Average (1 datasets)': 51.46,
307
+ 'Classification (3 datasets)': 58.91,
308
+ 'PairClassification (1 datasets)': 58.05,
309
+ 'Retrieval (3 datasets)': 52.65,
310
+ },
311
+ {
312
+ 'T': 'πŸ“¦',
313
+ 'Model Name': 'Cohere-embed-multilingual-v2.0',
314
+ 'Embedding Dimensions': 768,
315
+ 'Average (8 datasets)': 68.01,
316
+ 'STS Average (1 datasets)': 68.03,
317
+ 'Classification (3 datasets)': 57.31,
318
+ 'PairClassification (1 datasets)': 62.03,
319
+ 'Retrieval (3 datasets)': 85.23,
320
  },
321
  {
322
  'T': 'πŸ“¦',
 
328
  'PairClassification (1 datasets)': 73.28,
329
  'Retrieval (3 datasets)': 91.43,
330
  },
331
+ {
332
+ 'T': 'πŸ“¦',
333
+ 'Model Name': 'Openai-text-embedding-3-large',
334
+ 'Embedding Dimensions': 3072,
335
+ 'Average (8 datasets)': 69.26,
336
+ 'STS Average (1 datasets)': 70.46,
337
+ 'Classification (3 datasets)': 58.79,
338
+ 'PairClassification (1 datasets)': 67.33,
339
+ 'Retrieval (3 datasets)': 83.87,
340
+ },
341
  ]
342
 
343
  # Sort by average