Spaces:
Running
Running
,model,scenario,score,aggragated_from,source | |
0,claude_3_5_sonnet_20240620,livebench_lb,61.16,[],livebench_240701 | |
1,gpt_4o_2024_05_13,livebench_lb,54.96,[],livebench_240701 | |
2,gpt_4_turbo_2024_04_09,livebench_lb,53.0,[],livebench_240701 | |
3,gpt_4_1106_preview,livebench_lb,52.17,[],livebench_240701 | |
4,claude_3_opus_20240229,livebench_lb,50.75,[],livebench_240701 | |
5,gpt_4_0125_preview,livebench_lb,49.39,[],livebench_240701 | |
6,deepseek_coder_v2,livebench_lb,46.79,[],livebench_240701 | |
7,gemini_1.5_pro_api_0514,livebench_lb,44.35,[],livebench_240701 | |
8,gemma_2_27b_it,livebench_lb,41.22,[],livebench_240701 | |
9,gemini_1.5_flash_api_0514,livebench_lb,40.89,[],livebench_240701 | |
10,qwen2_72b_instruct,livebench_lb,40.16,[],livebench_240701 | |
11,acm_rewrite_qwen2_72b_chat,livebench_lb,39.6,[],livebench_240701 | |
12,mistral_large_2402,livebench_lb,38.92,[],livebench_240701 | |
13,deepseek_chat_v2,livebench_lb,38.39,[],livebench_240701 | |
14,claude_3_sonnet_20240229,livebench_lb,38.08,[],livebench_240701 | |
15,meta_llama_3_70b_instruct,livebench_lb,37.38,[],livebench_240701 | |
16,claude_3_haiku_20240307,livebench_lb,35.32,[],livebench_240701 | |
17,mixtral_8x22b_instruct_v0.1,livebench_lb,34.84,[],livebench_240701 | |
18,gpt_3.5_turbo_0125,livebench_lb,34.43,[],livebench_240701 | |
19,gpt_3.5_turbo_1106,livebench_lb,34.14,[],livebench_240701 | |
20,command_r_plus,livebench_lb,32.86,[],livebench_240701 | |
21,mistral_small_2402,livebench_lb,32.8,[],livebench_240701 | |
22,gemma_2_9b_it,livebench_lb,31.57,[],livebench_240701 | |
23,phi_3_medium_4k_instruct,livebench_lb,30.33,[],livebench_240701 | |
24,phi_3_medium_128k_instruct,livebench_lb,29.64,[],livebench_240701 | |
25,deepseek_coder_v2_lite_instruct,livebench_lb,29.15,[],livebench_240701 | |
26,qwen1.5_110b_chat,livebench_lb,28.96,[],livebench_240701 | |
27,qwen1.5_72b_chat,livebench_lb,28.89,[],livebench_240701 | |
28,command_r,livebench_lb,27.23,[],livebench_240701 | |
29,phi_3_small_128k_instruct,livebench_lb,27.19,[],livebench_240701 | |
30,meta_llama_3_8b_instruct,livebench_lb,26.67,[],livebench_240701 | |
31,qwen2_7b_instruct,livebench_lb,26.45,[],livebench_240701 | |
32,phi_3_small_8k_instruct,livebench_lb,26.24,[],livebench_240701 | |
33,openhermes_2.5_mistral_7b,livebench_lb,23.3,[],livebench_240701 | |
34,mixtral_8x7b_instruct_v0.1,livebench_lb,22.5,[],livebench_240701 | |
35,mistral_7b_instruct_v0.2,livebench_lb,19.33,[],livebench_240701 | |
36,phi_3_mini_4k_instruct,livebench_lb,19.27,[],livebench_240701 | |
37,zephyr_7b_alpha,livebench_lb,19.22,[],livebench_240701 | |
38,phi_3_mini_128k_instruct,livebench_lb,18.04,[],livebench_240701 | |
39,zephyr_7b_beta,livebench_lb,17.32,[],livebench_240701 | |
40,deepseek_v2_lite_chat,livebench_lb,17.14,[],livebench_240701 | |
41,qwen1.5_7b_chat,livebench_lb,16.5,[],livebench_240701 | |
42,starling_lm_7b_beta,livebench_lb,16.44,[],livebench_240701 | |
43,vicuna_7b_v1.5_16k,livebench_lb,13.71,[],livebench_240701 | |
44,vicuna_7b_v1.5,livebench_lb,11.73,[],livebench_240701 | |
45,qwen1.5_4b_chat,livebench_lb,11.13,[],livebench_240701 | |
46,llama_2_7b_chat,livebench_lb,10.25,[],livebench_240701 | |
47,qwen2_1.5b_instruct,livebench_lb,9.96,[],livebench_240701 | |
48,yi_6b_chat,livebench_lb,8.79,[],livebench_240701 | |
49,qwen2_0.5b_instruct,livebench_lb,6.78,[],livebench_240701 | |
50,qwen1.5_1.8b_chat,livebench_lb,6.09,[],livebench_240701 | |
51,qwen1.5_0.5b_chat,livebench_lb,5.26,[],livebench_240701 | |
52,claude_3_5_sonnet_20240620,reasoning_lb,64.0,[],livebench_240701 | |
53,gpt_4o_2024_05_13,reasoning_lb,55.0,[],livebench_240701 | |
54,gpt_4_turbo_2024_04_09,reasoning_lb,54.0,[],livebench_240701 | |
55,gpt_4_1106_preview,reasoning_lb,52.0,[],livebench_240701 | |
56,claude_3_opus_20240229,reasoning_lb,41.0,[],livebench_240701 | |
57,gpt_4_0125_preview,reasoning_lb,48.0,[],livebench_240701 | |
58,deepseek_coder_v2,reasoning_lb,49.0,[],livebench_240701 | |
59,gemini_1.5_pro_api_0514,reasoning_lb,33.0,[],livebench_240701 | |
60,gemma_2_27b_it,reasoning_lb,31.0,[],livebench_240701 | |
61,gemini_1.5_flash_api_0514,reasoning_lb,30.0,[],livebench_240701 | |
62,qwen2_72b_instruct,reasoning_lb,42.0,[],livebench_240701 | |
63,acm_rewrite_qwen2_72b_chat,reasoning_lb,37.0,[],livebench_240701 | |
64,mistral_large_2402,reasoning_lb,35.0,[],livebench_240701 | |
65,deepseek_chat_v2,reasoning_lb,29.0,[],livebench_240701 | |
66,claude_3_sonnet_20240229,reasoning_lb,26.0,[],livebench_240701 | |
67,meta_llama_3_70b_instruct,reasoning_lb,31.0,[],livebench_240701 | |
68,claude_3_haiku_20240307,reasoning_lb,26.0,[],livebench_240701 | |
69,mixtral_8x22b_instruct_v0.1,reasoning_lb,29.0,[],livebench_240701 | |
70,gpt_3.5_turbo_0125,reasoning_lb,26.0,[],livebench_240701 | |
71,gpt_3.5_turbo_1106,reasoning_lb,28.0,[],livebench_240701 | |
72,command_r_plus,reasoning_lb,32.0,[],livebench_240701 | |
73,mistral_small_2402,reasoning_lb,28.0,[],livebench_240701 | |
74,gemma_2_9b_it,reasoning_lb,19.0,[],livebench_240701 | |
75,phi_3_medium_4k_instruct,reasoning_lb,35.0,[],livebench_240701 | |
76,phi_3_medium_128k_instruct,reasoning_lb,31.0,[],livebench_240701 | |
77,deepseek_coder_v2_lite_instruct,reasoning_lb,22.0,[],livebench_240701 | |
78,qwen1.5_110b_chat,reasoning_lb,26.0,[],livebench_240701 | |
79,qwen1.5_72b_chat,reasoning_lb,21.0,[],livebench_240701 | |
80,command_r,reasoning_lb,28.0,[],livebench_240701 | |
81,phi_3_small_128k_instruct,reasoning_lb,36.0,[],livebench_240701 | |
82,meta_llama_3_8b_instruct,reasoning_lb,25.0,[],livebench_240701 | |
83,qwen2_7b_instruct,reasoning_lb,20.0,[],livebench_240701 | |
84,phi_3_small_8k_instruct,reasoning_lb,23.0,[],livebench_240701 | |
85,openhermes_2.5_mistral_7b,reasoning_lb,17.0,[],livebench_240701 | |
86,mixtral_8x7b_instruct_v0.1,reasoning_lb,18.0,[],livebench_240701 | |
87,mistral_7b_instruct_v0.2,reasoning_lb,13.0,[],livebench_240701 | |
88,phi_3_mini_4k_instruct,reasoning_lb,19.0,[],livebench_240701 | |
89,zephyr_7b_alpha,reasoning_lb,17.0,[],livebench_240701 | |
90,phi_3_mini_128k_instruct,reasoning_lb,10.0,[],livebench_240701 | |
91,zephyr_7b_beta,reasoning_lb,16.0,[],livebench_240701 | |
92,deepseek_v2_lite_chat,reasoning_lb,13.0,[],livebench_240701 | |
93,qwen1.5_7b_chat,reasoning_lb,13.0,[],livebench_240701 | |
94,starling_lm_7b_beta,reasoning_lb,19.0,[],livebench_240701 | |
95,vicuna_7b_v1.5_16k,reasoning_lb,15.0,[],livebench_240701 | |
96,vicuna_7b_v1.5,reasoning_lb,12.0,[],livebench_240701 | |
97,qwen1.5_4b_chat,reasoning_lb,13.0,[],livebench_240701 | |
98,llama_2_7b_chat,reasoning_lb,5.0,[],livebench_240701 | |
99,qwen2_1.5b_instruct,reasoning_lb,8.0,[],livebench_240701 | |
100,yi_6b_chat,reasoning_lb,8.0,[],livebench_240701 | |
101,qwen2_0.5b_instruct,reasoning_lb,3.0,[],livebench_240701 | |
102,qwen1.5_1.8b_chat,reasoning_lb,5.0,[],livebench_240701 | |
103,qwen1.5_0.5b_chat,reasoning_lb,4.0,[],livebench_240701 | |
104,claude_3_5_sonnet_20240620,coding_lb,63.21,[],livebench_240701 | |
105,gpt_4o_2024_05_13,coding_lb,46.37,[],livebench_240701 | |
106,gpt_4_turbo_2024_04_09,coding_lb,47.05,[],livebench_240701 | |
107,gpt_4_1106_preview,coding_lb,44.37,[],livebench_240701 | |
108,claude_3_opus_20240229,coding_lb,40.05,[],livebench_240701 | |
109,gpt_4_0125_preview,coding_lb,44.05,[],livebench_240701 | |
110,deepseek_coder_v2,coding_lb,41.05,[],livebench_240701 | |
111,gemini_1.5_pro_api_0514,coding_lb,32.79,[],livebench_240701 | |
112,gemma_2_27b_it,coding_lb,36.74,[],livebench_240701 | |
113,gemini_1.5_flash_api_0514,coding_lb,39.05,[],livebench_240701 | |
114,qwen2_72b_instruct,coding_lb,31.79,[],livebench_240701 | |
115,acm_rewrite_qwen2_72b_chat,coding_lb,39.05,[],livebench_240701 | |
116,mistral_large_2402,coding_lb,26.84,[],livebench_240701 | |
117,deepseek_chat_v2,coding_lb,33.47,[],livebench_240701 | |
118,claude_3_sonnet_20240229,coding_lb,25.21,[],livebench_240701 | |
119,meta_llama_3_70b_instruct,coding_lb,20.95,[],livebench_240701 | |
120,claude_3_haiku_20240307,coding_lb,24.53,[],livebench_240701 | |
121,mixtral_8x22b_instruct_v0.1,coding_lb,33.11,[],livebench_240701 | |
122,gpt_3.5_turbo_0125,coding_lb,29.16,[],livebench_240701 | |
123,gpt_3.5_turbo_1106,coding_lb,26.84,[],livebench_240701 | |
124,command_r_plus,coding_lb,20.26,[],livebench_240701 | |
125,mistral_small_2402,coding_lb,24.21,[],livebench_240701 | |
126,gemma_2_9b_it,coding_lb,22.21,[],livebench_240701 | |
127,phi_3_medium_4k_instruct,coding_lb,20.58,[],livebench_240701 | |
128,phi_3_medium_128k_instruct,coding_lb,21.58,[],livebench_240701 | |
129,deepseek_coder_v2_lite_instruct,coding_lb,26.84,[],livebench_240701 | |
130,qwen1.5_110b_chat,coding_lb,22.21,[],livebench_240701 | |
131,qwen1.5_72b_chat,coding_lb,22.89,[],livebench_240701 | |
132,command_r,coding_lb,14.95,[],livebench_240701 | |
133,phi_3_small_128k_instruct,coding_lb,25.84,[],livebench_240701 | |
134,meta_llama_3_8b_instruct,coding_lb,18.26,[],livebench_240701 | |
135,qwen2_7b_instruct,coding_lb,29.21,[],livebench_240701 | |
136,phi_3_small_8k_instruct,coding_lb,19.58,[],livebench_240701 | |
137,openhermes_2.5_mistral_7b,coding_lb,11.63,[],livebench_240701 | |
138,mixtral_8x7b_instruct_v0.1,coding_lb,11.32,[],livebench_240701 | |
139,mistral_7b_instruct_v0.2,coding_lb,11.63,[],livebench_240701 | |
140,phi_3_mini_4k_instruct,coding_lb,14.95,[],livebench_240701 | |
141,zephyr_7b_alpha,coding_lb,11.32,[],livebench_240701 | |
142,phi_3_mini_128k_instruct,coding_lb,11.63,[],livebench_240701 | |
143,zephyr_7b_beta,coding_lb,8.32,[],livebench_240701 | |
144,deepseek_v2_lite_chat,coding_lb,8.63,[],livebench_240701 | |
145,qwen1.5_7b_chat,coding_lb,6.63,[],livebench_240701 | |
146,starling_lm_7b_beta,coding_lb,18.26,[],livebench_240701 | |
147,vicuna_7b_v1.5_16k,coding_lb,1.32,[],livebench_240701 | |
148,vicuna_7b_v1.5,coding_lb,1.0,[],livebench_240701 | |
149,qwen1.5_4b_chat,coding_lb,4.0,[],livebench_240701 | |
150,llama_2_7b_chat,coding_lb,0.0,[],livebench_240701 | |
151,qwen2_1.5b_instruct,coding_lb,5.63,[],livebench_240701 | |
152,yi_6b_chat,coding_lb,1.32,[],livebench_240701 | |
153,qwen2_0.5b_instruct,coding_lb,2.0,[],livebench_240701 | |
154,qwen1.5_1.8b_chat,coding_lb,0.0,[],livebench_240701 | |
155,qwen1.5_0.5b_chat,coding_lb,0.0,[],livebench_240701 | |
156,claude_3_5_sonnet_20240620,mathematics_lb,53.75,[],livebench_240701 | |
157,gpt_4o_2024_05_13,mathematics_lb,49.88,[],livebench_240701 | |
158,gpt_4_turbo_2024_04_09,mathematics_lb,48.99,[],livebench_240701 | |
159,gpt_4_1106_preview,mathematics_lb,47.55,[],livebench_240701 | |
160,claude_3_opus_20240229,mathematics_lb,46.54,[],livebench_240701 | |
161,gpt_4_0125_preview,mathematics_lb,42.75,[],livebench_240701 | |
162,deepseek_coder_v2,mathematics_lb,52.19,[],livebench_240701 | |
163,gemini_1.5_pro_api_0514,mathematics_lb,42.07,[],livebench_240701 | |
164,gemma_2_27b_it,mathematics_lb,36.23,[],livebench_240701 | |
165,gemini_1.5_flash_api_0514,mathematics_lb,38.54,[],livebench_240701 | |
166,qwen2_72b_instruct,mathematics_lb,43.44,[],livebench_240701 | |
167,acm_rewrite_qwen2_72b_chat,mathematics_lb,40.32,[],livebench_240701 | |
168,mistral_large_2402,mathematics_lb,32.2,[],livebench_240701 | |
169,deepseek_chat_v2,mathematics_lb,33.23,[],livebench_240701 | |
170,claude_3_sonnet_20240229,mathematics_lb,29.65,[],livebench_240701 | |
171,meta_llama_3_70b_instruct,mathematics_lb,32.31,[],livebench_240701 | |
172,claude_3_haiku_20240307,mathematics_lb,25.72,[],livebench_240701 | |
173,mixtral_8x22b_instruct_v0.1,mathematics_lb,26.94,[],livebench_240701 | |
174,gpt_3.5_turbo_0125,mathematics_lb,25.54,[],livebench_240701 | |
175,gpt_3.5_turbo_1106,mathematics_lb,28.13,[],livebench_240701 | |
176,command_r_plus,mathematics_lb,24.85,[],livebench_240701 | |
177,mistral_small_2402,mathematics_lb,26.76,[],livebench_240701 | |
178,gemma_2_9b_it,mathematics_lb,23.98,[],livebench_240701 | |
179,phi_3_medium_4k_instruct,mathematics_lb,27.54,[],livebench_240701 | |
180,phi_3_medium_128k_instruct,mathematics_lb,24.25,[],livebench_240701 | |
181,deepseek_coder_v2_lite_instruct,mathematics_lb,34.09,[],livebench_240701 | |
182,qwen1.5_110b_chat,mathematics_lb,25.58,[],livebench_240701 | |
183,qwen1.5_72b_chat,mathematics_lb,26.82,[],livebench_240701 | |
184,command_r,mathematics_lb,16.92,[],livebench_240701 | |
185,phi_3_small_128k_instruct,mathematics_lb,24.84,[],livebench_240701 | |
186,meta_llama_3_8b_instruct,mathematics_lb,17.58,[],livebench_240701 | |
187,qwen2_7b_instruct,mathematics_lb,25.83,[],livebench_240701 | |
188,phi_3_small_8k_instruct,mathematics_lb,24.15,[],livebench_240701 | |
189,openhermes_2.5_mistral_7b,mathematics_lb,20.1,[],livebench_240701 | |
190,mixtral_8x7b_instruct_v0.1,mathematics_lb,18.97,[],livebench_240701 | |
191,mistral_7b_instruct_v0.2,mathematics_lb,16.04,[],livebench_240701 | |
192,phi_3_mini_4k_instruct,mathematics_lb,19.88,[],livebench_240701 | |
193,zephyr_7b_alpha,mathematics_lb,9.61,[],livebench_240701 | |
194,phi_3_mini_128k_instruct,mathematics_lb,21.48,[],livebench_240701 | |
195,zephyr_7b_beta,mathematics_lb,11.23,[],livebench_240701 | |
196,deepseek_v2_lite_chat,mathematics_lb,11.99,[],livebench_240701 | |
197,qwen1.5_7b_chat,mathematics_lb,12.86,[],livebench_240701 | |
198,starling_lm_7b_beta,mathematics_lb,13.82,[],livebench_240701 | |
199,vicuna_7b_v1.5_16k,mathematics_lb,6.61,[],livebench_240701 | |
200,vicuna_7b_v1.5,mathematics_lb,4.33,[],livebench_240701 | |
201,qwen1.5_4b_chat,mathematics_lb,7.08,[],livebench_240701 | |
202,llama_2_7b_chat,mathematics_lb,4.78,[],livebench_240701 | |
203,qwen2_1.5b_instruct,mathematics_lb,7.16,[],livebench_240701 | |
204,yi_6b_chat,mathematics_lb,7.14,[],livebench_240701 | |
205,qwen2_0.5b_instruct,mathematics_lb,4.22,[],livebench_240701 | |
206,qwen1.5_1.8b_chat,mathematics_lb,2.14,[],livebench_240701 | |
207,qwen1.5_0.5b_chat,mathematics_lb,3.39,[],livebench_240701 | |
208,claude_3_5_sonnet_20240620,data_analysis_lb,56.74,[],livebench_240701 | |
209,gpt_4o_2024_05_13,data_analysis_lb,52.41,[],livebench_240701 | |
210,gpt_4_turbo_2024_04_09,data_analysis_lb,51.32,[],livebench_240701 | |
211,gpt_4_1106_preview,data_analysis_lb,51.33,[],livebench_240701 | |
212,claude_3_opus_20240229,data_analysis_lb,54.32,[],livebench_240701 | |
213,gpt_4_0125_preview,data_analysis_lb,54.06,[],livebench_240701 | |
214,deepseek_coder_v2,data_analysis_lb,38.25,[],livebench_240701 | |
215,gemini_1.5_pro_api_0514,data_analysis_lb,52.81,[],livebench_240701 | |
216,gemma_2_27b_it,data_analysis_lb,43.58,[],livebench_240701 | |
217,gemini_1.5_flash_api_0514,data_analysis_lb,44.03,[],livebench_240701 | |
218,qwen2_72b_instruct,data_analysis_lb,26.24,[],livebench_240701 | |
219,acm_rewrite_qwen2_72b_chat,data_analysis_lb,26.19,[],livebench_240701 | |
220,mistral_large_2402,data_analysis_lb,42.55,[],livebench_240701 | |
221,deepseek_chat_v2,data_analysis_lb,38.03,[],livebench_240701 | |
222,claude_3_sonnet_20240229,data_analysis_lb,44.56,[],livebench_240701 | |
223,meta_llama_3_70b_instruct,data_analysis_lb,42.41,[],livebench_240701 | |
224,claude_3_haiku_20240307,data_analysis_lb,41.54,[],livebench_240701 | |
225,mixtral_8x22b_instruct_v0.1,data_analysis_lb,30.33,[],livebench_240701 | |
226,gpt_3.5_turbo_0125,data_analysis_lb,41.21,[],livebench_240701 | |
227,gpt_3.5_turbo_1106,data_analysis_lb,41.7,[],livebench_240701 | |
228,command_r_plus,data_analysis_lb,24.6,[],livebench_240701 | |
229,mistral_small_2402,data_analysis_lb,31.88,[],livebench_240701 | |
230,gemma_2_9b_it,data_analysis_lb,35.06,[],livebench_240701 | |
231,phi_3_medium_4k_instruct,data_analysis_lb,31.63,[],livebench_240701 | |
232,phi_3_medium_128k_instruct,data_analysis_lb,32.12,[],livebench_240701 | |
233,deepseek_coder_v2_lite_instruct,data_analysis_lb,33.0,[],livebench_240701 | |
234,qwen1.5_110b_chat,data_analysis_lb,31.45,[],livebench_240701 | |
235,qwen1.5_72b_chat,data_analysis_lb,32.98,[],livebench_240701 | |
236,command_r,data_analysis_lb,31.69,[],livebench_240701 | |
237,phi_3_small_128k_instruct,data_analysis_lb,27.33,[],livebench_240701 | |
238,meta_llama_3_8b_instruct,data_analysis_lb,23.33,[],livebench_240701 | |
239,qwen2_7b_instruct,data_analysis_lb,28.75,[],livebench_240701 | |
240,phi_3_small_8k_instruct,data_analysis_lb,27.5,[],livebench_240701 | |
241,openhermes_2.5_mistral_7b,data_analysis_lb,26.92,[],livebench_240701 | |
242,mixtral_8x7b_instruct_v0.1,data_analysis_lb,28.13,[],livebench_240701 | |
243,mistral_7b_instruct_v0.2,data_analysis_lb,14.62,[],livebench_240701 | |
244,phi_3_mini_4k_instruct,data_analysis_lb,14.67,[],livebench_240701 | |
245,zephyr_7b_alpha,data_analysis_lb,17.4,[],livebench_240701 | |
246,phi_3_mini_128k_instruct,data_analysis_lb,8.69,[],livebench_240701 | |
247,zephyr_7b_beta,data_analysis_lb,15.75,[],livebench_240701 | |
248,deepseek_v2_lite_chat,data_analysis_lb,18.19,[],livebench_240701 | |
249,qwen1.5_7b_chat,data_analysis_lb,16.23,[],livebench_240701 | |
250,starling_lm_7b_beta,data_analysis_lb,2.0,[],livebench_240701 | |
251,vicuna_7b_v1.5_16k,data_analysis_lb,9.27,[],livebench_240701 | |
252,vicuna_7b_v1.5,data_analysis_lb,2.67,[],livebench_240701 | |
253,qwen1.5_4b_chat,data_analysis_lb,9.13,[],livebench_240701 | |
254,llama_2_7b_chat,data_analysis_lb,0.0,[],livebench_240701 | |
255,qwen2_1.5b_instruct,data_analysis_lb,10.01,[],livebench_240701 | |
256,yi_6b_chat,data_analysis_lb,4.38,[],livebench_240701 | |
257,qwen2_0.5b_instruct,data_analysis_lb,2.0,[],livebench_240701 | |
258,qwen1.5_1.8b_chat,data_analysis_lb,3.33,[],livebench_240701 | |
259,qwen1.5_0.5b_chat,data_analysis_lb,0.0,[],livebench_240701 | |
260,claude_3_5_sonnet_20240620,language_lb,56.94,[],livebench_240701 | |
261,gpt_4o_2024_05_13,language_lb,53.94,[],livebench_240701 | |
262,gpt_4_turbo_2024_04_09,language_lb,45.26,[],livebench_240701 | |
263,gpt_4_1106_preview,language_lb,48.37,[],livebench_240701 | |
264,claude_3_opus_20240229,language_lb,51.72,[],livebench_240701 | |
265,gpt_4_0125_preview,language_lb,43.55,[],livebench_240701 | |
266,deepseek_coder_v2,language_lb,33.04,[],livebench_240701 | |
267,gemini_1.5_pro_api_0514,language_lb,38.25,[],livebench_240701 | |
268,gemma_2_27b_it,language_lb,32.4,[],livebench_240701 | |
269,gemini_1.5_flash_api_0514,language_lb,30.69,[],livebench_240701 | |
270,qwen2_72b_instruct,language_lb,29.21,[],livebench_240701 | |
271,acm_rewrite_qwen2_72b_chat,language_lb,30.03,[],livebench_240701 | |
272,mistral_large_2402,language_lb,28.74,[],livebench_240701 | |
273,deepseek_chat_v2,language_lb,32.29,[],livebench_240701 | |
274,claude_3_sonnet_20240229,language_lb,38.08,[],livebench_240701 | |
275,meta_llama_3_70b_instruct,language_lb,34.11,[],livebench_240701 | |
276,claude_3_haiku_20240307,language_lb,30.07,[],livebench_240701 | |
277,mixtral_8x22b_instruct_v0.1,language_lb,26.48,[],livebench_240701 | |
278,gpt_3.5_turbo_0125,language_lb,24.22,[],livebench_240701 | |
279,gpt_3.5_turbo_1106,language_lb,28.63,[],livebench_240701 | |
280,command_r_plus,language_lb,23.92,[],livebench_240701 | |
281,mistral_small_2402,language_lb,22.06,[],livebench_240701 | |
282,gemma_2_9b_it,language_lb,27.64,[],livebench_240701 | |
283,phi_3_medium_4k_instruct,language_lb,13.91,[],livebench_240701 | |
284,phi_3_medium_128k_instruct,language_lb,12.76,[],livebench_240701 | |
285,deepseek_coder_v2_lite_instruct,language_lb,10.64,[],livebench_240701 | |
286,qwen1.5_110b_chat,language_lb,13.22,[],livebench_240701 | |
287,qwen1.5_72b_chat,language_lb,11.37,[],livebench_240701 | |
288,command_r,language_lb,14.64,[],livebench_240701 | |
289,phi_3_small_128k_instruct,language_lb,12.28,[],livebench_240701 | |
290,meta_llama_3_8b_instruct,language_lb,18.72,[],livebench_240701 | |
291,qwen2_7b_instruct,language_lb,10.21,[],livebench_240701 | |
292,phi_3_small_8k_instruct,language_lb,14.96,[],livebench_240701 | |
293,openhermes_2.5_mistral_7b,language_lb,11.37,[],livebench_240701 | |
294,mixtral_8x7b_instruct_v0.1,language_lb,13.76,[],livebench_240701 | |
295,mistral_7b_instruct_v0.2,language_lb,9.05,[],livebench_240701 | |
296,phi_3_mini_4k_instruct,language_lb,7.1,[],livebench_240701 | |
297,zephyr_7b_alpha,language_lb,7.2,[],livebench_240701 | |
298,phi_3_mini_128k_instruct,language_lb,6.8,[],livebench_240701 | |
299,zephyr_7b_beta,language_lb,4.28,[],livebench_240701 | |
300,deepseek_v2_lite_chat,language_lb,9.2,[],livebench_240701 | |
301,qwen1.5_7b_chat,language_lb,6.18,[],livebench_240701 | |
302,starling_lm_7b_beta,language_lb,7.26,[],livebench_240701 | |
303,vicuna_7b_v1.5_16k,language_lb,7.92,[],livebench_240701 | |
304,vicuna_7b_v1.5,language_lb,8.66,[],livebench_240701 | |
305,qwen1.5_4b_chat,language_lb,5.8,[],livebench_240701 | |
306,llama_2_7b_chat,language_lb,6.86,[],livebench_240701 | |
307,qwen2_1.5b_instruct,language_lb,3.05,[],livebench_240701 | |
308,yi_6b_chat,language_lb,4.69,[],livebench_240701 | |
309,qwen2_0.5b_instruct,language_lb,2.8,[],livebench_240701 | |
310,qwen1.5_1.8b_chat,language_lb,3.16,[],livebench_240701 | |
311,qwen1.5_0.5b_chat,language_lb,2.88,[],livebench_240701 | |
312,claude_3_5_sonnet_20240620,if_lb,72.3,[],livebench_240701 | |
313,gpt_4o_2024_05_13,if_lb,72.17,[],livebench_240701 | |
314,gpt_4_turbo_2024_04_09,if_lb,71.39,[],livebench_240701 | |
315,gpt_4_1106_preview,if_lb,69.39,[],livebench_240701 | |
316,claude_3_opus_20240229,if_lb,70.87,[],livebench_240701 | |
317,gpt_4_0125_preview,if_lb,63.92,[],livebench_240701 | |
318,deepseek_coder_v2,if_lb,67.18,[],livebench_240701 | |
319,gemini_1.5_pro_api_0514,if_lb,67.2,[],livebench_240701 | |
320,gemma_2_27b_it,if_lb,67.37,[],livebench_240701 | |
321,gemini_1.5_flash_api_0514,if_lb,63.01,[],livebench_240701 | |
322,qwen2_72b_instruct,if_lb,68.27,[],livebench_240701 | |
323,acm_rewrite_qwen2_72b_chat,if_lb,65.0,[],livebench_240701 | |
324,mistral_large_2402,if_lb,68.19,[],livebench_240701 | |
325,deepseek_chat_v2,if_lb,64.34,[],livebench_240701 | |
326,claude_3_sonnet_20240229,if_lb,65.0,[],livebench_240701 | |
327,meta_llama_3_70b_instruct,if_lb,63.5,[],livebench_240701 | |
328,claude_3_haiku_20240307,if_lb,64.03,[],livebench_240701 | |
329,mixtral_8x22b_instruct_v0.1,if_lb,63.17,[],livebench_240701 | |
330,gpt_3.5_turbo_0125,if_lb,60.47,[],livebench_240701 | |
331,gpt_3.5_turbo_1106,if_lb,51.53,[],livebench_240701 | |
332,command_r_plus,if_lb,71.51,[],livebench_240701 | |
333,mistral_small_2402,if_lb,63.91,[],livebench_240701 | |
334,gemma_2_9b_it,if_lb,61.55,[],livebench_240701 | |
335,phi_3_medium_4k_instruct,if_lb,53.3,[],livebench_240701 | |
336,phi_3_medium_128k_instruct,if_lb,56.15,[],livebench_240701 | |
337,deepseek_coder_v2_lite_instruct,if_lb,48.34,[],livebench_240701 | |
338,qwen1.5_110b_chat,if_lb,55.26,[],livebench_240701 | |
339,qwen1.5_72b_chat,if_lb,58.25,[],livebench_240701 | |
340,command_r,if_lb,57.16,[],livebench_240701 | |
341,phi_3_small_128k_instruct,if_lb,36.88,[],livebench_240701 | |
342,meta_llama_3_8b_instruct,if_lb,57.14,[],livebench_240701 | |
343,qwen2_7b_instruct,if_lb,44.74,[],livebench_240701 | |
344,phi_3_small_8k_instruct,if_lb,48.24,[],livebench_240701 | |
345,openhermes_2.5_mistral_7b,if_lb,52.78,[],livebench_240701 | |
346,mixtral_8x7b_instruct_v0.1,if_lb,44.81,[],livebench_240701 | |
347,mistral_7b_instruct_v0.2,if_lb,51.65,[],livebench_240701 | |
348,phi_3_mini_4k_instruct,if_lb,40.05,[],livebench_240701 | |
349,zephyr_7b_alpha,if_lb,52.79,[],livebench_240701 | |
350,phi_3_mini_128k_instruct,if_lb,49.65,[],livebench_240701 | |
351,zephyr_7b_beta,if_lb,48.32,[],livebench_240701 | |
352,deepseek_v2_lite_chat,if_lb,41.83,[],livebench_240701 | |
353,qwen1.5_7b_chat,if_lb,44.12,[],livebench_240701 | |
354,starling_lm_7b_beta,if_lb,38.32,[],livebench_240701 | |
355,vicuna_7b_v1.5_16k,if_lb,42.12,[],livebench_240701 | |
356,vicuna_7b_v1.5,if_lb,41.75,[],livebench_240701 | |
357,qwen1.5_4b_chat,if_lb,27.75,[],livebench_240701 | |
358,llama_2_7b_chat,if_lb,44.88,[],livebench_240701 | |
359,qwen2_1.5b_instruct,if_lb,25.9,[],livebench_240701 | |
360,yi_6b_chat,if_lb,27.22,[],livebench_240701 | |
361,qwen2_0.5b_instruct,if_lb,26.63,[],livebench_240701 | |
362,qwen1.5_1.8b_chat,if_lb,22.9,[],livebench_240701 | |
363,qwen1.5_0.5b_chat,if_lb,21.3,[],livebench_240701 | |