Spaces:
Running
Running
Yotam-Perlitz
committed on
Commit
•
633250e
1
Parent(s):
fe9d4c9
produce new cache
Browse files
Signed-off-by: Yotam-Perlitz <[email protected]>
cache/aggregate_scoress_cache_5e66a88dab42480065db47711c55c458.csv
ADDED
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model,score
|
2 |
+
gpt_4o_2024_05_13,0.9847612958226769
|
3 |
+
claude_3_5_sonnet_20240620,0.982905982905983
|
4 |
+
gpt_4o_2024_08_06,0.9575873827791986
|
5 |
+
gpt_4_turbo_2024_04_09,0.9428463693169576
|
6 |
+
gpt_4_0125_preview,0.9171132221004344
|
7 |
+
mistral_large_2407,0.8868286445012787
|
8 |
+
llama3_1_405b_instruct,0.8672150411280846
|
9 |
+
yi_large_preview,0.8641553641553642
|
10 |
+
hermes_3_llama3_1_70b,0.8626160990712074
|
11 |
+
smaug_qwen2_72b_instruct,0.8593911248710011
|
12 |
+
claude_3_opus_20240229,0.8573567665639277
|
13 |
+
llama3_1_70b_instruct,0.8528408270971201
|
14 |
+
athene_70b,0.8493788819875776
|
15 |
+
deepseek_coder_v2,0.8444160272804775
|
16 |
+
qwen2_72b_instruct,0.8354710666091739
|
17 |
+
yi_large,0.8346273291925466
|
18 |
+
gpt_4_0613,0.8146763722211293
|
19 |
+
llama3_70b_instruct,0.8127546753337573
|
20 |
+
llama3_70b,0.8105600539811066
|
21 |
+
gemma_2_27b_it,0.8045273029120115
|
22 |
+
gpt_4o_mini_2024_07_18,0.8032033326150972
|
23 |
+
gemma_2_9b_it_dpo,0.790057915057915
|
24 |
+
llama3_instruct_8b_simpo,0.7884068278805121
|
25 |
+
phi_3_5_moe_instruct,0.7808307533539731
|
26 |
+
qwen1_5_110b_chat,0.776004448721167
|
27 |
+
qwen1_5_32b,0.7658569500674763
|
28 |
+
yi_1_5_34b_chat,0.7553884711779449
|
29 |
+
llama_2_70b,0.7303193882141251
|
30 |
+
mixtral_8x22b_instruct_v0_1,0.7256023690940907
|
31 |
+
gemma_2_9b_it_simpo,0.7199248120300753
|
32 |
+
qwen1_5_32b_chat,0.7149122807017544
|
33 |
+
mixtral_8x22b_v0_1,0.7135490753911806
|
34 |
+
yi_34b,0.7128879892037787
|
35 |
+
internlm2_5_20b_chat,0.6842105263157895
|
36 |
+
phi_3_small_128k_instruct,0.66937564499484
|
37 |
+
phi_3_medium_4k_instruct,0.6675079642841117
|
38 |
+
claude_3_sonnet_20240229,0.653911731916847
|
39 |
+
gemma_2_9b_it,0.6422797189051059
|
40 |
+
infinity_instruct_3m_0625_llama3_8b,0.6273115220483642
|
41 |
+
mistral_v0_1_7b,0.6239316239316239
|
42 |
+
phi_3_5_mini_instruct,0.6202270381836945
|
43 |
+
mistral_medium,0.6122209165687427
|
44 |
+
mistral_large_2402,0.6058211467418628
|
45 |
+
claude_instant_1_2,0.6049896049896051
|
46 |
+
claude_2_0,0.6020066889632107
|
47 |
+
yi_1_5_9b_chat,0.5881787802840435
|
48 |
+
qwen1_5_14b,0.5770917678812416
|
49 |
+
command_r_plus,0.5761033510394125
|
50 |
+
llama_65b,0.5736992052781527
|
51 |
+
gpt_3_5_turbo_0613,0.5724018332713985
|
52 |
+
qwen1_5_72b_chat,0.5668371367348349
|
53 |
+
phi_3_mini_4k_instruct,0.5548245614035088
|
54 |
+
deepseek_llm_67b_chat,0.5506756756756757
|
55 |
+
claude_3_haiku_20240307,0.549424005945745
|
56 |
+
yi_34b_chat,0.5455449728905107
|
57 |
+
dbrx_instructruct,0.5344129554655871
|
58 |
+
jurassic_2_jumbo_178b,0.532051282051282
|
59 |
+
llama3_1_8b_instruct,0.5175232440678665
|
60 |
+
claude_2_1,0.5110980545763154
|
61 |
+
qwen2_7b_instruct,0.5034227726178191
|
62 |
+
mistral_small_2402,0.49924585218702866
|
63 |
+
mixtral_8x7b_v0_1,0.49324324324324326
|
64 |
+
glm_4_9b_chat,0.46499582289055974
|
65 |
+
qwen1_5_14b_chat,0.4621068436857911
|
66 |
+
phi_3_small_8k_instruct,0.45481670929241264
|
67 |
+
gpt_3_5_turbo_0301,0.4528985507246377
|
68 |
+
snorkel_mistral_pairrm_dpo,0.4521151586368978
|
69 |
+
gemma_7b,0.4471997300944669
|
70 |
+
gpt_3_5_turbo_0125,0.4401920188365201
|
71 |
+
llama3_8b,0.43302968960863697
|
72 |
+
dbrx_instruct,0.4266409266409266
|
73 |
+
llama3_8b_instruct,0.420135922511747
|
74 |
+
phi_3_mini_128k_instruct,0.4153205904787544
|
75 |
+
llama_2_13b,0.41490478332583597
|
76 |
+
jurassic_2_grande_17b,0.39529914529914534
|
77 |
+
openhermes_2_5_mistral_7b,0.3832617447168531
|
78 |
+
mistral_7b_v0_3,0.3737553342816501
|
79 |
+
mixtral_8x7b_instruct_v0_1,0.3713078251895724
|
80 |
+
qwen1_5_7b,0.3508771929824561
|
81 |
+
yi_1_5_6b_chat,0.3354636591478697
|
82 |
+
falcon_40b,0.32812265707002547
|
83 |
+
command_r,0.32386140074759
|
84 |
+
internlm2_chat_20b,0.32252252252252256
|
85 |
+
mistral_7b_v0_2,0.31970128022759603
|
86 |
+
luminous_supreme_70b,0.30128205128205127
|
87 |
+
starling_lm_7b_alpha,0.29823530624445954
|
88 |
+
yi_6b,0.29234143049932526
|
89 |
+
mistral_7b_instruct_v0_2,0.28609513981031004
|
90 |
+
zephyr_7b_alpha,0.2838442157327606
|
91 |
+
zephyr_7b_beta,0.2666234345800909
|
92 |
+
gemma_1_1_7b_it,0.26226051061156724
|
93 |
+
mistral_7b_instruct_v0_3,0.2537839697282422
|
94 |
+
starling_lm_7b_beta,0.25234441602728047
|
95 |
+
llama_2_7b,0.2391288049182786
|
96 |
+
luminous_extended_30b,0.2329059829059829
|
97 |
+
alpaca_7b,0.22072072072072071
|
98 |
+
vicuna_33b_v1_3,0.2056404230317274
|
99 |
+
phi_2,0.20087901666849037
|
100 |
+
qwen2_1_5b_instruct,0.19711042311661506
|
101 |
+
yi_6b_chat,0.1938854489164087
|
102 |
+
qwen1_5_7b_chat,0.1916569245052217
|
103 |
+
tulu_2_dpo_70b,0.17624223602484473
|
104 |
+
qwen1_5_4b_chat,0.1674406604747162
|
105 |
+
llama_2_70b_chat,0.15527950310559005
|
106 |
+
gpt_neox_20b,0.14400584795321636
|
107 |
+
vicuna_7b_v1_5,0.13619501854795973
|
108 |
+
falcon_40b_instruct,0.13264580369843526
|
109 |
+
gemma_7b_it,0.12136319058515854
|
110 |
+
falcon_7b,0.11407257459889038
|
111 |
+
gpt_j_6b,0.10160818713450293
|
112 |
+
luminous_base_13b,0.08333333333333333
|
113 |
+
llama_2_7b_chat,0.08304448781801049
|
114 |
+
gemma_1_1_2b_it,0.07665903890160183
|
115 |
+
olmo_7b,0.06545209176788123
|
116 |
+
gemma_2b_it,0.05921052631578947
|
117 |
+
qwen1_5_1_8b_chat,0.059167526659786716
|
118 |
+
qwen2_0_5b_instruct,0.059081527347781215
|
119 |
+
pythia_12b,0.054093567251461985
|
120 |
+
pythia_6_9b,0.019736842105263157
|
121 |
+
falcon_7b_instruct,0.013513513513513514
|
122 |
+
qwen1_5_0_5b_chat,0.013157894736842105
|
cache/agreements_cache_5e66a88dab42480065db47711c55c458.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
cache/allbenchs_cache_5e66a88dab42480065db47711c55c458.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|