model,score gpt_4o_2024_05_13,0.9847612958226769 claude_3_5_sonnet_20240620,0.982905982905983 gpt_4o_2024_08_06,0.9575873827791986 gpt_4_turbo_2024_04_09,0.9428463693169576 gpt_4_0125_preview,0.9171132221004344 mistral_large_2407,0.8868286445012787 llama3_1_405b_instruct,0.8672150411280846 yi_large_preview,0.8641553641553642 hermes_3_llama3_1_70b,0.8626160990712074 smaug_qwen2_72b_instruct,0.8593911248710011 claude_3_opus_20240229,0.8573567665639277 llama3_1_70b_instruct,0.8528408270971201 athene_70b,0.8493788819875776 deepseek_coder_v2,0.8444160272804775 qwen2_72b_instruct,0.8354710666091739 yi_large,0.8346273291925466 gpt_4_0613,0.8146763722211293 llama3_70b_instruct,0.8127546753337573 llama3_70b,0.8105600539811066 gemma_2_27b_it,0.8045273029120115 gpt_4o_mini_2024_07_18,0.8032033326150972 gemma_2_9b_it_dpo,0.790057915057915 llama3_instruct_8b_simpo,0.7884068278805121 phi_3_5_moe_instruct,0.7808307533539731 qwen1_5_110b_chat,0.776004448721167 qwen1_5_32b,0.7658569500674763 yi_1_5_34b_chat,0.7553884711779449 llama_2_70b,0.7303193882141251 mixtral_8x22b_instruct_v0_1,0.7256023690940907 gemma_2_9b_it_simpo,0.7199248120300753 qwen1_5_32b_chat,0.7149122807017544 mixtral_8x22b_v0_1,0.7135490753911806 yi_34b,0.7128879892037787 internlm2_5_20b_chat,0.6842105263157895 phi_3_small_128k_instruct,0.66937564499484 phi_3_medium_4k_instruct,0.6675079642841117 claude_3_sonnet_20240229,0.653911731916847 gemma_2_9b_it,0.6422797189051059 infinity_instruct_3m_0625_llama3_8b,0.6273115220483642 mistral_v0_1_7b,0.6239316239316239 phi_3_5_mini_instruct,0.6202270381836945 mistral_medium,0.6122209165687427 mistral_large_2402,0.6058211467418628 claude_instant_1_2,0.6049896049896051 claude_2_0,0.6020066889632107 yi_1_5_9b_chat,0.5881787802840435 qwen1_5_14b,0.5770917678812416 command_r_plus,0.5761033510394125 llama_65b,0.5736992052781527 gpt_3_5_turbo_0613,0.5724018332713985 qwen1_5_72b_chat,0.5668371367348349 phi_3_mini_4k_instruct,0.5548245614035088 deepseek_llm_67b_chat,0.5506756756756757 claude_3_haiku_20240307,0.549424005945745 yi_34b_chat,0.5455449728905107 dbrx_instructruct,0.5344129554655871 jurassic_2_jumbo_178b,0.532051282051282 llama3_1_8b_instruct,0.5175232440678665 claude_2_1,0.5110980545763154 qwen2_7b_instruct,0.5034227726178191 mistral_small_2402,0.49924585218702866 mixtral_8x7b_v0_1,0.49324324324324326 glm_4_9b_chat,0.46499582289055974 qwen1_5_14b_chat,0.4621068436857911 phi_3_small_8k_instruct,0.45481670929241264 gpt_3_5_turbo_0301,0.4528985507246377 snorkel_mistral_pairrm_dpo,0.4521151586368978 gemma_7b,0.4471997300944669 gpt_3_5_turbo_0125,0.4401920188365201 llama3_8b,0.43302968960863697 dbrx_instruct,0.4266409266409266 llama3_8b_instruct,0.420135922511747 phi_3_mini_128k_instruct,0.4153205904787544 llama_2_13b,0.41490478332583597 jurassic_2_grande_17b,0.39529914529914534 openhermes_2_5_mistral_7b,0.3832617447168531 mistral_7b_v0_3,0.3737553342816501 mixtral_8x7b_instruct_v0_1,0.3713078251895724 qwen1_5_7b,0.3508771929824561 yi_1_5_6b_chat,0.3354636591478697 falcon_40b,0.32812265707002547 command_r,0.32386140074759 internlm2_chat_20b,0.32252252252252256 mistral_7b_v0_2,0.31970128022759603 luminous_supreme_70b,0.30128205128205127 starling_lm_7b_alpha,0.29823530624445954 yi_6b,0.29234143049932526 mistral_7b_instruct_v0_2,0.28609513981031004 zephyr_7b_alpha,0.2838442157327606 zephyr_7b_beta,0.2666234345800909 gemma_1_1_7b_it,0.26226051061156724 mistral_7b_instruct_v0_3,0.2537839697282422 starling_lm_7b_beta,0.25234441602728047 llama_2_7b,0.2391288049182786 luminous_extended_30b,0.2329059829059829 alpaca_7b,0.22072072072072071 vicuna_33b_v1_3,0.2056404230317274 phi_2,0.20087901666849037 qwen2_1_5b_instruct,0.19711042311661506 yi_6b_chat,0.1938854489164087 qwen1_5_7b_chat,0.1916569245052217 tulu_2_dpo_70b,0.17624223602484473 qwen1_5_4b_chat,0.1674406604747162 llama_2_70b_chat,0.15527950310559005 gpt_neox_20b,0.14400584795321636 vicuna_7b_v1_5,0.13619501854795973 falcon_40b_instruct,0.13264580369843526 gemma_7b_it,0.12136319058515854 falcon_7b,0.11407257459889038 gpt_j_6b,0.10160818713450293 luminous_base_13b,0.08333333333333333 llama_2_7b_chat,0.08304448781801049 gemma_1_1_2b_it,0.07665903890160183 olmo_7b,0.06545209176788123 gemma_2b_it,0.05921052631578947 qwen1_5_1_8b_chat,0.059167526659786716 qwen2_0_5b_instruct,0.059081527347781215 pythia_12b,0.054093567251461985 pythia_6_9b,0.019736842105263157 falcon_7b_instruct,0.013513513513513514 qwen1_5_0_5b_chat,0.013157894736842105