Spaces:
Sleeping
Sleeping
{"model name ": "gpt-4-0125-preview", "elo overall": 1219, "Information seeking": 1266, "Creative Writing": 1199, "Coding & Debugging": 1154, "Reasoning": 1275, "Editing": 1120, "Math": 1192, "Planning": 1188, "Brainstorming": 1179, "Role playing": 1218, "Advice seeking": 1277, "Data Analysis": 1161, "Others": 1033, "average": 1188.5, "# battles": 5363} | |
{"model name ": "Llama-2-70b-chat-hf.nosp", "elo overall": 1166, "Information seeking": 1225, "Creative Writing": 1172, "Coding & Debugging": 1063, "Reasoning": 1130, "Editing": 1078, "Math": 1008, "Planning": 1135, "Brainstorming": 1139, "Role playing": 1130, "Advice seeking": 1115, "Data Analysis": 1043, "Others": 1016, "average": 1104.5, "# battles": 1715} | |
{"model name ": "Llama-2-7b-chat-hf.nosp", "elo overall": 1151, "Information seeking": 1195, "Creative Writing": 1149, "Coding & Debugging": 1051, "Reasoning": 1120, "Editing": 1042, "Math": 1009, "Planning": 1141, "Brainstorming": 1142, "Role playing": 1139, "Advice seeking": 1149, "Data Analysis": 1059, "Others": 1008, "average": 1100.3333333333333, "# battles": 1667} | |
{"model name ": "Llama-2-13b-chat-hf.nosp", "elo overall": 1148, "Information seeking": 1192, "Creative Writing": 1137, "Coding & Debugging": 1032, "Reasoning": 1102, "Editing": 1064, "Math": 1021, "Planning": 1121, "Brainstorming": 1140, "Role playing": 1102, "Advice seeking": 1126, "Data Analysis": 1040, "Others": 1032, "average": 1092.4166666666667, "# battles": 1657} | |
{"model name ": "Yi-34B-Chat", "elo overall": 1106, "Information seeking": 1096, "Creative Writing": 1066, "Coding & Debugging": 1126, "Reasoning": 1118, "Editing": 1013, "Math": 1095, "Planning": 1137, "Brainstorming": 1118, "Role playing": 1052, "Advice seeking": 1133, "Data Analysis": 1072, "Others": 1003, "average": 1085.75, "# battles": 2134} | |
{"model name ": "Mistral-7B-Instruct-v0.2", "elo overall": 1088, "Information seeking": 1092, "Creative Writing": 1132, "Coding & Debugging": 1053, "Reasoning": 1057, "Editing": 1068, "Math": 1086, "Planning": 1042, "Brainstorming": 1037, "Role playing": 1123, "Advice seeking": 1041, "Data Analysis": 1048, "Others": 1010, "average": 1065.75, "# battles": 2313} | |
{"model name ": "zephyr-7b-beta", "elo overall": 1060, "Information seeking": 1022, "Creative Writing": 1093, "Coding & Debugging": 1065, "Reasoning": 1036, "Editing": 1074, "Math": 1040, "Planning": 1051, "Brainstorming": 1072, "Role playing": 1035, "Advice seeking": 976, "Data Analysis": 1047, "Others": 1014, "average": 1043.75, "# battles": 3310} | |
{"model name ": "tulu-2-dpo-70b", "elo overall": 1020, "Information seeking": 1004, "Creative Writing": 1065, "Coding & Debugging": 970, "Reasoning": 998, "Editing": 1052, "Math": 1030, "Planning": 1005, "Brainstorming": 1008, "Role playing": 1061, "Advice seeking": 1007, "Data Analysis": 998, "Others": 1026, "average": 1018.6666666666666, "# battles": 3342} | |
{"model name ": "claude-3-sonnet-20240229", "elo overall": 994, "Information seeking": 975, "Creative Writing": 944, "Coding & Debugging": 1099, "Reasoning": 1039, "Editing": 1045, "Math": 1069, "Planning": 1017, "Brainstorming": 953, "Role playing": 896, "Advice seeking": 976, "Data Analysis": 1041, "Others": 988, "average": 1003.5, "# battles": 2483} | |
{"model name ": "Mixtral-8x7B-Instruct-v0.1", "elo overall": 991, "Information seeking": 993, "Creative Writing": 1003, "Coding & Debugging": 980, "Reasoning": 987, "Editing": 991, "Math": 994, "Planning": 960, "Brainstorming": 965, "Role playing": 1024, "Advice seeking": 959, "Data Analysis": 983, "Others": 1020, "average": 988.25, "# battles": 3350} | |
{"model name ": "claude-3-opus-20240229", "elo overall": 986, "Information seeking": 951, "Creative Writing": 940, "Coding & Debugging": 1137, "Reasoning": 1034, "Editing": 1067, "Math": 1028, "Planning": 1010, "Brainstorming": 945, "Role playing": 885, "Advice seeking": 1009, "Data Analysis": 1055, "Others": 1000, "average": 1005.0833333333334, "# battles": 2259} | |
{"model name ": "Llama-2-70b-chat-hf", "elo overall": 985, "Information seeking": 988, "Creative Writing": 969, "Coding & Debugging": 998, "Reasoning": 1017, "Editing": 1021, "Math": 960, "Planning": 1026, "Brainstorming": 1018, "Role playing": 972, "Advice seeking": 1049, "Data Analysis": 1014, "Others": 1025, "average": 1004.75, "# battles": 2090} | |
{"model name ": "command", "elo overall": 982, "Information seeking": 965, "Creative Writing": 983, "Coding & Debugging": 1004, "Reasoning": 980, "Editing": 1005, "Math": 992, "Planning": 1052, "Brainstorming": 1030, "Role playing": 954, "Advice seeking": 974, "Data Analysis": 967, "Others": 995, "average": 991.75, "# battles": 1655} | |
{"model name ": "Llama-2-13b-chat-hf", "elo overall": 966, "Information seeking": 968, "Creative Writing": 956, "Coding & Debugging": 956, "Reasoning": 965, "Editing": 1019, "Math": 1011, "Planning": 999, "Brainstorming": 999, "Role playing": 976, "Advice seeking": 1027, "Data Analysis": 1012, "Others": 1000, "average": 990.6666666666666, "# battles": 2045} | |
{"model name ": "mistral-large-2402", "elo overall": 958, "Information seeking": 939, "Creative Writing": 995, "Coding & Debugging": 941, "Reasoning": 969, "Editing": 985, "Math": 985, "Planning": 919, "Brainstorming": 986, "Role playing": 988, "Advice seeking": 952, "Data Analysis": 968, "Others": 1005, "average": 969.3333333333334, "# battles": 1794} | |
{"model name ": "gemini-1.0-pro", "elo overall": 951, "Information seeking": 957, "Creative Writing": 960, "Coding & Debugging": 942, "Reasoning": 955, "Editing": 947, "Math": 969, "Planning": 904, "Brainstorming": 953, "Role playing": 1000, "Advice seeking": 926, "Data Analysis": 964, "Others": 998, "average": 956.25, "# battles": 1644} | |
{"model name ": "Llama-2-7b-chat-hf", "elo overall": 948, "Information seeking": 952, "Creative Writing": 957, "Coding & Debugging": 918, "Reasoning": 962, "Editing": 990, "Math": 962, "Planning": 995, "Brainstorming": 982, "Role playing": 958, "Advice seeking": 998, "Data Analysis": 993, "Others": 1006, "average": 972.75, "# battles": 2036} | |
{"model name ": "gemma-7b-it", "elo overall": 889, "Information seeking": 907, "Creative Writing": 858, "Coding & Debugging": 953, "Reasoning": 881, "Editing": 854, "Math": 929, "Planning": 870, "Brainstorming": 872, "Role playing": 949, "Advice seeking": 899, "Data Analysis": 943, "Others": 1003, "average": 909.8333333333334, "# battles": 2430} | |
{"model name ": "Mistral-7B-Instruct-v0.1", "elo overall": 887, "Information seeking": 862, "Creative Writing": 913, "Coding & Debugging": 930, "Reasoning": 875, "Editing": 948, "Math": 964, "Planning": 862, "Brainstorming": 870, "Role playing": 956, "Advice seeking": 845, "Data Analysis": 964, "Others": 987, "average": 914.6666666666666, "# battles": 2416} | |
{"model name ": "gemma-2b-it", "elo overall": 854, "Information seeking": 876, "Creative Writing": 883, "Coding & Debugging": 838, "Reasoning": 813, "Editing": 878, "Math": 820, "Planning": 904, "Brainstorming": 902, "Role playing": 909, "Advice seeking": 881, "Data Analysis": 861, "Others": 985, "average": 879.1666666666666, "# battles": 2410} | |
{"model name ": "vicuna-13b-v1.5", "elo overall": 854, "Information seeking": 830, "Creative Writing": 837, "Coding & Debugging": 929, "Reasoning": 864, "Editing": 889, "Math": 953, "Planning": 863, "Brainstorming": 874, "Role playing": 894, "Advice seeking": 883, "Data Analysis": 954, "Others": 993, "average": 896.9166666666666, "# battles": 2170} | |
{"model name ": "gpt-3.5-turbo-0125", "elo overall": 789, "Information seeking": 744, "Creative Writing": 788, "Coding & Debugging": 860, "Reasoning": 822, "Editing": 857, "Math": 874, "Planning": 797, "Brainstorming": 815, "Role playing": 775, "Advice seeking": 788, "Data Analysis": 832, "Others": 845, "average": 816.4166666666666, "# battles": 13333} | |