File size: 4,459 Bytes
4ff62ee
89ae31d
943f952
6d86af8
 
4ff62ee
4b30bd0
4ff62ee
3f0aca6
4ff62ee
c91d7f4
 
 
 
5cabcff
 
5959f50
f8fc31a
 
 
 
 
 
 
 
 
 
b207139
f8fc31a
 
 
4619468
 
f8fc31a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14dbcf4
f8fc31a
 
 
 
 
 
 
 
 
 
 
 
 
 
14dbcf4
f8fc31a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5959f50
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
---
title: Open CoT Leaderboard
emoji: 🥇
colorFrom: red
colorTo: yellow
sdk: gradio
sdk_version: 4.36.0
app_file: app.py
pinned: true
license: apache-2.0
duplicated_from: logikon/open_cot_leaderboard
fullWidth: true
tags:
  - leaderboard
  - CoT
  - chain-of-thought
short_description: Track, rank and evaluate open LLMs' CoT quality
models:
  - 0-hero/Matter-0.2-7B-DPO
  - 01-ai/Yi-1.5-34B-Chat
  - 01-ai/Yi-1.5-9B
  - 01-ai/Yi-1.5-9B-32K
  - 01-ai/Yi-1.5-9B-Chat
  - 01-ai/Yi-1.5-9B-Chat-16K
  - 01-ai/Yi-34B
  - 01-ai/Yi-34B-Chat
  - 01-ai/Yi-6B
  - BSC-LT/salamandra-7b-instruct
  - ChavyvAkvar/habib-DPO-v3
  - CohereForAI/aya-23-35B
  - CohereForAI/aya-23-8B
  - CohereForAI/aya-expanse-32b
  - CohereForAI/aya-expanse-8b
  - CohereForAI/c4ai-command-r-plus-08-2024
  - Deci/DeciLM-7B
  - Deci/DeciLM-7B-instruct
  - HuggingFaceH4/zephyr-7b-beta
  - HuggingFaceTB/SmolLM-1.7B-Instruct
  - HuggingFaceTB/SmolLM-135M-Instruct
  - Intel/neural-chat-7b-v3-1
  - Kukedlc/NeuralLLaMa-3-8b-DT-v0.1
  - Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3
  - Kukedlc/NeuralSynthesis-7b-v0.4-slerp
  - LeroyDyer/Mixtral_AI_Chat_1.0
  - LeroyDyer/Mixtral_AI_CyberTron_DeepMind_III_UFT
  - LeroyDyer/Mixtral_AI_Cyber_Boss
  - Locutusque/Hercules-4.0-Mistral-v0.2-7B
  - Locutusque/OpenCerebrum-1.0-7b-DPO
  - NousResearch/Hermes-2-Pro-Mistral-7B
  - NousResearch/Hermes-3-Llama-3.1-70B
  - NousResearch/Nous-Hermes-llama-2-7b
  - OpenBuddy/openbuddy-llama3-8b-v21.1-8k
  - OpenBuddy/openbuddy-mistral2-7b-v20.2-32k
  - OpenBuddy/openbuddy-yi1.5-9b-v21.1-32k
  - OpenBuddy/openbuddy-zen-3b-v21.1-32k
  - OpenBuddy/openbuddy-zen-3b-v21.2-32k
  - Qwen/Qwen2-72B-Instruct
  - Qwen/Qwen2-7B-Instruct
  - Qwen/Qwen2.5-14B-Instruct
  - Qwen/Qwen2.5-32B-Instruct
  - Qwen/Qwen2.5-3B-Instruct
  - Qwen/Qwen2.5-72B-Instruct
  - Qwen/Qwen2.5-7B-Instruct
  - Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R
  - ai21labs/AI21-Jamba-1.5-Mini
  - allenai/OLMo-7B-0724-Instruct-hf
  - allenai/tulu-2-13b
  - allenai/tulu-2-70b
  - allenai/tulu-2-7b
  - allenai/tulu-2-dpo-13b
  - allenai/tulu-2-dpo-70b
  - allenai/tulu-2-dpo-7b
  - arcee-ai/Llama-3.1-SuperNova-Lite
  - arcee-ai/SuperNova-Medius
  - bunnycore/Mnemosyne-7B
  - cognitivecomputations/Llama-3-8B-Instruct-abliterated-v2
  - cookinai/LlamaReflect-8B-CoT-safetensors
  - databricks/dbrx-instruct
  - databricks/dolly-v2-3b
  - google/gemma-2-27b-it
  - google/gemma-2-2b-it
  - google/gemma-2-9b-it
  - google/gemma-2b
  - google/gemma-2b-it
  - google/gemma-7b
  - google/gemma-7b-it
  - ibm/merlinite-7b
  - ichigoberry/pandafish-2-7b-32k
  - internlm/internlm2-7b
  - internlm/internlm2-chat-20b
  - internlm/internlm2-chat-7b
  - internlm/internlm2-math-20b
  - internlm/internlm2-math-7b
  - meta-llama/Llama-2-13b-chat-hf
  - meta-llama/Llama-2-13b-hf
  - meta-llama/Llama-2-70b-chat-hf
  - meta-llama/Llama-2-70b-hf
  - meta-llama/Llama-2-7b-hf
  - meta-llama/Llama-3.2-1B-Instruct
  - meta-llama/Llama-3.2-3B-Instruct
  - meta-llama/Meta-Llama-3-70B
  - meta-llama/Meta-Llama-3-70B-Instruct
  - meta-llama/Meta-Llama-3-8B
  - meta-llama/Meta-Llama-3-8B-Instruct
  - meta-llama/Meta-Llama-3.1-70B-Instruct
  - meta-llama/Meta-Llama-3.1-8B-Instruct
  - microsoft/Orca-2-13b
  - microsoft/Orca-2-7b
  - microsoft/Phi-3-medium-4k-instruct
  - microsoft/Phi-3-mini-4k-instruct
  - microsoft/Phi-3-small-8k-instruct
  - microsoft/Phi-3.5-MoE-instruct
  - microsoft/Phi-3.5-mini-instruct
  - microsoft/phi-2
  - mistralai/Mistral-7B-Instruct-v0.2
  - mistralai/Mistral-7B-Instruct-v0.3
  - mistralai/Mistral-7B-v0.1
  - mistralai/Mistral-Nemo-Instruct-2407
  - mistralai/Mistral-Small-Instruct-2409
  - mistralai/Mixtral-8x22B-Instruct-v0.1
  - mistralai/Mixtral-8x7B-Instruct-v0.1
  - mistralai/Mixtral-8x7B-v0.1
  - mlabonne/AlphaMonarch-7B
  - mlabonne/Daredevil-8B-abliterated
  - nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
  - openbmb/Eurus-70b-sft
  - openbmb/Eurus-7b-kto
  - openchat/openchat-3.5-0106
  - princeton-nlp/gemma-2-9b-it-SimPO
  - teknium/OpenHermes-2.5-Mistral-7B
  - terrycraddock/Reflection-Llama-3.1-8B
  - unsloth/Phi-3.5-mini-instruct
  - upstage/SOLAR-10.7B-Instruct-v1.0
  - upstage/SOLAR-10.7B-v1.0
  - vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B
  - vicgalle/Configurable-Janus-7B
  - vicgalle/Configurable-Yi-1.5-9B-Chat
  - wandb/gemma-2b-zephyr-dpo
  - wenbopan/Faro-Yi-9B-DPO
---