Update space
Browse files
app.py
CHANGED
@@ -128,6 +128,29 @@ def overall_leaderboard(dataframe):
|
|
128 |
)
|
129 |
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
|
132 |
demo = gr.Blocks(css=custom_css)
|
133 |
with demo:
|
@@ -139,7 +162,7 @@ with demo:
|
|
139 |
INTRODUCTION_TEXT_FONT_SIZE = 16
|
140 |
INTRODUCTION_TEXT = (
|
141 |
f'<p style="font-size:{INTRODUCTION_TEXT_FONT_SIZE}px;">'
|
142 |
-
'<strong>Decentralized Arena</strong> automates, scales, and accelerates
|
143 |
'for large language model (LLM) evaluation across diverse, fine-grained dimensions, '
|
144 |
'such as mathematics (algebra, geometry, probability), logical reasoning, social reasoning, science (chemistry, physics, biology), or any user-defined dimensions. '
|
145 |
'The evaluation is decentralized and democratic, with all participating LLMs assessing each other to ensure unbiased and fair results. '
|
@@ -175,7 +198,7 @@ with demo:
|
|
175 |
|
176 |
TEXT = (
|
177 |
f'<p style="font-size:{INTRODUCTION_TEXT_FONT_SIZE}px;">'
|
178 |
-
'Total #models: 57 (Last updated: 2024-10-21)'
|
179 |
'</p>'
|
180 |
f'<p style="font-size:{INTRODUCTION_TEXT_FONT_SIZE}px;">'
|
181 |
'This page provides a comprehensive overview of model ranks across various dimensions, based on their averaged ranks. '
|
|
|
128 |
)
|
129 |
|
130 |
|
131 |
+
# Your leaderboard name
|
132 |
+
TITLE = """<h1 align="center" id="space-title">Decentralized Arena Leaderboard</h1>"""
|
133 |
+
|
134 |
+
SUB_TITLE = """<h2 align="center" id="space-subtitle">Automated, Robust, and Transparent LLM Evaluation for Numerous Dimensions</h2>"""
|
135 |
+
|
136 |
+
EXTERNAL_LINKS = """
|
137 |
+
<h2 align="center" id="space-links">
|
138 |
+
<a href="https://de-arena.maitrix.org/" target="_blank">Blog</a> |
|
139 |
+
<a href="https://github.com/maitrix-org/de-arena" target="_blank">GitHub</a> |
|
140 |
+
<a href="https://de-arena.maitrix.org/images/Heading.mp4" target="">Video</a> |
|
141 |
+
<a href="https://maitrix.org/" target="_blank">@Maitrix.org</a> |
|
142 |
+
<a href="https://www.llm360.ai/" target="_blank">@LLM360</a>
|
143 |
+
</h2>
|
144 |
+
"""
|
145 |
+
|
146 |
+
# What does your leaderboard evaluate?
|
147 |
+
INTRODUCTION_TEXT = """
|
148 |
+
**Decentralized Arena** automates and scales "Chatbot Arena" for LLM evaluation across various fine-grained dimensions
|
149 |
+
(e.g., math – algebra, geometry, probability; logical reasoning, social reasoning, biology, chemistry, …).
|
150 |
+
The evaluation is decentralized and democratic, with all LLMs participating in evaluating others.
|
151 |
+
It achieves a 95\% correlation with Chatbot Arena's overall rankings, while being fully transparent and reproducible.
|
152 |
+
"""
|
153 |
+
|
154 |
|
155 |
demo = gr.Blocks(css=custom_css)
|
156 |
with demo:
|
|
|
162 |
INTRODUCTION_TEXT_FONT_SIZE = 16
|
163 |
INTRODUCTION_TEXT = (
|
164 |
f'<p style="font-size:{INTRODUCTION_TEXT_FONT_SIZE}px;">'
|
165 |
+
'<strong>Decentralized Arena</strong> automates, scales, and accelerates <a href="https://lmarena.ai/">Chatbot Arena</a> '
|
166 |
'for large language model (LLM) evaluation across diverse, fine-grained dimensions, '
|
167 |
'such as mathematics (algebra, geometry, probability), logical reasoning, social reasoning, science (chemistry, physics, biology), or any user-defined dimensions. '
|
168 |
'The evaluation is decentralized and democratic, with all participating LLMs assessing each other to ensure unbiased and fair results. '
|
|
|
198 |
|
199 |
TEXT = (
|
200 |
f'<p style="font-size:{INTRODUCTION_TEXT_FONT_SIZE}px;">'
|
201 |
+
'<b>Total #models: 57 (Last updated: 2024-10-21)</b>'
|
202 |
'</p>'
|
203 |
f'<p style="font-size:{INTRODUCTION_TEXT_FONT_SIZE}px;">'
|
204 |
'This page provides a comprehensive overview of model ranks across various dimensions, based on their averaged ranks. '
|