Spaces:
Running
Running
ycy
committed on
Commit
·
5ca8d20
1
Parent(s):
cce655b
about
Browse files
- src/about.py +13 -5
src/about.py
CHANGED
@@ -63,11 +63,19 @@ def get_INTRODUCTION_TEXT(model_num: int, LAST_UPDATED: str, paper_link="TODO"):
|
|
63 |
|
64 |
#TODO
|
65 |
INTRODUCE_BENCHMARK = f"""
|
66 |
-
<details>
|
67 |
-
|
68 |
-
|
69 |
-
</
|
70 |
-
""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
#TODO About
|
73 |
LLM_BENCHMARKS_TEXT = f"""
|
|
|
63 |
|
64 |
#TODO
|
65 |
INTRODUCE_BENCHMARK = f"""
|
66 |
+
<details style="margin: 10px 0; padding: 10px;">
|
67 |
+
<summary style="cursor: pointer; font-size: 18px; color: #2c3e50; font-weight: bold; transition: color 0.3s;">
|
68 |
+
💬 Metric Explanations
|
69 |
+
</summary>
|
70 |
+
<div style="color: #2c3e50; border-left: 4px solid #2980b9; padding-left: 12px; margin-top: 8px;">
|
71 |
+
<p>
|
72 |
+
<strong>CapArena-Auto</strong> is an arena-style automated evaluation benchmark for detailed captioning.
|
73 |
+
It includes <strong>600 evaluation images</strong> and assesses model performance through
|
74 |
+
<em>pairwise battles</em> with three baseline models. The final score is calculated by <strong>GPT4o-as-a-Judge</strong>.
|
75 |
+
</p>
|
76 |
+
</div>
|
77 |
+
</details>
|
78 |
+
"""
|
79 |
|
80 |
#TODO About
|
81 |
LLM_BENCHMARKS_TEXT = f"""
|