Spaces:
Sleeping
Sleeping
vtrv.vls
committed on
Commit
·
dd9842f
1
Parent(s):
d4e0b1a
Added header and banner
Browse files- about.md +10 -0
- app.py +12 -8
- constants.py +2 -2
- header.md +4 -0
- resources/MERA.png +0 -0
- test.md +0 -1
about.md
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<br/>
|
2 |
+
|
3 |
+
## We propose a new methodology for the evaluation of foundation models:
|
4 |
+
|
5 |
+
<br/>
|
6 |
+
21 challenging tasks for foundation models covering issues related to world knowledge, logic, cause-and-effect relationships, AI ethics, and much more.
|
7 |
+
|
8 |
+
We have developed an open instructional benchmark for evaluating large language models for the Russian language. A unified leaderboard on the website includes fixed, verified expert tasks and standardized configurations of prompts and parameters.
|
9 |
+
|
10 |
+
The project has been supported by the AI Alliance, leading industrial players, and academic partners engaged in language model research.
|
app.py
CHANGED
@@ -8,13 +8,14 @@ from random import choice
|
|
8 |
|
9 |
import queue
|
10 |
|
11 |
-
from constants import css, js_code, js_light
|
12 |
from utils import model_response, clear_chat
|
13 |
from models import get_tinyllama, get_qwen2ins1b, GigaChat, response_gigachat, response_qwen2ins1b, response_tinyllama
|
14 |
|
15 |
INIT_MODELS = dict()
|
16 |
S3_SESSION = None
|
17 |
-
|
|
|
18 |
CURRENT_MODELS = queue.LifoQueue()
|
19 |
MODEL_LIB = {'TINYLLAMA': get_tinyllama, "QWEN2INS1B": get_qwen2ins1b, "RUBASE": GigaChat.get_giga}
|
20 |
GEN_LIB = {'TINYLLAMA': response_tinyllama, "QWEN2INS1B": response_qwen2ins1b, "RUBASE": response_gigachat}
|
@@ -258,10 +259,10 @@ def tab_offline_arena():
|
|
258 |
|
259 |
def build_demo():
|
260 |
with gradio.Blocks(theme=gradio.themes.Soft(text_size=gradio.themes.sizes.text_lg), css=css) as demo: # , css=css, js=js_light
|
261 |
-
|
|
|
262 |
with gradio.Tabs() as tabs:
|
263 |
with gradio.TabItem("πΌ MERA leaderboard", id=0):
|
264 |
-
gradio.Markdown(TEST_MD)
|
265 |
tab_leaderboard()
|
266 |
|
267 |
with gradio.TabItem("π SBS by categories and criteria", id=1):
|
@@ -272,7 +273,7 @@ def build_demo():
|
|
272 |
# _tab_explore()
|
273 |
|
274 |
with gradio.TabItem("πͺ About MERA", id=3):
|
275 |
-
gradio.Markdown(
|
276 |
return demo
|
277 |
|
278 |
if __name__ == "__main__":
|
@@ -283,8 +284,11 @@ if __name__ == "__main__":
|
|
283 |
# data_load(args.result_file)
|
284 |
# TYPES = ["number", "markdown", "number"]
|
285 |
|
286 |
-
with open("
|
287 |
-
|
|
|
|
|
|
|
288 |
|
289 |
try:
|
290 |
session = boto3.session.Session()
|
@@ -298,7 +302,7 @@ if __name__ == "__main__":
|
|
298 |
print('Failed to start s3 session')
|
299 |
|
300 |
app = build_demo()
|
301 |
-
app.launch(share=args.share, height=3000, width="110%") # share=args.share
|
302 |
|
303 |
# demo = gradio.Interface(fn=gen, inputs="text", outputs="text")
|
304 |
# demo.launch()
|
|
|
8 |
|
9 |
import queue
|
10 |
|
11 |
+
from constants import css, js_code, js_light, BANNER
|
12 |
from utils import model_response, clear_chat
|
13 |
from models import get_tinyllama, get_qwen2ins1b, GigaChat, response_gigachat, response_qwen2ins1b, response_tinyllama
|
14 |
|
15 |
INIT_MODELS = dict()
|
16 |
S3_SESSION = None
|
17 |
+
HEADER_MD = None
|
18 |
+
ABOUT_MD = None
|
19 |
CURRENT_MODELS = queue.LifoQueue()
|
20 |
MODEL_LIB = {'TINYLLAMA': get_tinyllama, "QWEN2INS1B": get_qwen2ins1b, "RUBASE": GigaChat.get_giga}
|
21 |
GEN_LIB = {'TINYLLAMA': response_tinyllama, "QWEN2INS1B": response_qwen2ins1b, "RUBASE": response_gigachat}
|
|
|
259 |
|
260 |
def build_demo():
|
261 |
with gradio.Blocks(theme=gradio.themes.Soft(text_size=gradio.themes.sizes.text_lg), css=css) as demo: # , css=css, js=js_light
|
262 |
+
gradio.HTML(BANNER, elem_id="banner")
|
263 |
+
gradio.Markdown(HEADER_MD)
|
264 |
with gradio.Tabs() as tabs:
|
265 |
with gradio.TabItem("πΌ MERA leaderboard", id=0):
|
|
|
266 |
tab_leaderboard()
|
267 |
|
268 |
with gradio.TabItem("π SBS by categories and criteria", id=1):
|
|
|
273 |
# _tab_explore()
|
274 |
|
275 |
with gradio.TabItem("πͺ About MERA", id=3):
|
276 |
+
gradio.Markdown(ABOUT_MD)
|
277 |
return demo
|
278 |
|
279 |
if __name__ == "__main__":
|
|
|
284 |
# data_load(args.result_file)
|
285 |
# TYPES = ["number", "markdown", "number"]
|
286 |
|
287 |
+
with open("header.md", "r") as f:
|
288 |
+
HEADER_MD = f.read()
|
289 |
+
|
290 |
+
with open("about.md", "r") as f:
|
291 |
+
ABOUT_MD = f.read()
|
292 |
|
293 |
try:
|
294 |
session = boto3.session.Session()
|
|
|
302 |
print('Failed to start s3 session')
|
303 |
|
304 |
app = build_demo()
|
305 |
+
app.launch(share=args.share, height=3000, width="110%", allowed_paths=["."]) # share=args.share
|
306 |
|
307 |
# demo = gradio.Interface(fn=gen, inputs="text", outputs="text")
|
308 |
# demo.launch()
|
constants.py
CHANGED
@@ -4,8 +4,8 @@ from collections import OrderedDict
|
|
4 |
# DEFAULT_K = "∞"
|
5 |
DEFAULT_K = "1500"
|
6 |
|
7 |
-
|
8 |
-
BANNER = f'<div style="display: flex; justify-content: flex-start;"><img src="{
|
9 |
|
10 |
TITLE = "<html> <head> <style> h1 {text-align: center;} </style> </head> <body> <h1> π¦ AI2 WildBench Leaderboard </b> </body> </html>"
|
11 |
|
|
|
4 |
# DEFAULT_K = "∞"
|
5 |
DEFAULT_K = "1500"
|
6 |
|
7 |
+
banner_path = "file/resources/MERA.png" # the same repo here.
|
8 |
+
BANNER = f'<div style="display: flex; justify-content: flex-start;"><img src="{banner_path}" alt="MERA" style="width: 20vw; min-width: 150px; max-width: 400px;"> </div>'
|
9 |
|
10 |
TITLE = "<html> <head> <style> h1 {text-align: center;} </style> </head> <body> <h1> π¦ AI2 WildBench Leaderboard </b> </body> </html>"
|
11 |
|
header.md
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<br/>
|
2 |
+
|
3 |
+
# MERA: Multimodal Evaluation for Russian-language Architectures
|
4 |
+
[GitHub](https://github.com/ai-forever/MERA) [HFDatasets](https://huggingface.co/datasets/ai-forever/MERA) [Paper](https://arxiv.org/abs/2401.04531) [Info](https://mera.a-ai.ru/en) Version: V1
|
resources/MERA.png
ADDED
test.md
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
## TEST
|
|
|
|