Spaces:
Running
Running
External check_significance() at czechllm.fit.vutbr.cz
Browse files
server.py
CHANGED
@@ -4,6 +4,7 @@ import json
|
|
4 |
import os
|
5 |
import hashlib
|
6 |
import time
|
|
|
7 |
from collections import namedtuple
|
8 |
from xml.sax.saxutils import escape as xmlEscape, quoteattr as xmlQuoteAttr
|
9 |
|
@@ -11,7 +12,7 @@ import gradio as gr
|
|
11 |
import pandas as pd
|
12 |
from huggingface_hub import HfApi, snapshot_download
|
13 |
|
14 |
-
from compare_significance import
|
15 |
|
16 |
VISIBLE_METRICS = SUPPORTED_METRICS + ["macro_f1"]
|
17 |
|
@@ -42,6 +43,49 @@ MARKDOWN_SPECIAL_CHARACTERS = {
|
|
42 |
"|": "|"
|
43 |
}
|
44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
class LeaderboardServer:
|
46 |
def __init__(self):
|
47 |
self.server_address = REPO
|
@@ -213,10 +257,14 @@ class LeaderboardServer:
|
|
213 |
new_tournament[new_submission_id][new_submission_id] = {
|
214 |
task: False for task in self.tasks_metadata.keys()
|
215 |
}
|
216 |
-
|
217 |
for competitor_id in self.submission_ids:
|
218 |
-
res =
|
219 |
-
res_inverse =
|
|
|
|
|
|
|
|
|
220 |
new_tournament[new_submission_id][competitor_id] = {
|
221 |
task: data["significant"] for task, data in res.items()
|
222 |
}
|
|
|
4 |
import os
|
5 |
import hashlib
|
6 |
import time
|
7 |
+
import requests
|
8 |
from collections import namedtuple
|
9 |
from xml.sax.saxutils import escape as xmlEscape, quoteattr as xmlQuoteAttr
|
10 |
|
|
|
12 |
import pandas as pd
|
13 |
from huggingface_hub import HfApi, snapshot_download
|
14 |
|
15 |
+
from compare_significance import SUPPORTED_METRICS
|
16 |
|
17 |
VISIBLE_METRICS = SUPPORTED_METRICS + ["macro_f1"]
|
18 |
|
|
|
43 |
"|": "|"
|
44 |
}
|
45 |
|
46 |
+
def check_significance_send_task(model_a_path, model_b_path, timeout=(10, 300)):
    """Submit two files to the remote significance-comparison endpoint.

    Both files are opened in binary mode and uploaded in one multipart POST
    under the form fields ``model_a`` and ``model_b``.

    Args:
        model_a_path: Path of the file uploaded as ``model_a``.
        model_b_path: Path of the file uploaded as ``model_b``.
        timeout: Passed straight to ``requests.post``; a
            ``(connect, read)`` tuple in seconds, or ``None`` to wait
            indefinitely. Without it a stalled server would block the
            caller forever.

    Returns:
        ``response.url`` of the accepted (HTTP 202) submission. The caller
        polls this URL for the result.
        NOTE(review): this presumes the server answers the POST at (or
        redirects it to) the task-specific result URL -- confirm against
        the service.

    Raises:
        RuntimeError: If the server answers 429 (too busy) or any status
            other than 202.
        requests.exceptions.RequestException: If the request itself fails
            (connection error, timeout, ...).
    """
    url = 'https://czechllm.fit.vutbr.cz/benczechmark-leaderboard/compare_significance/'

    # The files only need to stay open while the request body is being sent.
    with open(model_a_path, 'rb') as model_a_fp, open(model_b_path, 'rb') as model_b_fp:
        files = {
            'model_a': model_a_fp,
            'model_b': model_b_fp,
        }
        response = requests.post(url, files=files, timeout=timeout)

    # TODO: wrap these in try/except at the call site and re-raise as gr.Error
    if response.status_code == 429:
        raise RuntimeError('Server is too busy. Please try again later.')
    if response.status_code != 202:
        raise RuntimeError(f'Failed to submit task. Status code: {response.status_code}')

    return response.url
|
70 |
+
|
71 |
+
def check_significance_wait_for_result(result_url, poll_interval=5, request_timeout=30, max_wait=None):
    """Poll ``result_url`` until the comparison result is available.

    The URL is fetched repeatedly: HTTP 202 means "not ready yet" and
    triggers another poll after ``poll_interval`` seconds, HTTP 200 carries
    the finished result as JSON.

    Args:
        result_url: URL to poll (as returned by
            ``check_significance_send_task``).
        poll_interval: Seconds to sleep between polls while the server
            keeps answering 202.
        request_timeout: Timeout in seconds passed to each
            ``requests.get`` call, so one hung connection cannot block the
            loop forever.
        max_wait: Optional upper bound, in seconds, on the total time spent
            polling. ``None`` (the default) polls until the server answers
            with something other than 202.

    Returns:
        The value stored under the ``'result'`` key of the JSON body of the
        HTTP 200 response.

    Raises:
        RuntimeError: If the server answers with a status other than 200 or
            202, or if ``max_wait`` elapses before a result arrives.
        requests.exceptions.RequestException: If a poll request itself
            fails (connection error, timeout, ...).
    """
    # Monotonic clock: immune to wall-clock adjustments during long waits.
    deadline = None if max_wait is None else time.monotonic() + max_wait

    while True:
        response = requests.get(result_url, timeout=request_timeout)

        if response.status_code == 200:
            return response.json()['result']

        # TODO: wrap in try/except at the call site and re-raise as gr.Error
        if response.status_code != 202:
            raise RuntimeError(f'Failed to get result. Status code: {response.status_code}')

        if deadline is not None and time.monotonic() >= deadline:
            raise RuntimeError(f'Timed out waiting for result after {max_wait} seconds.')

        time.sleep(poll_interval)
|
83 |
+
|
84 |
+
def check_significance(model_a_path, model_b_path):
    """Submit two files for comparison and block until the result is ready.

    Convenience wrapper: sends the task with
    ``check_significance_send_task`` and then polls the returned URL with
    ``check_significance_wait_for_result``.

    Args:
        model_a_path: Path of the file submitted as model A.
        model_b_path: Path of the file submitted as model B.

    Returns:
        Whatever ``check_significance_wait_for_result`` returns for the
        submitted task.
    """
    return check_significance_wait_for_result(
        check_significance_send_task(model_a_path, model_b_path)
    )
|
88 |
+
|
89 |
class LeaderboardServer:
|
90 |
def __init__(self):
|
91 |
self.server_address = REPO
|
|
|
257 |
new_tournament[new_submission_id][new_submission_id] = {
|
258 |
task: False for task in self.tasks_metadata.keys()
|
259 |
}
|
260 |
+
|
261 |
for competitor_id in self.submission_ids:
|
262 |
+
res = check_significance_send_task(new_model_file, self.submission_id_to_file[competitor_id])
|
263 |
+
res_inverse = check_significance_send_task(self.submission_id_to_file[competitor_id], new_model_file)
|
264 |
+
|
265 |
+
res = check_significance_wait_for_result(res)
|
266 |
+
res_inverse = check_significance_wait_for_result(res_inverse)
|
267 |
+
|
268 |
new_tournament[new_submission_id][competitor_id] = {
|
269 |
task: data["significant"] for task, data in res.items()
|
270 |
}
|