Use the official dataset over scraping
Browse files- functions.py +4 -6
- openllm.py +0 -59
functions.py
CHANGED
@@ -2,6 +2,7 @@ import os
|
|
2 |
|
3 |
import gradio as gr
|
4 |
import pandas as pd
|
|
|
5 |
from huggingface_hub import (
|
6 |
CommitOperationAdd,
|
7 |
EvalResult,
|
@@ -12,13 +13,11 @@ from huggingface_hub import (
|
|
12 |
from huggingface_hub.repocard_data import eval_results_to_model_index
|
13 |
from pytablewriter import MarkdownTableWriter
|
14 |
|
15 |
-
from openllm import get_datas, get_json_format_data
|
16 |
-
|
17 |
BOT_HF_TOKEN = os.getenv("BOT_HF_TOKEN")
|
18 |
|
19 |
|
20 |
def search(df, value):
|
21 |
-
result_df = df[df["Model"] == value]
|
22 |
return result_df.iloc[0].to_dict() if not result_df.empty else None
|
23 |
|
24 |
|
@@ -157,9 +156,8 @@ def commit(
|
|
157 |
message="Adding Evaluation Results",
|
158 |
oauth_token: gr.OAuthToken | None = None,
|
159 |
): # specify pr number if you want to edit it, don't if you don't want
|
160 |
-
data = get_json_format_data()
|
161 |
-
finished_models = get_datas(data)
|
162 |
-
df = pd.DataFrame(finished_models)
|
163 |
|
164 |
desc = """
|
165 |
This is an automated PR created with https://huggingface.co/spaces/T145/open-llm-leaderboard-results-to-modelcard
|
|
|
2 |
|
3 |
import gradio as gr
|
4 |
import pandas as pd
|
5 |
+
from datasets import load_dataset
|
6 |
from huggingface_hub import (
|
7 |
CommitOperationAdd,
|
8 |
EvalResult,
|
|
|
13 |
from huggingface_hub.repocard_data import eval_results_to_model_index
|
14 |
from pytablewriter import MarkdownTableWriter
|
15 |
|
|
|
|
|
16 |
BOT_HF_TOKEN = os.getenv("BOT_HF_TOKEN")
|
17 |
|
18 |
|
19 |
def search(df, value):
|
20 |
+
result_df = df[df["fullname"] == value]
|
21 |
return result_df.iloc[0].to_dict() if not result_df.empty else None
|
22 |
|
23 |
|
|
|
156 |
message="Adding Evaluation Results",
|
157 |
oauth_token: gr.OAuthToken | None = None,
|
158 |
): # specify pr number if you want to edit it, don't if you don't want
|
159 |
+
data = load_dataset("open-llm-leaderboard/contents", split="train")
|
160 |
+
df = pd.DataFrame(data)
|
|
|
161 |
|
162 |
desc = """
|
163 |
This is an automated PR created with https://huggingface.co/spaces/T145/open-llm-leaderboard-results-to-modelcard
|
openllm.py
DELETED
@@ -1,59 +0,0 @@
|
|
1 |
-
import json
|
2 |
-
|
3 |
-
import requests
|
4 |
-
from bs4 import BeautifulSoup
|
5 |
-
|
6 |
-
|
7 |
-
def get_json_format_data():
|
8 |
-
url = "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/"
|
9 |
-
response = requests.get(url)
|
10 |
-
soup = BeautifulSoup(response.content, "html.parser")
|
11 |
-
|
12 |
-
script_elements = soup.find_all("script")
|
13 |
-
json_format_data = json.loads(str(script_elements[1])[31:-10])
|
14 |
-
return json_format_data
|
15 |
-
|
16 |
-
|
17 |
-
def get_datas(data):
|
18 |
-
for component_index in range(
|
19 |
-
0, 50, 1
|
20 |
-
): # component_index sometimes changes when they update the space, we can use this "for" loop to avoid changing component index manually
|
21 |
-
try:
|
22 |
-
result_list = []
|
23 |
-
i = 0
|
24 |
-
columns = data["components"][component_index]["props"]["headers"]
|
25 |
-
|
26 |
-
while True:
|
27 |
-
try:
|
28 |
-
results = data["components"][component_index]["props"]["value"]["data"][i]
|
29 |
-
|
30 |
-
model_fullname_index = (
|
31 |
-
columns.index("fullname")
|
32 |
-
if "fullname" in [col.lower() for col in columns]
|
33 |
-
else -3
|
34 |
-
) # Will be used when we extract the model name from the data
|
35 |
-
|
36 |
-
try:
|
37 |
-
results_json = {
|
38 |
-
"T": results[0],
|
39 |
-
"Model": results[model_fullname_index],
|
40 |
-
} # Set the first 2 manually because normally there is a link in the "Model" column
|
41 |
-
|
42 |
-
if (
|
43 |
-
len(columns) < 20
|
44 |
-
): # If there are fewer than 20 columns (this number can definitely change), we know that we are trying the wrong component index, so break the loop to try the next component index.
|
45 |
-
break
|
46 |
-
|
47 |
-
for col_index, col_name in enumerate(columns[2:], start=2):
|
48 |
-
results_json[col_name] = results[col_index]
|
49 |
-
|
50 |
-
except IndexError: # Wrong component index, so breaking loop to try next component index. (NOTE: More than one component index can give you some results but we must find the right component index to get all results we want.)
|
51 |
-
break
|
52 |
-
result_list.append(results_json)
|
53 |
-
i += 1
|
54 |
-
except IndexError: # No rows to extract so return the list (We know it is the right component index because we didn't break out of loop on the other exception.)
|
55 |
-
return result_list
|
56 |
-
except (KeyError, TypeError):
|
57 |
-
continue
|
58 |
-
|
59 |
-
return result_list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|