T145 committed on
Commit
52a5d9f
·
1 Parent(s): 27e1d19

Use the official dataset over scraping

Browse files
Files changed (2) hide show
  1. functions.py +4 -6
  2. openllm.py +0 -59
functions.py CHANGED
@@ -2,6 +2,7 @@ import os
2
 
3
  import gradio as gr
4
  import pandas as pd
 
5
  from huggingface_hub import (
6
  CommitOperationAdd,
7
  EvalResult,
@@ -12,13 +13,11 @@ from huggingface_hub import (
12
  from huggingface_hub.repocard_data import eval_results_to_model_index
13
  from pytablewriter import MarkdownTableWriter
14
 
15
- from openllm import get_datas, get_json_format_data
16
-
17
  BOT_HF_TOKEN = os.getenv("BOT_HF_TOKEN")
18
 
19
 
20
  def search(df, value):
21
- result_df = df[df["Model"] == value]
22
  return result_df.iloc[0].to_dict() if not result_df.empty else None
23
 
24
 
@@ -157,9 +156,8 @@ def commit(
157
  message="Adding Evaluation Results",
158
  oauth_token: gr.OAuthToken | None = None,
159
  ): # specify pr number if you want to edit it, don't if you don't want
160
- data = get_json_format_data()
161
- finished_models = get_datas(data)
162
- df = pd.DataFrame(finished_models)
163
 
164
  desc = """
165
  This is an automated PR created with https://huggingface.co/spaces/T145/open-llm-leaderboard-results-to-modelcard
 
2
 
3
  import gradio as gr
4
  import pandas as pd
5
+ from datasets import load_dataset
6
  from huggingface_hub import (
7
  CommitOperationAdd,
8
  EvalResult,
 
13
  from huggingface_hub.repocard_data import eval_results_to_model_index
14
  from pytablewriter import MarkdownTableWriter
15
 
 
 
16
  BOT_HF_TOKEN = os.getenv("BOT_HF_TOKEN")
17
 
18
 
19
  def search(df, value):
20
+ result_df = df[df["fullname"] == value]
21
  return result_df.iloc[0].to_dict() if not result_df.empty else None
22
 
23
 
 
156
  message="Adding Evaluation Results",
157
  oauth_token: gr.OAuthToken | None = None,
158
  ): # specify pr number if you want to edit it, don't if you don't want
159
+ data = load_dataset("open-llm-leaderboard/contents", split="train")
160
+ df = pd.DataFrame(data)
 
161
 
162
  desc = """
163
  This is an automated PR created with https://huggingface.co/spaces/T145/open-llm-leaderboard-results-to-modelcard
openllm.py DELETED
@@ -1,59 +0,0 @@
1
- import json
2
-
3
- import requests
4
- from bs4 import BeautifulSoup
5
-
6
-
7
- def get_json_format_data():
8
- url = "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/"
9
- response = requests.get(url)
10
- soup = BeautifulSoup(response.content, "html.parser")
11
-
12
- script_elements = soup.find_all("script")
13
- json_format_data = json.loads(str(script_elements[1])[31:-10])
14
- return json_format_data
15
-
16
-
17
- def get_datas(data):
18
- for component_index in range(
19
- 0, 50, 1
20
- ): # component_index sometimes changes when they update the space, we can use this "for" loop to avoid changing component index manually
21
- try:
22
- result_list = []
23
- i = 0
24
- columns = data["components"][component_index]["props"]["headers"]
25
-
26
- while True:
27
- try:
28
- results = data["components"][component_index]["props"]["value"]["data"][i]
29
-
30
- model_fullname_index = (
31
- columns.index("fullname")
32
- if "fullname" in [col.lower() for col in columns]
33
- else -3
34
- ) # Will be used when we extract the model name from the data
35
-
36
- try:
37
- results_json = {
38
- "T": results[0],
39
- "Model": results[model_fullname_index],
40
- } # Set the first 2 manually because normally there is a link in the "Model" column
41
-
42
- if (
43
- len(columns) < 20
44
- ): # If there are less than 20 columns (this number can definetly change), we know that we are trying wrong component index, so breaking loop to try next component index.
45
- break
46
-
47
- for col_index, col_name in enumerate(columns[2:], start=2):
48
- results_json[col_name] = results[col_index]
49
-
50
- except IndexError: # Wrong component index, so breaking loop to try next component index. (NOTE: More than one component index can give you some results but we must find the right component index to get all results we want.)
51
- break
52
- result_list.append(results_json)
53
- i += 1
54
- except IndexError: # No rows to extract so return the list (We know it is the right component index because we didn't break out of loop on the other exception.)
55
- return result_list
56
- except (KeyError, TypeError):
57
- continue
58
-
59
- return result_list