Spaces:

T145
/

open-llm-leaderboard-results-to-modelcard

Running

App Files Community

T145 committed on Dec 5, 2024

Commit

52a5d9f

1 Parent(s): 27e1d19

Use the official dataset over scraping

Browse files

Files changed (2) hide show

functions.py +4 -6
openllm.py +0 -59

functions.py CHANGED Viewed

@@ -2,6 +2,7 @@ import os
 import gradio as gr
 import pandas as pd
 from huggingface_hub import (
     CommitOperationAdd,
     EvalResult,
@@ -12,13 +13,11 @@ from huggingface_hub import (
 from huggingface_hub.repocard_data import eval_results_to_model_index
 from pytablewriter import MarkdownTableWriter
-from openllm import get_datas, get_json_format_data
 BOT_HF_TOKEN = os.getenv("BOT_HF_TOKEN")
 def search(df, value):
-    result_df = df[df["Model"] == value]
     return result_df.iloc[0].to_dict() if not result_df.empty else None
@@ -157,9 +156,8 @@ def commit(
     message="Adding Evaluation Results",
     oauth_token: gr.OAuthToken | None = None,
 ):  # specify pr number if you want to edit it, don't if you don't want
-    data = get_json_format_data()
-    finished_models = get_datas(data)
-    df = pd.DataFrame(finished_models)
     desc = """
   This is an automated PR created with https://huggingface.co/spaces/T145/open-llm-leaderboard-results-to-modelcard

 import gradio as gr
 import pandas as pd
+from datasets import load_dataset
 from huggingface_hub import (
     CommitOperationAdd,
     EvalResult,
 from huggingface_hub.repocard_data import eval_results_to_model_index
 from pytablewriter import MarkdownTableWriter
 BOT_HF_TOKEN = os.getenv("BOT_HF_TOKEN")
 def search(df, value):
+    result_df = df[df["fullname"] == value]
     return result_df.iloc[0].to_dict() if not result_df.empty else None
     message="Adding Evaluation Results",
     oauth_token: gr.OAuthToken | None = None,
 ):  # specify pr number if you want to edit it, don't if you don't want
+    data = load_dataset("open-llm-leaderboard/contents", split="train")
+    df = pd.DataFrame(data)
     desc = """
   This is an automated PR created with https://huggingface.co/spaces/T145/open-llm-leaderboard-results-to-modelcard

openllm.py DELETED Viewed

@@ -1,59 +0,0 @@
-import json
-import requests
-from bs4 import BeautifulSoup
-def get_json_format_data():
-    url = "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/"
-    response = requests.get(url)
-    soup = BeautifulSoup(response.content, "html.parser")
-    script_elements = soup.find_all("script")
-    json_format_data = json.loads(str(script_elements[1])[31:-10])
-    return json_format_data
-def get_datas(data):
-    for component_index in range(
-        0, 50, 1
-    ):  # component_index sometimes changes when they update the space, we can use this "for" loop to avoid changing component index manually
-        try:
-            result_list = []
-            i = 0
-            columns = data["components"][component_index]["props"]["headers"]
-            while True:
-                try:
-                    results = data["components"][component_index]["props"]["value"]["data"][i]
-                    model_fullname_index = (
-                        columns.index("fullname")
-                        if "fullname" in [col.lower() for col in columns]
-                        else -3
-                    )  # Will be used when we extract the model name from the data
-                    try:
-                        results_json = {
-                            "T": results[0],
-                            "Model": results[model_fullname_index],
-                        }  # Set the first 2 manually because normally there is a link in the "Model" column
-                        if (
-                            len(columns) < 20
-                        ):  # If there are less than 20 columns (this number can definetly change), we know that we are trying wrong component index, so breaking loop to try next component index.
-                            break
-                        for col_index, col_name in enumerate(columns[2:], start=2):
-                            results_json[col_name] = results[col_index]
-                    except IndexError:  # Wrong component index, so breaking loop to try next component index. (NOTE: More than one component index can give you some results but we must find the right component index to get all results we want.)
-                        break
-                    result_list.append(results_json)
-                    i += 1
-                except IndexError:  # No rows to extract so return the list (We know it is the right component index because we didn't break out of loop on the other exception.)
-                    return result_list
-        except (KeyError, TypeError):
-            continue
-    return result_list