Point to the QUEUE_REPO, not eval
Browse files
- app.py +3 -3
- src/populate.py +24 -11
app.py
CHANGED
@@ -105,7 +105,7 @@ LEADERBOARD_DF = get_leaderboard_df(COLS, BENCHMARK_COLS)
|
|
105 |
finished_eval_queue_df,
|
106 |
running_eval_queue_df,
|
107 |
pending_eval_queue_df,
|
108 |
-
) = get_evaluation_queue_df(
|
109 |
|
110 |
# Function to update the leaderboard
|
111 |
def update_leaderboard():
|
@@ -367,7 +367,7 @@ with demo:
|
|
367 |
logger.info(f"Added request for {model} to {QUEUE_REPO}")
|
368 |
|
369 |
# Get updated pending evaluations
|
370 |
-
_, _, pending_eval_queue_df = get_evaluation_queue_df(
|
371 |
|
372 |
# Start processing queue in background
|
373 |
scheduler.add_job(process_evaluation_queue, id='process_queue_job', replace_existing=True)
|
@@ -402,7 +402,7 @@ with demo:
|
|
402 |
|
403 |
# Update evaluation tables periodically
|
404 |
def update_evaluation_tables():
|
405 |
-
finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(
|
406 |
return finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
|
407 |
|
408 |
# Setup schedulers
|
|
|
105 |
finished_eval_queue_df,
|
106 |
running_eval_queue_df,
|
107 |
pending_eval_queue_df,
|
108 |
+
) = get_evaluation_queue_df(EVAL_COLS)
|
109 |
|
110 |
# Function to update the leaderboard
|
111 |
def update_leaderboard():
|
|
|
367 |
logger.info(f"Added request for {model} to {QUEUE_REPO}")
|
368 |
|
369 |
# Get updated pending evaluations
|
370 |
+
_, _, pending_eval_queue_df = get_evaluation_queue_df(EVAL_COLS)
|
371 |
|
372 |
# Start processing queue in background
|
373 |
scheduler.add_job(process_evaluation_queue, id='process_queue_job', replace_existing=True)
|
|
|
402 |
|
403 |
# Update evaluation tables periodically
|
404 |
def update_evaluation_tables():
|
405 |
+
finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_COLS)
|
406 |
return finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
|
407 |
|
408 |
# Setup schedulers
|
src/populate.py
CHANGED
@@ -10,7 +10,7 @@ from src.leaderboard.read_evals import get_raw_eval_results
|
|
10 |
logger = logging.getLogger(__name__)
|
11 |
|
12 |
from huggingface_hub import HfApi
|
13 |
-
from src.config import RESULTS_REPO
|
14 |
|
15 |
def get_leaderboard_df(cols: list, benchmark_cols: list) -> pd.DataFrame:
|
16 |
"""Creates a dataframe from all the individual experiment results"""
|
@@ -86,18 +86,26 @@ def get_leaderboard_df(cols: list, benchmark_cols: list) -> pd.DataFrame:
|
|
86 |
return df
|
87 |
|
88 |
|
89 |
-
def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|
90 |
-
"""Creates the different dataframes for the evaluation queues
|
91 |
-
|
92 |
all_evals = []
|
93 |
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
data = json.load(fp)
|
102 |
|
103 |
# Check if data is a list (multiple requests in one file)
|
@@ -109,6 +117,11 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|
|
109 |
# Single request in the file
|
110 |
formatted_data = format_eval_data(data)
|
111 |
all_evals.append(formatted_data)
|
|
|
|
|
|
|
|
|
|
|
112 |
|
113 |
print(f"Found {len(all_evals)} total eval requests")
|
114 |
pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
|
|
|
10 |
logger = logging.getLogger(__name__)
|
11 |
|
12 |
from huggingface_hub import HfApi
|
13 |
+
from src.config import RESULTS_REPO, QUEUE_REPO
|
14 |
|
15 |
def get_leaderboard_df(cols: list, benchmark_cols: list) -> pd.DataFrame:
|
16 |
"""Creates a dataframe from all the individual experiment results"""
|
|
|
86 |
return df
|
87 |
|
88 |
|
89 |
+
def get_evaluation_queue_df(cols: list) -> list[pd.DataFrame]:
|
90 |
+
"""Creates the different dataframes for the evaluation queues requests"""
|
91 |
+
logger.info(f"Looking for eval requests in {QUEUE_REPO}")
|
92 |
all_evals = []
|
93 |
|
94 |
+
api = HfApi()
|
95 |
+
|
96 |
+
try:
|
97 |
+
# List all files in the repository
|
98 |
+
files = api.list_repo_files(repo_id=QUEUE_REPO, repo_type="dataset")
|
99 |
+
|
100 |
+
# Filter for JSON files
|
101 |
+
json_files = [f for f in files if f.endswith('.json')]
|
102 |
+
|
103 |
+
for file in json_files:
|
104 |
+
try:
|
105 |
+
# Download and read each JSON file
|
106 |
+
content = api.hf_hub_download(repo_id=QUEUE_REPO, filename=file, repo_type="dataset")
|
107 |
+
logger.info(f"Reading JSON file: {file}")
|
108 |
+
with open(content, 'r') as fp:
|
109 |
data = json.load(fp)
|
110 |
|
111 |
# Check if data is a list (multiple requests in one file)
|
|
|
117 |
# Single request in the file
|
118 |
formatted_data = format_eval_data(data)
|
119 |
all_evals.append(formatted_data)
|
120 |
+
except Exception as e:
|
121 |
+
logger.error(f"Error processing file {file}: {str(e)}", exc_info=True)
|
122 |
+
|
123 |
+
except Exception as e:
|
124 |
+
logger.error(f"Error fetching requests from {QUEUE_REPO}: {str(e)}", exc_info=True)
|
125 |
|
126 |
print(f"Found {len(all_evals)} total eval requests")
|
127 |
pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
|