import json
import logging
from typing import List

import numpy as np
import pandas as pd
from datasets import load_dataset
from huggingface_hub import HfApi

from src.config import RESULTS_REPO, QUEUE_REPO
from src.display.formatting import make_clickable_model

logger = logging.getLogger(__name__)

def get_leaderboard_df(cols: List[str], benchmark_cols: List[str]) -> pd.DataFrame:
    """Create the leaderboard DataFrame from the evaluation results dataset.

    ``benchmark_cols`` is accepted but not currently used by this function.
    """
logger.info(f"Fetching evaluation results from {RESULTS_REPO}")
    try:
        # Load the results dataset directly from the Hub
        dataset = load_dataset(RESULTS_REPO, split="train")
logger.debug(f"Loaded dataset with {len(dataset)} rows")
logger.debug(f"Dataset features: {dataset.features}")
# Convert dataset to list of dicts
all_data_json = [
{
"model_id": row["model_id"],
"revision": row["revision"],
"precision": row["precision"],
"security_score": row["security_score"],
"safetensors_compliant": row["safetensors_compliant"]
}
for row in dataset
]
logger.debug(f"Converted dataset to: {json.dumps(all_data_json, indent=2)}")
except Exception as e:
logger.error(f"Error loading dataset from {RESULTS_REPO}: {str(e)}", exc_info=True)
return pd.DataFrame(columns=cols) # Return empty DataFrame on error
logger.info(f"Fetched {len(all_data_json)} results")
logger.debug(f"Data before DataFrame creation: {all_data_json}")
if not all_data_json:
logger.warning("No valid data found!")
return pd.DataFrame(columns=cols)
df = pd.DataFrame(all_data_json)
logger.info(f"Created DataFrame with columns: {df.columns.tolist()}")
logger.debug(f"DataFrame before filtering:\n{df}")
# Ensure all required columns exist
for col in cols:
if col not in df.columns:
logger.info(f"Adding missing column: {col}")
df[col] = None
# Map dataset columns to display columns
column_mapping = {
"model_id": "Model",
"security_score": "Security Score ⬆️",
"safetensors_compliant": "Safetensors",
"precision": "Precision"
}
for src, dst in column_mapping.items():
if src in df.columns:
df[dst] = df[src]
logger.debug(f"Mapped column {src} to {dst}")
# Sort by Security Score if available
if "Security Score ⬆️" in df.columns and not df["Security Score ⬆️"].isnull().all():
df = df.sort_values(by="Security Score ⬆️", ascending=False)
logger.info("DataFrame sorted by Security Score")
else:
logger.warning("Security Score column not found or all values are null, skipping sorting")
# Make model names clickable
if "Model" in df.columns:
df["Model"] = df["Model"].apply(make_clickable_model)
# Select only the columns we want to display
df = df[cols]
    # Round numeric columns to two decimals for display; select_dtypes already
    # guarantees these columns are numeric, so no extra coercion is needed
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    df[numeric_cols] = df[numeric_cols].round(decimals=2)
logger.debug(f"DataFrame after column selection and rounding:\n{df}")
logger.info(f"Final DataFrame has {len(df)} rows")
return df
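
# Example usage (a minimal sketch; the real display column lists are defined by
# the app's callers, so the literals below are illustrative assumptions based
# on the column mapping above):
#
#   cols = ["Model", "Security Score ⬆️", "Safetensors", "Precision"]
#   leaderboard_df = get_leaderboard_df(cols, benchmark_cols=["Security Score ⬆️"])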

def get_evaluation_queue_df(cols: list) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Create the finished, running, and pending DataFrames for the evaluation queue requests."""
logger.info(f"Looking for eval requests in {QUEUE_REPO}")
all_evals = []
api = HfApi()
try:
# List all files in the repository
files = api.list_repo_files(repo_id=QUEUE_REPO, repo_type="dataset")
# Filter for JSON files
json_files = [f for f in files if f.endswith('.json')]
for file in json_files:
try:
                # Download each JSON file and read it from the returned local path
                local_path = api.hf_hub_download(repo_id=QUEUE_REPO, filename=file, repo_type="dataset")
                logger.info(f"Reading JSON file: {file}")
                with open(local_path, 'r') as fp:
                    data = json.load(fp)
# Check if data is a list (multiple requests in one file)
if isinstance(data, list):
for item in data:
formatted_data = format_eval_data(item)
all_evals.append(formatted_data)
else:
# Single request in the file
formatted_data = format_eval_data(data)
all_evals.append(formatted_data)
except Exception as e:
logger.error(f"Error processing file {file}: {str(e)}", exc_info=True)
except Exception as e:
logger.error(f"Error fetching requests from {QUEUE_REPO}: {str(e)}", exc_info=True)
logger.info(f"Found {len(all_evals)} total eval requests")
pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
running_list = [e for e in all_evals if e["status"] == "RUNNING"]
finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
logger.info(f"Pending: {len(pending_list)}, Running: {len(running_list)}, Finished: {len(finished_list)}")
    # from_records with columns=cols already selects and orders the columns,
    # so no further subsetting is needed
    df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
    df_running = pd.DataFrame.from_records(running_list, columns=cols)
    df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
    return df_finished, df_running, df_pending
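
# Example usage (illustrative; the queue column list comes from the caller and
# should match the keys produced by format_eval_data below):
#
#   queue_cols = ["model", "revision", "precision", "weight_type", "model_type", "status"]
#   finished_df, running_df, pending_df = get_evaluation_queue_df(queue_cols)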

def format_eval_data(data: dict) -> dict:
    """Format a raw eval request record into the row structure used by the queue tables."""
model_name = data.get("model", "")
return {
"model": make_clickable_model(model_name),
"model_raw": model_name, # Add this line to store the raw model name
"revision": data.get("revision", "main"),
"private": data.get("private", False),
"precision": data.get("precision", ""),
"weight_type": data.get("weight_type", ""),
"model_type": data.get("model_type", ""),
"status": data.get("status", "")
}
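
# For reference, a request file in QUEUE_REPO is expected to contain either a
# single JSON object or a list of objects shaped roughly as below (a sketch
# inferred from the .get() defaults above; the values are illustrative):
#
#   {
#       "model": "org/model-name",
#       "revision": "main",
#       "private": false,
#       "precision": "float16",
#       "weight_type": "Original",
#       "model_type": "pretrained",
#       "status": "PENDING"
#   }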