Clean up
Browse files
- app.py +15 -24
- debug.py +1 -1
- hub/version.txt +1 -1
- init_huggingface_dataset.py +0 -85
- logs/evaluation.log +0 -0
- logs/security_eval.log +0 -0
- src/populate.py +4 -6
- src/submission/submit.py +1 -4
app.py
CHANGED
@@ -1,11 +1,10 @@
 import gradio as gr
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 from apscheduler.schedulers.background import BackgroundScheduler
-from huggingface_hub import snapshot_download
+from huggingface_hub import snapshot_download
 import pandas as pd
 import os
 import logging
-import json
 from datetime import datetime
 from datasets import Dataset
 
@@ -25,9 +24,7 @@ from src.display.utils import (
     COLS,
     EVAL_COLS,
     EVAL_TYPES,
-    AutoEvalColumn,
     ModelType,
-    fields,
     WeightType,
     Precision
 )
@@ -42,7 +39,7 @@ from src.envs import (
     TOKEN
 )
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
-from src.submission.submit import
+from src.submission.submit import initialize_queue_repo, initialize_results_repo
 
 
 # Setup logging
@@ -65,13 +62,13 @@ def initialize_space():
     logger.info("Initializing space")
     try:
         logger.info(f"Downloading queue data from {QUEUE_REPO}")
-
+
         # Initialize queue repository if needed
         if not initialize_queue_repo():
             logger.error("Failed to initialize queue repository")
             restart_space()
             return
-
+
         snapshot_download(
             repo_id=QUEUE_REPO,
             local_dir=EVAL_REQUESTS_PATH,
@@ -86,13 +83,13 @@ def initialize_space():
 
     try:
         logger.info(f"Downloading results data from {RESULTS_REPO}")
-
+
         # Initialize results repository if needed
         if not initialize_results_repo():
             logger.error("Failed to initialize results repository")
             restart_space()
             return
-
+
         snapshot_download(
             repo_id=RESULTS_REPO,
             local_dir=EVAL_RESULTS_PATH,
@@ -117,12 +114,6 @@ LEADERBOARD_DF = get_leaderboard_df(COLS, BENCHMARK_COLS)
     pending_eval_queue_df,
 ) = get_evaluation_queue_df(EVAL_COLS)
 
-# Function to update the leaderboard
-def update_leaderboard():
-    global LEADERBOARD_DF
-    LEADERBOARD_DF = get_leaderboard_df(COLS, BENCHMARK_COLS)
-    return LEADERBOARD_DF
-
 def process_evaluation_queue():
     """Process pending evaluation requests."""
     logger.info("Processing evaluation queue")
@@ -172,40 +163,40 @@ def update_request_status(model_name, status):
         # Load the current dataset
        from datasets import load_dataset
        dataset = load_dataset(QUEUE_REPO, split="train")
-
+
        # Convert to dictionary for easier manipulation
        data_dict = dataset.to_dict()
-
+
        # Find the most recent request for this model
        indices = [i for i, m in enumerate(data_dict["model_raw"]) if m == model_name]
-
+
        if not indices:
            logger.error(f"No request found for model {model_name}")
            return
-
+
        # Get the most recent request (last index)
        latest_index = indices[-1]
-
+
        # Update the status for the found request
        data_dict["status"][latest_index] = status
-
+
        # Create new dataset with updated status
        updated_dataset = Dataset.from_dict(data_dict)
-
+
        # Push the updated dataset back to the hub with a descriptive commit message
        updated_dataset.push_to_hub(
            QUEUE_REPO,
            split="train",
            commit_message=f"Update status to {status} for {model_name}"
        )
-
+
        logger.info(f"Updated status for {model_name} to {status}")
    except Exception as e:
        logger.error(f"Failed to update status for {model_name}: {str(e)}", exc_info=True)
 
 # Remove the extract_model_name function as it's no longer needed
 
-
+
 
 def save_results_to_repo(results, repo):
     """Save evaluation results to the specified repository."""
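Note: the update_request_status() hunk above uses the datasets library's read-modify-write pattern for editing a hosted dataset in place. Below is a minimal standalone sketch of that same pattern, built only from the repo variable, column names, and calls visible in the diff; the function name and the commented example values are illustrative and not part of this commit.

from datasets import Dataset, load_dataset

def set_request_status(queue_repo: str, model_name: str, status: str) -> None:
    # Pull the current queue split and convert it to plain Python lists.
    dataset = load_dataset(queue_repo, split="train")
    data = dataset.to_dict()

    # Find the most recent request for this model; bail out if none exists.
    matches = [i for i, m in enumerate(data["model_raw"]) if m == model_name]
    if not matches:
        return
    data["status"][matches[-1]] = status

    # Rebuild the dataset and push it back with a descriptive commit message.
    Dataset.from_dict(data).push_to_hub(
        queue_repo,
        split="train",
        commit_message=f"Update status to {status} for {model_name}",
    )

# Example usage (model name and status value are illustrative):
# set_request_status(QUEUE_REPO, "example/model", "PENDING")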
debug.py
CHANGED
@@ -1,5 +1,5 @@
 import pandas as pd
-from src.display.utils import
+from src.display.utils import BENCHMARK_COLS
 from src.about import Tasks
 from src.leaderboard.read_evals import get_raw_eval_results
 
hub/version.txt
CHANGED
@@ -1 +1 @@
-1
+0.1
init_huggingface_dataset.py
DELETED
@@ -1,85 +0,0 @@
-from datasets import Dataset
-from huggingface_hub import HfApi, login
-import os
-
-# Initialize the dataset with a sample entry
-initial_data = {
-    "model": ["example/model"],
-    "model_raw": ["example/model"],
-    "base_model": ["gpt2"],
-    "revision": ["main"],
-    "precision": ["fp16"],
-    "weight_type": ["Safetensors"],
-    "model_type": ["Pretrained"],
-    "status": ["PENDING"],
-    "timestamp": ["2025-01-26T15:15:09.693973"],
-    "security_score": [0.5],
-    "safetensors_compliant": [True],
-    "hub_license": ["MIT"],
-    "hub_likes": [0],
-    "params_billion": [0.5],
-    "available_on_hub": [True],
-    "model_sha": ["abc123"]
-}
-
-# Create a Dataset object
-dataset = Dataset.from_dict(initial_data)
-
-# Login to Hugging Face (you'll need to set the HUGGINGFACE_TOKEN environment variable)
-login()
-
-# Push the dataset to the Hugging Face Hub
-dataset.push_to_hub("stacklok/results")
-
-# Create a dataset card
-dataset_card = """
----
-language:
-- en
-license:
-- mit
----
-
-# Dataset Card for stacklok/results
-
-This dataset contains evaluation results for various models, focusing on security scores and other relevant metrics.
-
-## Dataset Structure
-
-The dataset contains the following fields:
-- `model`: The identifier of the model
-- `model_raw`: The raw model identifier
-- `base_model`: The base model if applicable
-- `revision`: The revision or version of the model
-- `precision`: The precision used for the model (e.g., fp16, fp32)
-- `weight_type`: Type of weights used
-- `model_type`: Type of the model
-- `status`: Current status of the evaluation
-- `timestamp`: When the evaluation was performed
-- `security_score`: A score representing the model's security evaluation
-- `safetensors_compliant`: A boolean indicating whether the model is compliant with safetensors
-- `hub_license`: The license of the model on Hugging Face Hub
-- `hub_likes`: Number of likes on Hugging Face Hub
-- `params_billion`: Number of parameters in billions
-- `available_on_hub`: Whether the model is available on Hugging Face Hub
-- `model_sha`: SHA hash of the model
-
-## Usage
-
-This dataset is used to populate the secure code leaderboard, providing insights into the security aspects of various models.
-"""
-
-# Write the dataset card
-with open("README.md", "w") as f:
-    f.write(dataset_card)
-
-# Upload the dataset card
-api = HfApi()
-api.upload_file(
-    path_or_fileobj="README.md",
-    path_in_repo="README.md",
-    repo_id="stacklok/results",
-    repo_type="dataset"
-)
-
-print("Dataset initialized and card uploaded successfully!")
logs/evaluation.log
DELETED
File without changes

logs/security_eval.log
DELETED
File without changes
src/populate.py
CHANGED
@@ -1,17 +1,15 @@
 import json
-
+
 import numpy as np
 import pandas as pd
 import logging
-from typing import List
-
+from typing import List
+from src.config import RESULTS_REPO, QUEUE_REPO
 from src.display.formatting import make_clickable_model
-from src.leaderboard.read_evals import get_raw_eval_results
 
 logger = logging.getLogger(__name__)
 
-
-from src.config import RESULTS_REPO, QUEUE_REPO
+
 
 def get_leaderboard_df(cols: List[str], benchmark_cols: List[str]) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
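Note: populate.py now takes RESULTS_REPO and QUEUE_REPO from src.config, a module that is not part of this diff. A hypothetical sketch of what such a module could contain, assuming it only centralizes the two repo IDs; the results repo ID matches the one pushed to by the deleted init_huggingface_dataset.py, while the queue default and the environment-variable names are assumptions.

# src/config.py -- hypothetical sketch, not included in this commit
import os

# Hub dataset that stores evaluation results ("stacklok/results" is the ID
# used by the deleted init_huggingface_dataset.py script).
RESULTS_REPO = os.getenv("RESULTS_REPO", "stacklok/results")

# Hub dataset that stores pending evaluation requests (placeholder default).
QUEUE_REPO = os.getenv("QUEUE_REPO", "stacklok/requests")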
src/submission/submit.py
CHANGED
@@ -1,5 +1,3 @@
-import json
-import os
 import logging
 from datetime import datetime, timezone
 from typing import Dict, Tuple, Optional, Any
@@ -331,8 +329,7 @@ def initialize_results_repo():
         return True
     except Exception:
         logger.info("Results repository not initialized, creating initial dataset")
-
-        # Initialize with a sample entry as per init_huggingface_dataset.py
+
         initial_data = {
             "model": ["example/model"],
             "model_raw": ["example/model"],
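Note: the fallback branch above seeds initialize_results_repo() with the same sample entry that the deleted init_huggingface_dataset.py used. The remainder of the function falls outside this hunk; assuming it mirrors the deleted script, the fallback would finish roughly as in the sketch below (the helper name is illustrative, not the actual code).

from datasets import Dataset

def seed_results_repo(initial_data: dict, results_repo: str) -> bool:
    # Build a one-row dataset from the sample entry and push it to the Hub,
    # mirroring what init_huggingface_dataset.py did before it was removed.
    Dataset.from_dict(initial_data).push_to_hub(results_repo)
    return True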