Align datatypes
- app.py +15 -1
- src/submission/submit.py +136 -1
app.py
CHANGED
@@ -42,7 +42,7 @@ from src.envs import (
     TOKEN
 )
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
-from src.submission.submit import add_new_eval
+from src.submission.submit import add_new_eval, initialize_queue_repo, initialize_results_repo
 
 
 # Setup logging
@@ -65,6 +65,13 @@ def initialize_space():
     logger.info("Initializing space")
     try:
         logger.info(f"Downloading queue data from {QUEUE_REPO}")
+
+        # Initialize queue repository if needed
+        if not initialize_queue_repo():
+            logger.error("Failed to initialize queue repository")
+            restart_space()
+            return
+
         snapshot_download(
             repo_id=QUEUE_REPO,
             local_dir=EVAL_REQUESTS_PATH,
@@ -79,6 +86,13 @@ def initialize_space():
 
     try:
         logger.info(f"Downloading results data from {RESULTS_REPO}")
+
+        # Initialize results repository if needed
+        if not initialize_results_repo():
+            logger.error("Failed to initialize results repository")
+            restart_space()
+            return
+
         snapshot_download(
             repo_id=RESULTS_REPO,
             local_dir=EVAL_RESULTS_PATH,
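Note: the new `initialize_*` checks above gate each `snapshot_download` call, so the space only restarts when a repository genuinely cannot be created. A minimal smoke test of that contract might look like the sketch below; the queue repo id is a placeholder (the real values come from `src.envs`), while `stacklok/results` matches the dataset card added in this commit.

# Hypothetical smoke test: each Hub repo should load as a dataset once the
# initialize_* helpers have run. Repo ids here are assumptions, not part of
# this diff.
from datasets import load_dataset

QUEUE_REPO = "your-org/requests"    # placeholder: real value lives in src.envs
RESULTS_REPO = "stacklok/results"   # name taken from the dataset card below

for repo in (QUEUE_REPO, RESULTS_REPO):
    try:
        ds = load_dataset(repo, split="train")
        print(f"{repo}: initialized with {ds.num_rows} rows")
    except Exception as exc:
        # initialize_queue_repo / initialize_results_repo would create it here
        print(f"{repo}: not initialized ({exc})")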
src/submission/submit.py
CHANGED
@@ -20,7 +20,8 @@ from src.config import (
     ALLOWED_WEIGHT_TYPES,
     DEFAULT_REVISION,
     LOG_LEVEL,
-    EVALUATION_WAIT_TIME
+    EVALUATION_WAIT_TIME,
+    RESULTS_REPO
 )
 from datasets import Dataset
 
@@ -101,6 +102,43 @@ def create_eval_entry(model: str, base_model: str, revision: str, precision: str
         "private": False,
     }
 
+def initialize_queue_repo():
+    """Initialize the queue repository with an empty dataset if it doesn't exist."""
+    try:
+        # Try to load the dataset
+        from datasets import load_dataset
+        try:
+            dataset = load_dataset(QUEUE_REPO, split="train")
+            logger.info("Queue repository already initialized")
+            return True
+        except Exception:
+            logger.info("Queue repository not initialized, creating empty dataset")
+
+        # Create an empty dataset with the required schema
+        empty_dataset = Dataset.from_dict({
+            "model": [],
+            "model_raw": [],
+            "base_model": [],
+            "revision": [],
+            "precision": [],
+            "weight_type": [],
+            "model_type": [],
+            "status": [],
+            "timestamp": []
+        })
+
+        # Push the empty dataset to initialize the repository
+        empty_dataset.push_to_hub(
+            QUEUE_REPO,
+            split="train",
+            commit_message="Initialize queue repository with empty dataset"
+        )
+        logger.info("Queue repository initialized successfully")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to initialize queue repository: {e}")
+        return False
+
 def add_new_eval(
     model: str,
     base_model: str,
@@ -127,6 +165,10 @@ def add_new_eval(
     global USERS_TO_SUBMISSION_DATES
     global EVAL_REQUESTS_PATH
 
+    # Initialize queue repository if needed
+    if not initialize_queue_repo():
+        return styled_error("Failed to initialize queue repository. Please try again later.")
+
     # Check and modify EVAL_REQUESTS_PATH at the beginning
     if not EVAL_REQUESTS_PATH or EVAL_REQUESTS_PATH == "YOUR_EVAL_REQUESTS_PATH_HERE":
         return styled_error("EVAL_REQUESTS_PATH is not properly configured. Please check your configuration.")
@@ -184,3 +226,96 @@ def add_new_eval(
         f"2. Security awareness using the stacklok/insecure-code dataset\n"
         f"Please wait for up to {EVALUATION_WAIT_TIME} minutes for the model to show in the PENDING list."
     )
+
+def initialize_results_repo():
+    """Initialize the results repository with a sample entry if it doesn't exist."""
+    try:
+        # Try to load the dataset
+        from datasets import load_dataset
+        try:
+            dataset = load_dataset(RESULTS_REPO, split="train")
+            logger.info("Results repository already initialized")
+            return True
+        except Exception:
+            logger.info("Results repository not initialized, creating initial dataset")
+
+        # Initialize with a sample entry as per init_huggingface_dataset.py
+        initial_data = {
+            "model": ["example/model"],
+            "model_raw": ["example/model"],
+            "base_model": ["gpt2"],
+            "revision": ["main"],
+            "precision": ["fp16"],
+            "weight_type": ["Safetensors"],
+            "model_type": ["Pretrained"],
+            "status": ["PENDING"],
+            "timestamp": ["2025-01-26T15:15:09.693973"],
+            "security_score": [0.5],
+            "safetensors_compliant": [True],
+            "hub_license": ["MIT"],
+            "hub_likes": [0],
+            "params_billion": [0.5],
+            "available_on_hub": [True],
+            "model_sha": ["abc123"]
+        }
+
+        # Create a Dataset object
+        dataset = Dataset.from_dict(initial_data)
+
+        # Push the dataset to the Hugging Face Hub
+        dataset.push_to_hub(
+            RESULTS_REPO,
+            split="train",
+            commit_message="Initialize results repository with sample dataset"
+        )
+
+        # Create and upload the dataset card
+        dataset_card = """---
+language:
+- en
+license:
+- mit
+---
+
+# Dataset Card for stacklok/results
+
+This dataset contains evaluation results for various models, focusing on security scores and other relevant metrics.
+
+## Dataset Structure
+
+The dataset contains the following fields:
+- `model`: The identifier of the model
+- `model_raw`: The raw model identifier
+- `base_model`: The base model if applicable
+- `revision`: The revision or version of the model
+- `precision`: The precision used for the model (e.g., fp16, fp32)
+- `weight_type`: Type of weights used
+- `model_type`: Type of the model
+- `status`: Current status of the evaluation
+- `timestamp`: When the evaluation was performed
+- `security_score`: A score representing the model's security evaluation
+- `safetensors_compliant`: A boolean indicating whether the model is compliant with safetensors
+- `hub_license`: The license of the model on Hugging Face Hub
+- `hub_likes`: Number of likes on Hugging Face Hub
+- `params_billion`: Number of parameters in billions
+- `available_on_hub`: Whether the model is available on Hugging Face Hub
+- `model_sha`: SHA hash of the model
+
+## Usage
+
+This dataset is used to populate the secure code leaderboard, providing insights into the security aspects of various models.
+"""
+
+        # Upload the dataset card
+        API.upload_file(
+            path_or_fileobj=dataset_card.encode(),
+            path_in_repo="README.md",
+            repo_id=RESULTS_REPO,
+            repo_type="dataset"
+        )
+
+        logger.info("Results repository initialized successfully")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to initialize results repository: {e}")
+        return False
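Note on the commit title: the queue schema (nine fields) is a strict subset of the results schema (the same nine plus seven metric fields). A quick offline check of that alignment, reusing the field dicts from this diff, could look like the sketch below. One caveat: the queue is initialized from empty lists, so `datasets` will typically infer its column types as `null` until real rows are pushed, which is worth keeping in mind when comparing dtypes.

# Sketch: rebuild both schemas locally and confirm every queue column also
# exists in the results schema; the field lists mirror the dicts in this diff.
from datasets import Dataset

queue = Dataset.from_dict({name: [] for name in (
    "model", "model_raw", "base_model", "revision", "precision",
    "weight_type", "model_type", "status", "timestamp",
)})

results = Dataset.from_dict({
    "model": ["example/model"], "model_raw": ["example/model"],
    "base_model": ["gpt2"], "revision": ["main"], "precision": ["fp16"],
    "weight_type": ["Safetensors"], "model_type": ["Pretrained"],
    "status": ["PENDING"], "timestamp": ["2025-01-26T15:15:09.693973"],
    "security_score": [0.5], "safetensors_compliant": [True],
    "hub_license": ["MIT"], "hub_likes": [0], "params_billion": [0.5],
    "available_on_hub": [True], "model_sha": ["abc123"],
})

for name, feature in queue.features.items():
    assert name in results.features, f"queue column missing from results: {name}"
    print(f"{name}: queue={feature.dtype} results={results.features[name].dtype}")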