Align datatypes
- app.py +15 -1
- src/submission/submit.py +136 -1
app.py
CHANGED
@@ -42,7 +42,7 @@ from src.envs import (
     TOKEN
 )
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
-from src.submission.submit import add_new_eval
+from src.submission.submit import add_new_eval, initialize_queue_repo, initialize_results_repo
 
 
 # Setup logging
@@ -65,6 +65,13 @@ def initialize_space():
     logger.info("Initializing space")
     try:
         logger.info(f"Downloading queue data from {QUEUE_REPO}")
+
+        # Initialize queue repository if needed
+        if not initialize_queue_repo():
+            logger.error("Failed to initialize queue repository")
+            restart_space()
+            return
+
         snapshot_download(
             repo_id=QUEUE_REPO,
             local_dir=EVAL_REQUESTS_PATH,
@@ -79,6 +86,13 @@ def initialize_space():
 
     try:
         logger.info(f"Downloading results data from {RESULTS_REPO}")
+
+        # Initialize results repository if needed
+        if not initialize_results_repo():
+            logger.error("Failed to initialize results repository")
+            restart_space()
+            return
+
         snapshot_download(
             repo_id=RESULTS_REPO,
             local_dir=EVAL_RESULTS_PATH,
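Note: the new `initialize_*` checks above gate each `snapshot_download` call, so the space only restarts when a repository genuinely cannot be created. A minimal smoke test of that contract might look like the sketch below; the queue repo id is a placeholder (the real values come from `src.envs`), while `stacklok/results` matches the dataset card added in this commit.

# Hypothetical smoke test: each Hub repo should load as a dataset once the
# initialize_* helpers have run. Repo ids here are assumptions, not part of
# this diff.
from datasets import load_dataset

QUEUE_REPO = "your-org/requests"    # placeholder: real value lives in src.envs
RESULTS_REPO = "stacklok/results"   # name taken from the dataset card below

for repo in (QUEUE_REPO, RESULTS_REPO):
    try:
        ds = load_dataset(repo, split="train")
        print(f"{repo}: initialized with {ds.num_rows} rows")
    except Exception as exc:
        # initialize_queue_repo / initialize_results_repo would create it here
        print(f"{repo}: not initialized ({exc})")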
src/submission/submit.py
CHANGED
@@ -20,7 +20,8 @@ from src.config import (
     ALLOWED_WEIGHT_TYPES,
     DEFAULT_REVISION,
     LOG_LEVEL,
-    EVALUATION_WAIT_TIME
+    EVALUATION_WAIT_TIME,
+    RESULTS_REPO
 )
 from datasets import Dataset
 
@@ -101,6 +102,43 @@ def create_eval_entry(model: str, base_model: str, revision: str, precision: str
         "private": False,
     }
 
+def initialize_queue_repo():
+    """Initialize the queue repository with an empty dataset if it doesn't exist."""
+    try:
+        # Try to load the dataset
+        from datasets import load_dataset
+        try:
+            dataset = load_dataset(QUEUE_REPO, split="train")
+            logger.info("Queue repository already initialized")
+            return True
+        except Exception:
+            logger.info("Queue repository not initialized, creating empty dataset")
+
+        # Create an empty dataset with the required schema
+        empty_dataset = Dataset.from_dict({
+            "model": [],
+            "model_raw": [],
+            "base_model": [],
+            "revision": [],
+            "precision": [],
+            "weight_type": [],
+            "model_type": [],
+            "status": [],
+            "timestamp": []
+        })
+
+        # Push the empty dataset to initialize the repository
+        empty_dataset.push_to_hub(
+            QUEUE_REPO,
+            split="train",
+            commit_message="Initialize queue repository with empty dataset"
+        )
+        logger.info("Queue repository initialized successfully")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to initialize queue repository: {e}")
+        return False
+
 def add_new_eval(
     model: str,
     base_model: str,
@@ -127,6 +165,10 @@ def add_new_eval(
     global USERS_TO_SUBMISSION_DATES
     global EVAL_REQUESTS_PATH
 
+    # Initialize queue repository if needed
+    if not initialize_queue_repo():
+        return styled_error("Failed to initialize queue repository. Please try again later.")
+
     # Check and modify EVAL_REQUESTS_PATH at the beginning
     if not EVAL_REQUESTS_PATH or EVAL_REQUESTS_PATH == "YOUR_EVAL_REQUESTS_PATH_HERE":
         return styled_error("EVAL_REQUESTS_PATH is not properly configured. Please check your configuration.")
@@ -184,3 +226,96 @@ def add_new_eval(
         f"2. Security awareness using the stacklok/insecure-code dataset\n"
         f"Please wait for up to {EVALUATION_WAIT_TIME} minutes for the model to show in the PENDING list."
     )
+
+def initialize_results_repo():
+    """Initialize the results repository with a sample entry if it doesn't exist."""
+    try:
+        # Try to load the dataset
+        from datasets import load_dataset
+        try:
+            dataset = load_dataset(RESULTS_REPO, split="train")
+            logger.info("Results repository already initialized")
+            return True
+        except Exception:
+            logger.info("Results repository not initialized, creating initial dataset")
+
+        # Initialize with a sample entry as per init_huggingface_dataset.py
+        initial_data = {
+            "model": ["example/model"],
+            "model_raw": ["example/model"],
+            "base_model": ["gpt2"],
+            "revision": ["main"],
+            "precision": ["fp16"],
+            "weight_type": ["Safetensors"],
+            "model_type": ["Pretrained"],
+            "status": ["PENDING"],
+            "timestamp": ["2025-01-26T15:15:09.693973"],
+            "security_score": [0.5],
+            "safetensors_compliant": [True],
+            "hub_license": ["MIT"],
+            "hub_likes": [0],
+            "params_billion": [0.5],
+            "available_on_hub": [True],
+            "model_sha": ["abc123"]
+        }
+
+        # Create a Dataset object
+        dataset = Dataset.from_dict(initial_data)
+
+        # Push the dataset to the Hugging Face Hub
+        dataset.push_to_hub(
+            RESULTS_REPO,
+            split="train",
+            commit_message="Initialize results repository with sample dataset"
+        )
+
+        # Create and upload the dataset card
+        dataset_card = """---
+language:
+- en
+license:
+- mit
+---
+
+# Dataset Card for stacklok/results
+
+This dataset contains evaluation results for various models, focusing on security scores and other relevant metrics.
+
+## Dataset Structure
+
+The dataset contains the following fields:
+- `model`: The identifier of the model
+- `model_raw`: The raw model identifier
+- `base_model`: The base model if applicable
+- `revision`: The revision or version of the model
+- `precision`: The precision used for the model (e.g., fp16, fp32)
+- `weight_type`: Type of weights used
+- `model_type`: Type of the model
+- `status`: Current status of the evaluation
+- `timestamp`: When the evaluation was performed
+- `security_score`: A score representing the model's security evaluation
+- `safetensors_compliant`: A boolean indicating whether the model is compliant with safetensors
+- `hub_license`: The license of the model on Hugging Face Hub
+- `hub_likes`: Number of likes on Hugging Face Hub
+- `params_billion`: Number of parameters in billions
+- `available_on_hub`: Whether the model is available on Hugging Face Hub
+- `model_sha`: SHA hash of the model
+
+## Usage
+
+This dataset is used to populate the secure code leaderboard, providing insights into the security aspects of various models.
+"""
+
+        # Upload the dataset card
+        API.upload_file(
+            path_or_fileobj=dataset_card.encode(),
+            path_in_repo="README.md",
+            repo_id=RESULTS_REPO,
+            repo_type="dataset"
+        )
+
+        logger.info("Results repository initialized successfully")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to initialize results repository: {e}")
+        return False
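Note on the commit title: the queue schema (nine fields) is a strict subset of the results schema (the same nine plus seven metric fields). A quick offline check of that alignment, reusing the field dicts from this diff, could look like the sketch below. One caveat: the queue is initialized from empty lists, so `datasets` will typically infer its column types as `null` until real rows are pushed, which is worth keeping in mind when comparing dtypes.

# Sketch: rebuild both schemas locally and confirm every queue column also
# exists in the results schema; the field lists mirror the dicts in this diff.
from datasets import Dataset

queue = Dataset.from_dict({name: [] for name in (
    "model", "model_raw", "base_model", "revision", "precision",
    "weight_type", "model_type", "status", "timestamp",
)})

results = Dataset.from_dict({
    "model": ["example/model"], "model_raw": ["example/model"],
    "base_model": ["gpt2"], "revision": ["main"], "precision": ["fp16"],
    "weight_type": ["Safetensors"], "model_type": ["Pretrained"],
    "status": ["PENDING"], "timestamp": ["2025-01-26T15:15:09.693973"],
    "security_score": [0.5], "safetensors_compliant": [True],
    "hub_license": ["MIT"], "hub_likes": [0], "params_billion": [0.5],
    "available_on_hub": [True], "model_sha": ["abc123"],
})

for name, feature in queue.features.items():
    assert name in results.features, f"queue column missing from results: {name}"
    print(f"{name}: queue={feature.dtype} results={results.features[name].dtype}")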