lukehinds committed
Commit ccd50f1 · 1 Parent(s): b2f90ed

Align datatypes

Files changed (2):
  1. app.py +15 -1
  2. src/submission/submit.py +136 -1
app.py CHANGED

@@ -42,7 +42,7 @@ from src.envs import (
     TOKEN
 )
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
-from src.submission.submit import add_new_eval
+from src.submission.submit import add_new_eval, initialize_queue_repo, initialize_results_repo
 
 
 # Setup logging
@@ -65,6 +65,13 @@ def initialize_space():
     logger.info("Initializing space")
     try:
         logger.info(f"Downloading queue data from {QUEUE_REPO}")
+
+        # Initialize queue repository if needed
+        if not initialize_queue_repo():
+            logger.error("Failed to initialize queue repository")
+            restart_space()
+            return
+
         snapshot_download(
             repo_id=QUEUE_REPO,
             local_dir=EVAL_REQUESTS_PATH,
@@ -79,6 +86,13 @@ def initialize_space():
 
     try:
         logger.info(f"Downloading results data from {RESULTS_REPO}")
+
+        # Initialize results repository if needed
+        if not initialize_results_repo():
+            logger.error("Failed to initialize results repository")
+            restart_space()
+            return
+
         snapshot_download(
             repo_id=RESULTS_REPO,
             local_dir=EVAL_RESULTS_PATH,
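
Reviewer note, not part of the commit: to exercise the new guards outside initialize_space(), a minimal local check could look like the sketch below. Only the two function names come from this change; everything else (running from the repo root with a write-capable Hugging Face token configured for QUEUE_REPO and RESULTS_REPO) is an assumption.

    # Illustrative sketch only, not code from this commit.
    # Assumes the Space's dependencies and HF credentials are available locally.
    from src.submission.submit import initialize_queue_repo, initialize_results_repo

    if not initialize_queue_repo():
        raise SystemExit("queue repository could not be initialized")
    if not initialize_results_repo():
        raise SystemExit("results repository could not be initialized")
    print("both dataset repositories are ready")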
src/submission/submit.py CHANGED

@@ -20,7 +20,8 @@ from src.config import (
     ALLOWED_WEIGHT_TYPES,
     DEFAULT_REVISION,
     LOG_LEVEL,
-    EVALUATION_WAIT_TIME
+    EVALUATION_WAIT_TIME,
+    RESULTS_REPO
 )
 from datasets import Dataset
 
@@ -101,6 +102,43 @@ def create_eval_entry(model: str, base_model: str, revision: str, precision: str
         "private": False,
     }
 
+def initialize_queue_repo():
+    """Initialize the queue repository with an empty dataset if it doesn't exist."""
+    try:
+        # Try to load the dataset
+        from datasets import load_dataset
+        try:
+            dataset = load_dataset(QUEUE_REPO, split="train")
+            logger.info("Queue repository already initialized")
+            return True
+        except Exception:
+            logger.info("Queue repository not initialized, creating empty dataset")
+
+        # Create an empty dataset with the required schema
+        empty_dataset = Dataset.from_dict({
+            "model": [],
+            "model_raw": [],
+            "base_model": [],
+            "revision": [],
+            "precision": [],
+            "weight_type": [],
+            "model_type": [],
+            "status": [],
+            "timestamp": []
+        })
+
+        # Push the empty dataset to initialize the repository
+        empty_dataset.push_to_hub(
+            QUEUE_REPO,
+            split="train",
+            commit_message="Initialize queue repository with empty dataset"
+        )
+        logger.info("Queue repository initialized successfully")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to initialize queue repository: {e}")
+        return False
+
 def add_new_eval(
     model: str,
     base_model: str,
@@ -127,6 +165,10 @@ def add_new_eval(
     global USERS_TO_SUBMISSION_DATES
     global EVAL_REQUESTS_PATH
 
+    # Initialize queue repository if needed
+    if not initialize_queue_repo():
+        return styled_error("Failed to initialize queue repository. Please try again later.")
+
     # Check and modify EVAL_REQUESTS_PATH at the beginning
     if not EVAL_REQUESTS_PATH or EVAL_REQUESTS_PATH == "YOUR_EVAL_REQUESTS_PATH_HERE":
         return styled_error("EVAL_REQUESTS_PATH is not properly configured. Please check your configuration.")
@@ -184,3 +226,96 @@ def add_new_eval(
         f"2. Security awareness using the stacklok/insecure-code dataset\n"
         f"Please wait for up to {EVALUATION_WAIT_TIME} minutes for the model to show in the PENDING list."
     )
+
+def initialize_results_repo():
+    """Initialize the results repository with a sample entry if it doesn't exist."""
+    try:
+        # Try to load the dataset
+        from datasets import load_dataset
+        try:
+            dataset = load_dataset(RESULTS_REPO, split="train")
+            logger.info("Results repository already initialized")
+            return True
+        except Exception:
+            logger.info("Results repository not initialized, creating initial dataset")
+
+        # Initialize with a sample entry as per init_huggingface_dataset.py
+        initial_data = {
+            "model": ["example/model"],
+            "model_raw": ["example/model"],
+            "base_model": ["gpt2"],
+            "revision": ["main"],
+            "precision": ["fp16"],
+            "weight_type": ["Safetensors"],
+            "model_type": ["Pretrained"],
+            "status": ["PENDING"],
+            "timestamp": ["2025-01-26T15:15:09.693973"],
+            "security_score": [0.5],
+            "safetensors_compliant": [True],
+            "hub_license": ["MIT"],
+            "hub_likes": [0],
+            "params_billion": [0.5],
+            "available_on_hub": [True],
+            "model_sha": ["abc123"]
+        }
+
+        # Create a Dataset object
+        dataset = Dataset.from_dict(initial_data)
+
+        # Push the dataset to the Hugging Face Hub
+        dataset.push_to_hub(
+            RESULTS_REPO,
+            split="train",
+            commit_message="Initialize results repository with sample dataset"
+        )
+
+        # Create and upload the dataset card
+        dataset_card = """---
+language:
+- en
+license:
+- mit
+---
+
+# Dataset Card for stacklok/results
+
+This dataset contains evaluation results for various models, focusing on security scores and other relevant metrics.
+
+## Dataset Structure
+
+The dataset contains the following fields:
+- `model`: The identifier of the model
+- `model_raw`: The raw model identifier
+- `base_model`: The base model if applicable
+- `revision`: The revision or version of the model
+- `precision`: The precision used for the model (e.g., fp16, fp32)
+- `weight_type`: Type of weights used
+- `model_type`: Type of the model
+- `status`: Current status of the evaluation
+- `timestamp`: When the evaluation was performed
+- `security_score`: A score representing the model's security evaluation
+- `safetensors_compliant`: A boolean indicating whether the model is compliant with safetensors
+- `hub_license`: The license of the model on Hugging Face Hub
+- `hub_likes`: Number of likes on Hugging Face Hub
+- `params_billion`: Number of parameters in billions
+- `available_on_hub`: Whether the model is available on Hugging Face Hub
+- `model_sha`: SHA hash of the model
+
+## Usage
+
+This dataset is used to populate the secure code leaderboard, providing insights into the security aspects of various models.
+"""
+
+        # Upload the dataset card
+        API.upload_file(
+            path_or_fileobj=dataset_card.encode(),
+            path_in_repo="README.md",
+            repo_id=RESULTS_REPO,
+            repo_type="dataset"
+        )
+
+        logger.info("Results repository initialized successfully")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to initialize results repository: {e}")
+        return False
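
One note on the "Align datatypes" theme, outside the diff itself: Dataset.from_dict over all-empty Python lists (as in initialize_queue_repo) lets Arrow infer null column types, so the empty queue dataset's dtypes may not match rows appended later, or the typed sample row used to seed the results repo. A minimal sketch of pinning the schema explicitly with datasets.Features follows; the feature names mirror the queue columns in this commit, while the choice of string types and the helper name are assumptions rather than code from the repository.

    # Sketch only: build the empty queue dataset with explicit column types
    # instead of letting Arrow infer `null` dtypes from empty lists.
    from datasets import Dataset, Features, Value

    QUEUE_FEATURES = Features({
        "model": Value("string"),
        "model_raw": Value("string"),
        "base_model": Value("string"),
        "revision": Value("string"),
        "precision": Value("string"),
        "weight_type": Value("string"),
        "model_type": Value("string"),
        "status": Value("string"),
        "timestamp": Value("string"),
    })

    def make_empty_queue_dataset() -> Dataset:
        """Empty queue dataset whose dtypes match later submissions."""
        return Dataset.from_dict(
            {name: [] for name in QUEUE_FEATURES},
            features=QUEUE_FEATURES,
        )

This leaves the push_to_hub behaviour unchanged while making the queue and results schemas agree on types for the columns they share.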