Conform on dataset parquet format
- app.py +46 -46
- src/core/evaluation.py +5 -2
- src/populate.py +20 -27
app.py
CHANGED
@@ -152,23 +152,27 @@ def process_evaluation_queue():
 def update_request_status(model_name, status):
     """Update the status of a request in the Hugging Face repository."""
     try:
-        …
+        # Load the current dataset
+        from datasets import load_dataset
+        dataset = load_dataset(QUEUE_REPO, split="train")
+
+        # Find the row for this model and update its status
+        data_dict = dataset.to_dict()
+        indices = [i for i, m in enumerate(data_dict["model"]) if m == model_name]
+
+        if not indices:
+            logger.error(f"No request found for model {model_name}")
+            return
+
+        # Update the status for the found request
+        data_dict["status"][indices[0]] = status
+
+        # Create new dataset with updated status
+        updated_dataset = Dataset.from_dict(data_dict)
+
+        # Push the updated dataset back to the hub
+        updated_dataset.push_to_hub(QUEUE_REPO, split="train")
+
         logger.info(f"Updated status for {model_name} to {status}")
     except Exception as e:
         logger.error(f"Failed to update status for {model_name}: {str(e)}", exc_info=True)
@@ -180,15 +184,21 @@ from huggingface_hub import HfApi
 def save_results_to_repo(results, repo):
     """Save evaluation results to the specified repository."""
     try:
-        model_id = results.get('model', '')
+        model_id = results.get('model', '')
         if not model_id:
             raise ValueError("Model ID not found in results")
 
+        # Convert all values to lists if they aren't already
+        dataset_dict = {
+            k: [v] if not isinstance(v, list) else v
+            for k, v in results.items()
+        }
+
         # Create a Dataset object from the results
-        dataset = Dataset.from_dict(…
+        dataset = Dataset.from_dict(dataset_dict)
 
         # Push the dataset to the Hugging Face Hub
-        dataset.push_to_hub(repo)
+        dataset.push_to_hub(repo, split="train")
 
         logger.info(f"Saved results for {model_id} to {repo}")
     except Exception as e:
@@ -368,37 +378,27 @@ with demo:
         try:
             logger.info(f"New submission received for {model}")
 
-            # Prepare request data
+            # Prepare request data as a dataset-compatible dictionary (all values must be lists)
             request_data = {
-                "model": model,
-                …
+                "model": [model],
+                "model_raw": [model],  # Store raw model name for processing
+                "base_model": [base_model if base_model else ""],
+                "revision": [revision if revision else "main"],
+                "precision": [precision],
+                "weight_type": [weight_type],
+                "model_type": [model_type],
+                "status": ["PENDING"],
+                "timestamp": [datetime.now().isoformat()]
             }
 
-            # …
-            # Upload the JSON file to the Hub
-            api = HfApi()
-            api.upload_file(
-                path_or_fileobj=filename,
-                path_in_repo=filename,
-                repo_id=QUEUE_REPO,
-                repo_type="dataset"
+            # Convert to dataset and push to hub
+            dataset = Dataset.from_dict(request_data)
+            dataset.push_to_hub(
+                QUEUE_REPO,
+                config_name=model.replace("/", "_"),
+                split="train"
             )
 
-            # Clean up local file
-            os.remove(filename)
-
             logger.info(f"Added request for {model} to {QUEUE_REPO}")
 
             # Get updated pending evaluations
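Taken together, the app.py changes swap the old JSON-file queue for a small datasets round trip: build a one-row dict whose values are lists, push it to the queue repo (one config per model), then load, edit, and re-push the table to change a status. The sketch below illustrates that pattern only; the repo name and config name are placeholders, the column set is trimmed, and it assumes a recent datasets release whose push_to_hub accepts config_name and split, plus a logged-in token with write access.

from datasets import Dataset, load_dataset

QUEUE_REPO = "your-org/eval-queue-demo"  # placeholder repo, not the Space's real QUEUE_REPO
CONFIG = "org_model-a"                   # mirrors model.replace("/", "_") in the diff

# One request == one row; Dataset.from_dict expects column name -> list of values.
request = {
    "model": ["org/model-a"],
    "status": ["PENDING"],
    "revision": ["main"],
}
Dataset.from_dict(request).push_to_hub(QUEUE_REPO, config_name=CONFIG, split="train")

# Updating a status means loading the table, editing the column, and pushing it back.
queue = load_dataset(QUEUE_REPO, CONFIG, split="train")
table = queue.to_dict()
table["status"][0] = "RUNNING"
Dataset.from_dict(table).push_to_hub(QUEUE_REPO, config_name=CONFIG, split="train")

One detail worth keeping in mind with this layout: push_to_hub with config_name writes a named configuration, while load_dataset(QUEUE_REPO, split="train") with no name reads the default config, so the loading and pushing sides need to agree on which config holds the queue.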
src/core/evaluation.py
CHANGED
@@ -108,17 +108,20 @@ class EvaluationManager:
                 "model_type": request.model_type,
             }
 
+            # Convert results to dataset-compatible format (lists)
+            dataset_results = {k: [v] for k, v in results.items()}
+
             # Save and backup results
             eval_id = f"{request.model.replace('/', '_')}_{request.revision}"
             result_path = os.path.join(self.results_dir, f"{eval_id}.json")
 
             with open(result_path, 'w') as f:
-                json.dump(…
+                json.dump(dataset_results, f, indent=2)
 
             self.backup_results(eval_id)
             logger.info(f"Evaluation complete for {request.model}")
 
-            return …
+            return dataset_results
 
         except Exception as e:
             logger.error(f"Evaluation failed: {str(e)}", exc_info=True)
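The list-wrapping step in this hunk keeps the JSON written to disk in the same shape that Dataset.from_dict consumes later (column name mapped to a list of values), so one record serializes the same way locally and on the Hub. A minimal, self-contained illustration; the metric field is made up:

import json
from datasets import Dataset

# A flat results record, roughly as the evaluation loop produces it (field names illustrative).
results = {"model": "org/model-a", "revision": "main", "accuracy": 0.91}

# Wrap each value in a single-element list: a one-row, dataset-compatible table.
dataset_results = {k: [v] for k, v in results.items()}

# The same object backs the local JSON file...
print(json.dumps(dataset_results, indent=2))

# ...and loads straight into a Dataset for pushing to the Hub.
ds = Dataset.from_dict(dataset_results)
print(ds.num_rows)  # 1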
src/populate.py
CHANGED
@@ -102,34 +102,27 @@ def get_evaluation_queue_df(cols: list) -> list[pd.DataFrame]:
     logger.info(f"Looking for eval requests in {QUEUE_REPO}")
     all_evals = []
 
-    api = HfApi()
-
     try:
-        # …
-        …
-            # Single request in the file
-            formatted_data = format_eval_data(data)
-            all_evals.append(formatted_data)
-        except Exception as e:
-            logger.error(f"Error processing file {file}: {str(e)}", exc_info=True)
+        # Load the dataset directly
+        from datasets import load_dataset
+        dataset = load_dataset(QUEUE_REPO, split="train")
+        logger.debug(f"Loaded dataset with {len(dataset)} rows")
+        logger.debug(f"Dataset features: {dataset.features}")
+
+        # Convert dataset to list of dicts
+        for row in dataset:
+            formatted_data = format_eval_data({
+                "model": row["model"],
+                "model_raw": row.get("model_raw", row["model"]),  # Fallback to model if model_raw not present
+                "base_model": row.get("base_model", ""),
+                "revision": row.get("revision", "main"),
+                "precision": row.get("precision", ""),
+                "weight_type": row.get("weight_type", ""),
+                "model_type": row.get("model_type", ""),
+                "status": row.get("status", "PENDING"),
+                "timestamp": row.get("timestamp", "")
+            })
+            all_evals.append(formatted_data)
 
     except Exception as e:
         logger.error(f"Error fetching requests from {QUEUE_REPO}: {str(e)}", exc_info=True)
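Since get_evaluation_queue_df ultimately returns pandas DataFrames for the UI, the new flow amounts to loading the queue dataset and splitting it by status. A rough sketch under the same assumptions as above: placeholder repo name, status values beyond the PENDING seen in the diff are assumed, and the repo's own format_eval_data helper is not reproduced here.

from datasets import load_dataset

QUEUE_REPO = "your-org/eval-queue-demo"  # placeholder; the Space uses its configured QUEUE_REPO

# Load the request queue and hand it to pandas for the leaderboard tables.
queue_df = load_dataset(QUEUE_REPO, split="train").to_pandas()

# Split into the tables the UI typically shows (RUNNING/FINISHED values are assumed).
pending_df = queue_df[queue_df["status"] == "PENDING"]
running_df = queue_df[queue_df["status"] == "RUNNING"]
finished_df = queue_df[queue_df["status"] == "FINISHED"]

print(len(pending_df), len(running_df), len(finished_df))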