Various leaderboard fixes
Browse files
- src/leaderboard/read_evals.py +11 -3
- src/submission/submit.py +30 -9
src/leaderboard/read_evals.py
CHANGED
@@ -24,7 +24,7 @@ class EvalResult:
|
|
24 |
results: dict
|
25 |
precision: Precision = Precision.Unknown
|
26 |
model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
|
27 |
-
weight_type: WeightType = WeightType.
|
28 |
architecture: str = "Unknown"
|
29 |
license: str = "?"
|
30 |
likes: int = 0
|
@@ -86,7 +86,7 @@ class EvalResult:
|
|
86 |
model=model,
|
87 |
results=results,
|
88 |
precision=precision,
|
89 |
-
revision=
|
90 |
still_on_hub=still_on_hub,
|
91 |
architecture=architecture
|
92 |
)
|
@@ -99,7 +99,15 @@ class EvalResult:
|
|
99 |
with open(request_file, "r") as f:
|
100 |
request = json.load(f)
|
101 |
self.model_type = ModelType.from_str(request.get("model_type", ""))
|
102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
self.license = request.get("license", "?")
|
104 |
self.likes = request.get("likes", 0)
|
105 |
self.num_params = request.get("params", 0)
|
|
|
24 |
results: dict
|
25 |
precision: Precision = Precision.Unknown
|
26 |
model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
|
27 |
+
weight_type: WeightType = WeightType.PyTorch # Default to PyTorch if not specified
|
28 |
architecture: str = "Unknown"
|
29 |
license: str = "?"
|
30 |
likes: int = 0
|
|
|
86 |
model=model,
|
87 |
results=results,
|
88 |
precision=precision,
|
89 |
+
revision=config.get("model_sha", ""),
|
90 |
still_on_hub=still_on_hub,
|
91 |
architecture=architecture
|
92 |
)
|
|
|
99 |
with open(request_file, "r") as f:
|
100 |
request = json.load(f)
|
101 |
self.model_type = ModelType.from_str(request.get("model_type", ""))
|
102 |
+
|
103 |
+
# Handle weight type conversion from old format
|
104 |
+
weight_type = request.get("weight_type", "PyTorch")
|
105 |
+
if weight_type == "Original":
|
106 |
+
weight_type = "PyTorch"
|
107 |
+
elif weight_type == "Adapter":
|
108 |
+
weight_type = "Other"
|
109 |
+
self.weight_type = WeightType[weight_type]
|
110 |
+
|
111 |
self.license = request.get("license", "?")
|
112 |
self.likes = request.get("likes", 0)
|
113 |
self.num_params = request.get("params", 0)
|
src/submission/submit.py
CHANGED
@@ -9,6 +9,7 @@ from src.submission.check_validity import (
|
|
9 |
check_model_card,
|
10 |
get_model_size,
|
11 |
is_model_on_hub,
|
|
|
12 |
)
|
13 |
|
14 |
REQUESTED_MODELS = None
|
@@ -44,15 +45,14 @@ def add_new_eval(
|
|
44 |
revision = "main"
|
45 |
|
46 |
# Is the model on the hub?
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
return styled_error(f'Base model "{base_model}" {error}')
|
51 |
|
52 |
-
if
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
|
57 |
# Is the model info correctly filled?
|
58 |
try:
|
@@ -72,6 +72,23 @@ def add_new_eval(
|
|
72 |
if not modelcard_OK:
|
73 |
return styled_error(error_msg)
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
# Seems good, creating the eval
|
76 |
print("Adding new eval")
|
77 |
|
@@ -115,5 +132,9 @@ def add_new_eval(
|
|
115 |
os.remove(out_path)
|
116 |
|
117 |
return styled_message(
|
118 |
-
"Your request has been submitted to the evaluation queue!\
|
|
|
|
|
|
|
|
|
119 |
)
|
|
|
9 |
check_model_card,
|
10 |
get_model_size,
|
11 |
is_model_on_hub,
|
12 |
+
check_safetensors_format,
|
13 |
)
|
14 |
|
15 |
REQUESTED_MODELS = None
|
|
|
45 |
revision = "main"
|
46 |
|
47 |
# Is the model on the hub?
|
48 |
+
model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
|
49 |
+
if not model_on_hub:
|
50 |
+
return styled_error(f'Model "{model}" {error}')
|
|
|
51 |
|
52 |
+
# Check if model uses safetensors format
|
53 |
+
safetensors_ok, error_msg = check_safetensors_format(model, revision, TOKEN)
|
54 |
+
if not safetensors_ok:
|
55 |
+
return styled_error(error_msg)
|
56 |
|
57 |
# Is the model info correctly filled?
|
58 |
try:
|
|
|
72 |
if not modelcard_OK:
|
73 |
return styled_error(error_msg)
|
74 |
|
75 |
+
# Validate weight type
|
76 |
+
if weight_type not in ["Safetensors", "PyTorch", "Other"]:
|
77 |
+
return styled_error("Invalid weight type. Must be one of: Safetensors, PyTorch, Other")
|
78 |
+
|
79 |
+
# Force Safetensors for new submissions
|
80 |
+
if weight_type != "Safetensors":
|
81 |
+
return styled_error(
|
82 |
+
"Only Safetensors format is accepted for new submissions. Please convert your model using:\n"
|
83 |
+
"```python\n"
|
84 |
+
"from transformers import AutoModelForCausalLM\n"
|
85 |
+
"from safetensors.torch import save_file\n\n"
|
86 |
+
"model = AutoModelForCausalLM.from_pretrained('your-model')\n"
|
87 |
+
"state_dict = model.state_dict()\n"
|
88 |
+
"save_file(state_dict, 'model.safetensors')\n"
|
89 |
+
"```"
|
90 |
+
)
|
91 |
+
|
92 |
# Seems good, creating the eval
|
93 |
print("Adding new eval")
|
94 |
|
|
|
132 |
os.remove(out_path)
|
133 |
|
134 |
return styled_message(
|
135 |
+
"Your request has been submitted to the evaluation queue!\n"
|
136 |
+
"The model will be evaluated for:\n"
|
137 |
+
"1. Safetensors compliance\n"
|
138 |
+
"2. Security awareness using the stacklok/insecure-code dataset\n"
|
139 |
+
"Please wait for up to an hour for the model to show in the PENDING list."
|
140 |
)
|