Clémentine
committed on
Commit
•
7302987
1
Parent(s):
7abc6a7
Added check on tokenizer to prevent submissions which won't run
Browse files
- src/submission/check_validity.py +18 -2
- src/submission/submit.py +2 -2
src/submission/check_validity.py
CHANGED
@@ -8,6 +8,7 @@ import huggingface_hub
|
|
8 |
from huggingface_hub import ModelCard
|
9 |
from huggingface_hub.hf_api import ModelInfo
|
10 |
from transformers import AutoConfig
|
|
|
11 |
|
12 |
from src.envs import HAS_HIGHER_RATE_LIMIT
|
13 |
|
@@ -36,9 +37,24 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
|
|
36 |
return True, ""
|
37 |
|
38 |
|
39 |
-
def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False) -> tuple[bool, str]:
|
40 |
try:
|
41 |
config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
return True, None, config
|
43 |
|
44 |
except ValueError:
|
@@ -48,7 +64,7 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
|
|
48 |
None
|
49 |
)
|
50 |
|
51 |
-
except Exception:
|
52 |
return False, "was not found on hub!", None
|
53 |
|
54 |
|
|
|
8 |
from huggingface_hub import ModelCard
|
9 |
from huggingface_hub.hf_api import ModelInfo
|
10 |
from transformers import AutoConfig
|
11 |
+
from transformers.models.auto.tokenization_auto import tokenizer_class_from_name, get_tokenizer_config
|
12 |
|
13 |
from src.envs import HAS_HIGHER_RATE_LIMIT
|
14 |
|
|
|
37 |
return True, ""
|
38 |
|
39 |
|
40 |
+
def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
|
41 |
try:
|
42 |
config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
|
43 |
+
if test_tokenizer:
|
44 |
+
tokenizer_config = get_tokenizer_config(model_name)
|
45 |
+
if tokenizer_config is not None:
|
46 |
+
tokenizer_class_candidate = tokenizer_config.get("tokenizer_class", None)
|
47 |
+
else:
|
48 |
+
tokenizer_class_candidate = config.tokenizer_class
|
49 |
+
|
50 |
+
|
51 |
+
tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
|
52 |
+
if tokenizer_class is None:
|
53 |
+
return (
|
54 |
+
False,
|
55 |
+
f"uses {tokenizer_class_candidate}, which is not in a transformers release, therefore not supported at the moment.",
|
56 |
+
None
|
57 |
+
)
|
58 |
return True, None, config
|
59 |
|
60 |
except ValueError:
|
|
|
64 |
None
|
65 |
)
|
66 |
|
67 |
+
except Exception as e:
|
68 |
return False, "was not found on hub!", None
|
69 |
|
70 |
|
src/submission/submit.py
CHANGED
@@ -54,12 +54,12 @@ def add_new_eval(
|
|
54 |
|
55 |
# Is the model on the hub?
|
56 |
if weight_type in ["Delta", "Adapter"]:
|
57 |
-
base_model_on_hub, error, _ = is_model_on_hub(base_model, revision, H4_TOKEN)
|
58 |
if not base_model_on_hub:
|
59 |
return styled_error(f'Base model "{base_model}" {error}')
|
60 |
|
61 |
if not weight_type == "Adapter":
|
62 |
-
model_on_hub, error, _ = is_model_on_hub(model, revision)
|
63 |
if not model_on_hub:
|
64 |
return styled_error(f'Model "{model}" {error}')
|
65 |
|
|
|
54 |
|
55 |
# Is the model on the hub?
|
56 |
if weight_type in ["Delta", "Adapter"]:
|
57 |
+
base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=H4_TOKEN, test_tokenizer=True)
|
58 |
if not base_model_on_hub:
|
59 |
return styled_error(f'Base model "{base_model}" {error}')
|
60 |
|
61 |
if not weight_type == "Adapter":
|
62 |
+
model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
|
63 |
if not model_on_hub:
|
64 |
return styled_error(f'Model "{model}" {error}')
|
65 |
|