Spaces:
Runtime error
Runtime error
File size: 6,713 Bytes
b2d95df 010b2a5 c4c8150 9b74a5d 010b2a5 b2d95df 51678bf 9b74a5d 010b2a5 9b74a5d c4c8150 51678bf c4c8150 51678bf c4c8150 b2d95df 9b74a5d c4c8150 51678bf 9b74a5d 51678bf c4c8150 51678bf 9b74a5d c4c8150 9b74a5d 010b2a5 9b74a5d 010b2a5 9b74a5d 010b2a5 9b74a5d 010b2a5 9b74a5d 51678bf 9b74a5d b2d95df 010b2a5 b2d95df 010b2a5 b2d95df 010b2a5 b2d95df 010b2a5 0c889a5 b2d95df 010b2a5 b2d95df 010b2a5 b2d95df 010b2a5 b2d95df 010b2a5 9b74a5d b2d95df 9b74a5d 39b62ef b2d95df |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
import glob
import json
import os
import re
import pickle
from typing import List
import huggingface_hub
from huggingface_hub import HfApi
from tqdm import tqdm
from transformers import AutoModel, AutoConfig
from accelerate import init_empty_weights
from src.display_models.model_metadata_flags import DO_NOT_SUBMIT_MODELS, FLAGGED_MODELS
from src.display_models.model_metadata_type import MODEL_TYPE_METADATA, ModelType, model_type_from_str
from src.display_models.utils import AutoEvalColumn, model_hyperlink
# Shared Hub client for this module; the token is read from the H4_TOKEN
# environment variable and is None when that variable is unset.
api = HfApi(token=os.getenv("H4_TOKEN"))
def _load_pickle_cache(path):
    """Return the object pickled at *path*, or an empty dict if it cannot be read."""
    # NOTE(security): pickle.load executes arbitrary code embedded in the file.
    # These cache files must only ever be produced by this module itself.
    try:
        with open(path, "rb") as f:
            return pickle.load(f)
    except (EOFError, FileNotFoundError, pickle.UnpicklingError):
        # Missing, truncated or corrupt cache: start over with an empty one.
        return {}


def get_model_infos_from_hub(leaderboard_data: List[dict]):
    """Fill in the license, likes and parameter-count columns of every row.

    Each row is looked up on the Hub by its ``"model_name_for_query"`` value.
    Hub responses and computed sizes are cached in ``model_info_cache.pkl`` and
    ``model_size_cache.pkl`` (relative to the working directory) so later runs
    can skip the lookup. Rows are modified in place; nothing is returned.

    When the repository cannot be found, license and likes are set to ``None``
    and the size is derived without Hub metadata.
    """
    model_info_cache = _load_pickle_cache("model_info_cache.pkl")
    model_size_cache = _load_pickle_cache("model_size_cache.pkl")

    for model_data in tqdm(leaderboard_data):
        model_name = model_data["model_name_for_query"]

        if model_name in model_info_cache:
            model_info = model_info_cache[model_name]
        else:
            try:
                model_info = api.model_info(model_name)
            # Use the public re-export rather than the private ``_errors`` module.
            except huggingface_hub.utils.RepositoryNotFoundError:
                print("Repo not found!", model_name)
                model_data[AutoEvalColumn.license.name] = None
                model_data[AutoEvalColumn.likes.name] = None
                if model_name not in model_size_cache:
                    model_size_cache[model_name] = get_model_size(model_name, None)
                model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
                # Without this, execution fell through to the code below with
                # ``model_info`` unbound (first row) or left over from the
                # previous row, crashing or attributing the wrong metadata.
                continue
            model_info_cache[model_name] = model_info

        model_data[AutoEvalColumn.license.name] = get_model_license(model_info)
        model_data[AutoEvalColumn.likes.name] = get_model_likes(model_info)
        if model_name not in model_size_cache:
            model_size_cache[model_name] = get_model_size(model_name, model_info)
        model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]

    # save cache to disk in pickle format
    with open("model_info_cache.pkl", "wb") as f:
        pickle.dump(model_info_cache, f)
    with open("model_size_cache.pkl", "wb") as f:
        pickle.dump(model_size_cache, f)
def get_model_license(model_info):
    """Return the ``"license"`` entry of ``model_info.cardData``.

    Any exception raised while reading it (no ``cardData`` attribute, card
    data that is not subscriptable, missing key, ...) yields ``"?"`` instead.
    """
    license_name = "?"
    try:
        card = model_info.cardData
        license_name = card["license"]
    except Exception:
        # Best effort: keep the "?" placeholder.
        pass
    return license_name
def get_model_likes(model_info):
    """Return the ``likes`` attribute of *model_info* unchanged."""
    return getattr(model_info, "likes")
# Matches a size token such as "7b", "1.4b", "13.5b" or "350m" in a lowercased
# model name. The integer part may have several digits; the previous
# ``(\d\.)?`` allowed only one, so "13.5b" was read as "3.5b".
size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
def get_model_size(model_name, model_info):
    """Return the model's parameter count in billions, rounded to 3 decimals.

    Three strategies are tried in order:

    1. The ``total`` entry of ``model_info.safetensors``, read either as a
       mapping key or as an attribute.
    2. Building the architecture from its config with empty weights and
       counting the trainable parameters.
    3. Parsing a size token such as ``7b`` or ``350m`` out of *model_name*.

    Returns ``0`` when none of them yields a size. *model_info* may be
    ``None``, in which case the first strategy is skipped.
    """
    # 1. Safetensors metadata. The original caught only AttributeError, so a
    #    ``safetensors`` value of None, or one that is not subscriptable,
    #    raised an uncaught TypeError instead of falling back.
    safetensors = getattr(model_info, "safetensors", None)
    if isinstance(safetensors, dict):
        total = safetensors.get("total")
    else:
        total = getattr(safetensors, "total", None)
    if total is not None:
        return round(total / 1e9, 3)

    # 2. Build the model from its config without allocating real weights.
    try:
        config = AutoConfig.from_pretrained(model_name, trust_remote_code=False)
        with init_empty_weights():
            model = AutoModel.from_config(config, trust_remote_code=False)
        return round(sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e9, 3)
    except (EnvironmentError, ValueError, KeyError):  # model config not found, likely private
        pass

    # 3. Last resort: read the size from the model name.
    size_match = re.search(size_pattern, model_name.lower())
    if size_match is None:
        return 0
    size = size_match.group(0)
    number = float(size[:-1])
    # A "b" suffix is already in billions; "m" (millions) is scaled down.
    return round(number if size[-1] == "b" else number / 1e3, 3)
def get_model_type(leaderboard_data: List[dict]):
    """Set the model-type name and symbol columns on every leaderboard row.

    For each row, the request files matching
    ``eval-queue/<model_name_for_query>_eval_request_*.json`` are globbed.
    A single match is used directly. With several matches, the files are
    scanned in reverse-sorted order and the last one whose status is
    ``"FINISHED"`` and whose precision equals the row's precision is kept.

    If the chosen file cannot be read or interpreted, the type falls back to
    the ``MODEL_TYPE_METADATA`` entry for the model, and to
    ``ModelType.Unknown`` when there is none. Rows are modified in place.
    """
    for row in leaderboard_data:
        queried_name = row["model_name_for_query"]
        pattern = os.path.join(
            "eval-queue",
            queried_name + "_eval_request_*" + ".json",
        )
        candidates = glob.glob(pattern)

        # Pick the request file to read; "" (which cannot be opened) means
        # none was found, and the fallback below then applies.
        chosen_path = ""
        if len(candidates) == 1:
            chosen_path = candidates[0]
        elif len(candidates) > 1:
            for candidate in sorted(candidates, reverse=True):
                with open(candidate, "r") as handle:
                    content = json.load(handle)
                    finished = content["status"] == "FINISHED"
                    if finished and content["precision"] == row["Precision"].split(".")[-1]:
                        chosen_path = candidate

        try:
            with open(chosen_path, "r") as handle:
                request = json.load(handle)
            resolved = model_type_from_str(request["model_type"])
            row[AutoEvalColumn.model_type.name] = resolved.value.name
            row[AutoEvalColumn.model_type_symbol.name] = resolved.value.symbol
        except Exception:
            # No usable request file: use the static metadata table if it
            # knows the model, otherwise mark the type as unknown.
            if queried_name in MODEL_TYPE_METADATA:
                fallback = MODEL_TYPE_METADATA[queried_name]
            else:
                fallback = ModelType.Unknown
            row[AutoEvalColumn.model_type.name] = fallback.value.name
            row[AutoEvalColumn.model_type_symbol.name] = fallback.value.symbol
def flag_models(leaderboard_data: List[dict]):
    """Append a "has been flagged!" notice to the model column of flagged rows.

    A row is flagged when its ``"model_name_for_query"`` is a key of
    ``FLAGGED_MODELS``. The mapped value (presumably a discussion URL; verify
    against ``FLAGGED_MODELS``) is used as the link target, and its last
    ``/``-separated segment as the discussion number. Rows are modified in
    place.
    """
    for row in leaderboard_data:
        queried_name = row["model_name_for_query"]
        if queried_name not in FLAGGED_MODELS:
            continue

        discussion = FLAGGED_MODELS[queried_name]
        issue_num = discussion.split("/")[-1]
        issue_link = model_hyperlink(discussion, f"See discussion #{issue_num}")

        flagged_label = f"{row[AutoEvalColumn.model.name]} has been flagged! {issue_link}"
        row[AutoEvalColumn.model.name] = flagged_label
def remove_forbidden_models(leaderboard_data: List[dict]):
    """Drop, in place, every row whose model is in ``DO_NOT_SUBMIT_MODELS``.

    Returns the same list object that was passed in.
    """
    doomed = [
        position
        for position, row in enumerate(leaderboard_data)
        if row["model_name_for_query"] in DO_NOT_SUBMIT_MODELS
    ]
    # Pop from the highest index down so earlier removals do not shift the
    # positions still waiting to be removed.
    while doomed:
        leaderboard_data.pop(doomed.pop())
    return leaderboard_data
def apply_metadata(leaderboard_data: List[dict]):
    """Run the full metadata pipeline over *leaderboard_data* in place.

    Forbidden models are removed first; the remaining rows then receive their
    model type, their Hub information and, where applicable, a flag notice.
    Nothing is returned.
    """
    rows = remove_forbidden_models(leaderboard_data)
    for enrich in (get_model_type, get_model_infos_from_hub, flag_models):
        enrich(rows)
|