Spaces:
Running
Running
baptistecolle
HF staff
Add torchao int4 weight only quantization as an option (#34)
8766911
verified
import logging

from transformers import AutoConfig
# Maps a model config's ``model_type`` string to the label shown for that
# architecture. Lookups that miss this table are handled by the caller
# (``process_architectures`` falls back to "Unknown").
#
# NOTE(review): the emoji / CJK glyphs below were reconstructed from a
# mis-decoded (UTF-8 read as a single-byte Greek code page) copy of this
# table; keys and ASCII text are untouched. Confirm the glyphs against the
# upstream file if exact rendering matters.
LLM_MODEL_ARCHS = {
    "stablelm_epoch": "🔴 StableLM-Epoch",
    "stablelm_alpha": "🔴 StableLM-Alpha",
    "mixformer-sequential": "🧑‍💻 Phi φ",
    "RefinedWebModel": "🦅 Falcon",
    "gpt_bigcode": "⭐ StarCoder",
    "RefinedWeb": "🦅 Falcon",
    "baichuan": "🌊 Baichuan 百川",  # river
    "internlm": "🧑‍🎓 InternLM 书生",  # scholar
    "mistral": "Ⓜ️ Mistral",
    "mixtral": "Ⓜ️ Mixtral",
    "codegen": "♾️ CodeGen",
    "chatglm": "💬 ChatGLM",
    "falcon": "🦅 Falcon",
    "bloom": "🌸 Bloom",
    "llama": "🦙 LLaMA",
    "rwkv": "🐦‍⬛ RWKV",
    "deci": "🔵 deci",
    "Yi": "🫂 Yi 人",  # people
    "mpt": "🧱 MPT",
    # No emoji chosen yet for the entries below -- suggest something.
    "gpt_neox": "GPT-NeoX",
    "gpt_neo": "GPT-Neo",
    "gpt2": "GPT-2",
    "gptj": "GPT-J",
    "bart": "BART",
}
def model_hyperlink(link, model_name):
    """Return an HTML anchor that opens *link* in a new tab.

    Args:
        link: Value interpolated verbatim into the ``href`` attribute.
        model_name: Value interpolated verbatim as the anchor text.

    Returns:
        The ``<a>`` element as a string, styled with a dotted underline and
        the ``--link-text-color`` CSS variable.

    Note:
        Neither argument is HTML-escaped here; callers are expected to pass
        values that are already safe to embed in markup.
    """
    # The literal is split only for line length; the concatenated result is
    # the single-line anchor markup.
    return (
        f'<a target="_blank" href="{link}" '
        'style="color: var(--link-text-color); text-decoration: underline;'
        'text-decoration-style: dotted;">'
        f"{model_name}</a>"
    )
def process_architectures(model):
    """Return the display label for the architecture of *model*.

    The model's config is loaded with ``AutoConfig.from_pretrained`` and its
    ``model_type`` is looked up in ``LLM_MODEL_ARCHS``.

    Args:
        model: Identifier passed straight to ``AutoConfig.from_pretrained``.

    Returns:
        The matching label from ``LLM_MODEL_ARCHS``, or ``"Unknown"`` when
        the ``model_type`` is not in the table or the config cannot be
        loaded for any reason.
    """
    try:
        # NOTE(security): trust_remote_code=True allows configuration code
        # shipped with the model repository to be executed while loading.
        # Only call this with model identifiers from a trusted source.
        config = AutoConfig.from_pretrained(model, trust_remote_code=True)
        return LLM_MODEL_ARCHS.get(config.model_type, "Unknown")
    except Exception as exc:
        # Deliberately best-effort: any failure degrades to "Unknown" rather
        # than propagating, but the reason is kept for debugging instead of
        # being silently discarded.
        logging.getLogger(__name__).debug(
            "Could not resolve architecture for %r: %s", model, exc
        )
        return "Unknown"
def process_score(score, quantization):
    """Format *score* with two decimals and mark quantized results.

    Args:
        score: Number formatted with the ``.2f`` format spec.
        quantization: Quantization label; only the exact string
            ``"Unquantized"`` is treated as unquantized.

    Returns:
        ``"<score>*"`` when *quantization* is anything other than
        ``"Unquantized"``, otherwise ``"<score> "`` (note the trailing
        space, which keeps both forms the same width).
    """
    marker = " " if quantization == "Unquantized" else "*"
    return f"{score:.2f}{marker}"
def process_quantizations(x):
    """Return the quantization label for one benchmark record.

    Args:
        x: Object indexable by the flattened config keys used below
            (presumably a dict or a DataFrame row -- confirm with callers).
            ``config.backend.quantization_scheme`` is always read; a
            ``config.backend.quantization_config.*`` key is read only when
            the scheme makes it relevant.

    Returns:
        One of ``"BnB.4bit"``, ``"BnB.8bit"``, ``"GPTQ.4bit"``,
        ``"AWQ.4bit"``, ``"torchao.4bit"``, or ``"Unquantized"`` when no
        rule matches.
    """
    scheme = x["config.backend.quantization_scheme"]

    if scheme == "bnb":
        # ``is True`` is intentional: only a real boolean True counts, so
        # truthy placeholders (e.g. NaN for a missing value) do not match.
        if x["config.backend.quantization_config.load_in_4bit"] is True:
            return "BnB.4bit"
        if x["config.backend.quantization_config.load_in_8bit"] is True:
            return "BnB.8bit"
    elif scheme == "gptq":
        if x["config.backend.quantization_config.bits"] == 4:
            return "GPTQ.4bit"
    elif scheme == "awq":
        if x["config.backend.quantization_config.bits"] == 4:
            return "AWQ.4bit"
    elif scheme == "torchao":
        if x["config.backend.quantization_config.quant_type"] == "int4_weight_only":
            return "torchao.4bit"

    # Unknown scheme, or a known scheme with an unsupported configuration.
    return "Unquantized"
def process_kernels(x):
    """Return the kernel label for one benchmark record.

    Args:
        x: Object indexable by the flattened config keys used below
            (presumably a dict or a DataFrame row -- confirm with callers).
            ``config.backend.quantization_config.version`` is read only
            when the scheme is ``"gptq"`` or ``"awq"``.

    Returns:
        ``"GPTQ.ExllamaV1"`` / ``"GPTQ.ExllamaV2"`` for GPTQ versions 1 / 2,
        ``"AWQ.GEMM"`` / ``"AWQ.GEMV"`` for AWQ versions ``"gemm"`` /
        ``"gemv"``, and ``"No Kernel"`` for everything else.
    """
    scheme = x["config.backend.quantization_scheme"]

    if scheme == "gptq":
        version = x["config.backend.quantization_config.version"]
        if version == 1:
            return "GPTQ.ExllamaV1"
        if version == 2:
            return "GPTQ.ExllamaV2"
    elif scheme == "awq":
        version = x["config.backend.quantization_config.version"]
        if version == "gemm":
            return "AWQ.GEMM"
        if version == "gemv":
            return "AWQ.GEMV"

    # Any other scheme, or an unrecognized version for a known scheme.
    return "No Kernel"
# def change_tab(query_param): | |
# query_param = query_param.replace("'", '"') | |
# query_param = json.loads(query_param) | |
# if isinstance(query_param, dict) and "tab" in query_param and query_param["tab"] == "plot": | |
# return gr.Tabs.update(selected=1) | |
# else: | |
# return gr.Tabs.update(selected=0) | |