Spaces:
Running
Running
File size: 10,769 Bytes
9506213 edf5256 9506213 e23b1fe 9506213 edf5256 f5301ba edf5256 9506213 872b151 9506213 e23b1fe 9506213 e23b1fe 9506213 edf5256 9506213 f5301ba 9c20644 3dd4499 f5301ba edf5256 f5301ba d4c3e31 9c20644 d243de5 f5301ba d243de5 f5301ba 9506213 f5301ba e3dbcc5 f5301ba 6bdca2e 9506213 f5301ba d243de5 f5301ba a3e2f44 6bdca2e 9c20644 f5301ba 6bdca2e f5301ba d243de5 f5301ba d243de5 f5301ba d243de5 f5301ba 9506213 d114365 d243de5 d114365 d243de5 edf5256 d114365 0cbd808 d243de5 0cbd808 da32672 e23b1fe da32672 e23b1fe edf5256 e23b1fe edf5256 e23b1fe 0cbd808 d243de5 0cbd808 e23b1fe d243de5 e23b1fe 0cbd808 872b151 0cbd808 e23b1fe 0cbd808 e23b1fe 0cbd808 d114365 0cbd808 9506213 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 |
import os
import shutil
import gradio as gr
from huggingface_hub import HfApi, whoami, ModelCard, model_info
from gradio_huggingfacehub_search import HuggingfaceHubSearch
from textwrap import dedent
from pathlib import Path
from tempfile import TemporaryDirectory
from huggingface_hub.file_download import repo_folder_name
from optimum.exporters import TasksManager
from optimum.intel.utils.modeling_utils import _find_files_matching_pattern
from optimum.intel import (
OVModelForAudioClassification,
OVModelForCausalLM,
OVModelForFeatureExtraction,
OVModelForImageClassification,
OVModelForMaskedLM,
OVModelForQuestionAnswering,
OVModelForSeq2SeqLM,
OVModelForSequenceClassification,
OVModelForTokenClassification,
OVStableDiffusionPipeline,
OVStableDiffusionXLPipeline,
OVLatentConsistencyModelPipeline,
OVWeightQuantizationConfig,
)
from diffusers import ConfigMixin
_HEAD_TO_AUTOMODELS = {
"feature-extraction": "OVModelForFeatureExtraction",
"fill-mask": "OVModelForMaskedLM",
"text-generation": "OVModelForCausalLM",
"text-classification": "OVModelForSequenceClassification",
"token-classification": "OVModelForTokenClassification",
"question-answering": "OVModelForQuestionAnswering",
"image-classification": "OVModelForImageClassification",
"audio-classification": "OVModelForAudioClassification",
"stable-diffusion": "OVStableDiffusionPipeline",
"stable-diffusion-xl": "OVStableDiffusionXLPipeline",
"latent-consistency": "OVLatentConsistencyModelPipeline",
}
def quantize_model(
    model_id: str,
    dtype: str,
    calibration_dataset: str,
    ratio: float,
    private_repo: bool,
    overwritte: bool,
    oauth_token: gr.OAuthToken,
):
    """Apply NNCF weight-only quantization to a Hub model and push the result.

    The model is exported to OpenVINO when no OpenVINO IR file is found in the
    source repository, quantized, saved to a temporary folder and uploaded to
    ``{username}/{model_name}-openvino-{precision}`` together with a model card.

    Args:
        model_id: Hub id of the model to quantize.
        dtype: Weight precision label; ``"8-bit"`` selects 8 bits, any other
            value selects 4 bits.
        calibration_dataset: Dataset name, or the literal string ``"None"``.
            A dataset is only used for 4-bit, where it switches the method to
            ``"awq"``.
        ratio: Ratio forwarded to the quantization config for 4-bit; forced to
            ``1.0`` for 8-bit.
        private_repo: Create the destination repository as private.
        overwritte: Allow pushing when the destination repository exists.
        oauth_token: OAuth token of the logged-in user.

    Returns:
        A human-readable status message. Exceptions raised while processing
        are caught and reported in that message rather than propagated.
    """
    token = oauth_token.token
    if token is None:
        return "You must be logged in to use this space"

    if not model_id:
        return f"### Invalid input 🐞 Please specify a model name, got {model_id}"

    try:
        # --- Destination repository name -----------------------------------
        model_name = model_id.split("/")[-1]
        username = whoami(token)["name"]
        weight_tag = dtype.replace("-", "")
        suffix = f"{weight_tag}" if model_name.endswith("openvino") else f"openvino-{weight_tag}"
        new_repo_id = f"{username}/{model_name}-{suffix}"

        # --- Task inference -------------------------------------------------
        library_name = TasksManager.infer_library_from_model(model_id, token=token)
        if library_name == "diffusers":
            # Point ConfigMixin at the pipeline index so the pipeline class
            # name can be read from the repository.
            ConfigMixin.config_name = "model_index.json"
            class_name = ConfigMixin.load_config(model_id, token=token)["_class_name"].lower()
            if "xl" in class_name:
                task = "stable-diffusion-xl"
            elif "consistency" in class_name:
                task = "latent-consistency"
            else:
                task = "stable-diffusion"
        else:
            task = TasksManager.infer_task_from_model(model_id, token=token)

        if task == "text2text-generation":
            return "Export of Seq2Seq models is currently disabled."

        if task not in _HEAD_TO_AUTOMODELS:
            supported_tasks = ", ".join(_HEAD_TO_AUTOMODELS)
            return f"The task '{task}' is not supported, only {supported_tasks} tasks are supported"

        auto_model_class = _HEAD_TO_AUTOMODELS[task]

        # Export only when the source repository has no OpenVINO IR file yet.
        ov_files = _find_files_matching_pattern(
            model_id,
            pattern=r"(.*)?openvino(.*)?\_model.xml",
            use_auth_token=token,
        )
        export = len(ov_files) == 0

        # --- Quantization configuration ------------------------------------
        if calibration_dataset == "None":
            calibration_dataset = None

        is_int8 = dtype == "8-bit"
        # if library_name == "diffusers":
        #     quant_method = "hybrid"
        if not is_int8 and calibration_dataset is not None:
            quant_method = "awq"
        else:
            if calibration_dataset is not None:
                print("Default quantization was selected, calibration dataset won't be used")
            quant_method = "default"

        uses_default_method = quant_method == "default"
        quantization_config = OVWeightQuantizationConfig(
            bits=8 if is_int8 else 4,
            quant_method=quant_method,
            dataset=None if uses_default_method else calibration_dataset,
            ratio=1.0 if is_int8 else ratio,
            num_samples=None if uses_default_method else 20,
        )

        api = HfApi(token=token)
        if api.repo_exists(new_repo_id) and not overwritte:
            return f"Model {new_repo_id} already exist, please tick the overwritte box to push on an existing repository"

        with TemporaryDirectory() as d:
            folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
            os.makedirs(folder)

            try:
                # --- Export / quantize ---------------------------------------
                api.snapshot_download(repo_id=model_id, local_dir=folder, allow_patterns=["*.json"])

                # Resolve the class by name from the module namespace; a plain
                # lookup is sufficient and avoids evaluating a string as code.
                model_cls = globals()[auto_model_class]
                ov_model = model_cls.from_pretrained(
                    model_id,
                    export=export,
                    cache_dir=folder,
                    token=token,
                    quantization_config=quantization_config,
                )
                ov_model.save_pretrained(folder)

                # --- Upload --------------------------------------------------
                new_repo_url = api.create_repo(repo_id=new_repo_id, exist_ok=True, private=private_repo)
                new_repo_id = new_repo_url.repo_id
                print("Repository created successfully!", new_repo_url)

                folder = Path(folder)
                # "" is the repository root; the others are the sub-folders
                # looked for in the saved output (skipped when absent).
                for dir_name in (
                    "",
                    "vae_encoder",
                    "vae_decoder",
                    "text_encoder",
                    "text_encoder_2",
                    "unet",
                    "tokenizer",
                    "tokenizer_2",
                    "scheduler",
                    "feature_extractor",
                ):
                    source_dir = folder / dir_name
                    if not source_dir.is_dir():
                        continue

                    for file_path in source_dir.iterdir():
                        if not file_path.is_file():
                            continue
                        # Repository paths always use "/" regardless of the
                        # host OS, so do not build them with os.path.join.
                        path_in_repo = f"{dir_name}/{file_path.name}" if dir_name else file_path.name
                        try:
                            api.upload_file(
                                path_or_fileobj=file_path,
                                path_in_repo=path_in_repo,
                                repo_id=new_repo_id,
                            )
                        except Exception as e:
                            return f"Error uploading file {file_path}: {e}"

                # --- Model card ----------------------------------------------
                try:
                    card = ModelCard.load(model_id, token=token)
                except Exception:
                    # Best effort: fall back to an empty card when the source
                    # card cannot be loaded.
                    card = ModelCard("")

                if card.data.tags is None:
                    card.data.tags = []
                if "openvino" not in card.data.tags:
                    card.data.tags.append("openvino")
                card.data.tags.append("nncf")
                card.data.tags.append(dtype)
                card.data.base_model = model_id

                card.text = dedent(
                    f"""
                    This model is a quantized version of [`{model_id}`](https://huggingface.co/{model_id}) and is converted to the OpenVINO format. This model was obtained via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space with [optimum-intel](https://github.com/huggingface/optimum-intel).
                    First make sure you have `optimum-intel` installed:
                    ```bash
                    pip install optimum[openvino]
                    ```
                    To load your model you can do as follows:
                    ```python
                    from optimum.intel import {auto_model_class}
                    model_id = "{new_repo_id}"
                    model = {auto_model_class}.from_pretrained(model_id)
                    ```
                    """
                )
                card_path = os.path.join(folder, "README.md")
                card.save(card_path)
                api.upload_file(
                    path_or_fileobj=card_path,
                    path_in_repo="README.md",
                    repo_id=new_repo_id,
                )
                return f"This model was successfully quantized, find it under your repository {new_repo_url}"
            finally:
                shutil.rmtree(folder, ignore_errors=True)
    except Exception as e:
        return f"### Error: {e}"
# Markdown shown under the interface title. The empty first and last items
# reproduce the leading and trailing newline of the text.
DESCRIPTION = "\n".join(
    (
        "",
        "This Space uses [Optimum Intel](https://github.com/huggingface/optimum-intel) to automatically apply NNCF [Weight Only Quantization](https://huggingface.co/docs/optimum/main/en/intel/openvino/optimization) (WOQ) on your model and convert it to the [OpenVINO format](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) if not already.",
        "After conversion, a repository will be pushed under your namespace with the resulting model.",
        "The list of the supported architectures can be found in the [documentation](https://huggingface.co/docs/optimum/main/en/intel/openvino/models)",
        "",
    )
)
# --- Input components -------------------------------------------------------
# They are passed to gr.Interface in the same order as the leading parameters
# of quantize_model.

# Search box used to pick the source model.
model_id = HuggingfaceHubSearch(
    search_type="model",
    label="Hub Model ID",
    placeholder="Search for model id on the hub",
)

# Target weight precision.
dtype = gr.Dropdown(
    choices=["8-bit", "4-bit"],
    label="Weights precision",
    value="8-bit",
    visible=True,
    filterable=False,
)

# Disabled selector for the quantization method (the method is currently
# derived from the precision and calibration dataset instead):
#
# quant_method = gr.Dropdown(
#     ["default", "awq", "hybrid"],
#     value="default",
#     label="Quantization method",
#     filterable=False,
#     visible=True,
# )

# Calibration dataset; the string "None" means no dataset.
calibration_dataset = gr.Dropdown(
    choices=[
        "None",
        "wikitext2",
        "c4",
        "c4-new",
        "conceptual_captions",
        "laion/220k-GPT4Vision-captions-from-LIVIS",
        "laion/filtered-wit",
    ],
    label="Calibration dataset",
    value="None",
    visible=True,
    filterable=False,
)

# Share of 4-bit vs 8-bit quantization, in [0.0, 1.0].
ratio = gr.Slider(
    minimum=0.0,
    maximum=1.0,
    value=1.0,
    step=0.1,
    label="Ratio",
    info="Parameter used when applying 4-bit quantization to control the ratio between 4-bit and 8-bit quantization",
)

# Visibility of the repository that receives the quantized model.
private_repo = gr.Checkbox(
    label="Private repository",
    info="Create a private repository instead of a public one",
    value=False,
)

# Opt-in for pushing to a repository that already exists.
overwritte = gr.Checkbox(
    label="Overwrite repository content",
    info="Enable pushing files on existing repositories, potentially overwriting existing files",
    value=False,
)
# Form wiring the input components to quantize_model; the returned status
# message is rendered as Markdown.
interface = gr.Interface(
    title="Quantize your model with NNCF",
    description=DESCRIPTION,
    fn=quantize_model,
    inputs=[model_id, dtype, calibration_dataset, ratio, private_repo, overwritte],
    outputs=[gr.Markdown(label="output")],
    api_name=False,
)
# Page layout: login notice, login button, then the quantization form.
demo = gr.Blocks()
with demo:
    gr.Markdown("You must be logged in to use this space")
    gr.LoginButton(min_width=250)
    interface.render()

demo.launch()
|