echarlaix committed
Commit f5301ba · 1 Parent(s): edf5256
Files changed (1):
  1. app.py +129 -129

app.py CHANGED
@@ -30,17 +30,17 @@ from optimum.intel import (
 from diffusers import ConfigMixin
 
 _HEAD_TO_AUTOMODELS = {
-    "feature-extraction": OVModelForFeatureExtraction,
-    "fill-mask": OVModelForMaskedLM,
-    "text-generation": OVModelForCausalLM,
-    "text-classification": OVModelForSequenceClassification,
-    "token-classification": OVModelForTokenClassification,
-    "question-answering": OVModelForQuestionAnswering,
-    "image-classification": OVModelForImageClassification,
-    "audio-classification": OVModelForAudioClassification,
-    "stable-diffusion": OVStableDiffusionPipeline,
-    "stable-diffusion-xl": OVStableDiffusionXLPipeline,
-    "latent-consistency": OVLatentConsistencyModelPipeline,
+    "feature-extraction": "OVModelForFeatureExtraction",
+    "fill-mask": "OVModelForMaskedLM",
+    "text-generation": "OVModelForCausalLM",
+    "text-classification": "OVModelForSequenceClassification",
+    "token-classification": "OVModelForTokenClassification",
+    "question-answering": "OVModelForQuestionAnswering",
+    "image-classification": "OVModelForImageClassification",
+    "audio-classification": "OVModelForAudioClassification",
+    "stable-diffusion": "OVStableDiffusionPipeline",
+    "stable-diffusion-xl": "OVStableDiffusionXLPipeline",
+    "latent-consistency": "OVLatentConsistencyModelPipeline",
 }
 
 def quantize_model(
@@ -58,143 +58,143 @@ def quantize_model(
     if not model_id:
         return f"### Invalid input 🐞 Please specify a model name, got {model_id}"
 
-    model_name = model_id.split("/")[-1]
-    username = whoami(oauth_token.token)["name"]
-    new_repo_id = f"{username}/{model_name}-openvino-{dtype}"
-    library_name = TasksManager.infer_library_from_model(model_id, token=oauth_token.token)
-
-    if library_name == "diffusers":
-        ConfigMixin.config_name = "model_index.json"
-        class_name = ConfigMixin.load_config(model_id, token=oauth_token.token)["_class_name"].lower()
-        if "xl" in class_name:
-            task = "stable-diffusion-xl"
-        elif "consistency" in class_name:
-            task = "latent-consistency"
-        else:
-            task = "stable-diffusion"
-    else:
-        task = TasksManager.infer_task_from_model(model_id, token=oauth_token.token)
-
-    if task == "text2text-generation":
-        return "Export of Seq2Seq models is currently disabled."
-
-    if task not in _HEAD_TO_AUTOMODELS:
-        return f"The task '{task}' is not supported, only {_HEAD_TO_AUTOMODELS.keys()} tasks are supported"
-
-    auto_model_class = _HEAD_TO_AUTOMODELS[task]
-    ov_files = _find_files_matching_pattern(
-        model_id,
-        pattern=r"(.*)?openvino(.*)?\_model.xml",
-        use_auth_token=oauth_token.token,
-    )
-    export = len(ov_files) == 0
-
-    is_int8 = dtype == "int8"
-    if library_name == "diffusers":
-        quant_method = "hybrid"
-    elif not is_int8:
-        quant_method = "awq"
-    else:
-        quant_method = "default"
-
-    quantization_config = OVWeightQuantizationConfig(
-        bits=8 if is_int8 else 4,
-        quant_method=quant_method,
-        dataset=None if quant_method == "default" else calibration_dataset,
-        ratio=1.0 if is_int8 else ratio,
-    )
-
-    api = HfApi(token=oauth_token.token)
-    if api.repo_exists(new_repo_id) and not overwritte:
-        return f"Model {new_repo_id} already exists, please set overwritte=True to push to an existing repo"
-
-    with TemporaryDirectory() as d:
-        folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
-        os.makedirs(folder)
-
-        try:
-            api.snapshot_download(repo_id=model_id, local_dir=folder, allow_patterns=["*.json"])
-            ov_model = eval(auto_model_class).from_pretrained(
-                model_id,
-                export=export,
-                cache_dir=folder,
-                token=oauth_token.token,
-                quantization_config=quantization_config,
-            )
-            ov_model.save_pretrained(folder)
-            new_repo_url = api.create_repo(repo_id=new_repo_id, exist_ok=True, private=private_repo)
-            new_repo_id = new_repo_url.repo_id
-            print("Repo created successfully!", new_repo_url)
-
-            folder = Path(folder)
-            for dir_name in (
-                "",
-                "vae_encoder",
-                "vae_decoder",
-                "text_encoder",
-                "text_encoder_2",
-                "unet",
-                "tokenizer",
-                "tokenizer_2",
-                "scheduler",
-                "feature_extractor",
-            ):
-                if not (folder / dir_name).is_dir():
-                    continue
-                for file_path in (folder / dir_name).iterdir():
-                    if file_path.is_file():
-                        try:
-                            api.upload_file(
-                                path_or_fileobj=file_path,
-                                path_in_repo=os.path.join(dir_name, file_path.name),
-                                repo_id=new_repo_id,
-                            )
-                        except Exception as e:
-                            return f"Error uploading file {file_path}: {e}"
-
-            try:
-                card = ModelCard.load(model_id, token=oauth_token.token)
-            except:
-                card = ModelCard("")
-
-            if card.data.tags is None:
-                card.data.tags = []
-            card.data.tags.append("openvino")
-            card.data.base_model = model_id
-            card.text = dedent(
-                f"""
-                This model is a quantized version of [`{model_id}`](https://huggingface.co/{model_id}) and was exported to the OpenVINO format using [optimum-intel](https://github.com/huggingface/optimum-intel) via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space.
-
-                First make sure you have optimum-intel installed:
-
-                ```bash
-                pip install optimum[openvino]
-                ```
-
-                To load your model you can do as follows:
-
-                ```python
-                from optimum.intel import {auto_model_class}
-
-                model_id = "{new_repo_id}"
-                model = {auto_model_class}.from_pretrained(model_id)
-                ```
-                """
-            )
-            card_path = os.path.join(folder, "README.md")
-            card.save(card_path)
-
-            api.upload_file(
-                path_or_fileobj=card_path,
-                path_in_repo="README.md",
-                repo_id=new_repo_id,
-            )
-            return f"This model was successfully quantized, find it under your repo {new_repo_url}"
-        except Exception as e:
-            return f"### Error: {e}"
-        finally:
-            shutil.rmtree(folder, ignore_errors=True)
+    try:
+        model_name = model_id.split("/")[-1]
+        username = whoami(oauth_token.token)["name"]
+        new_repo_id = f"{username}/{model_name}-openvino-{dtype}"
+        library_name = TasksManager.infer_library_from_model(model_id, token=oauth_token.token)
+
+        if library_name == "diffusers":
+            ConfigMixin.config_name = "model_index.json"
+            class_name = ConfigMixin.load_config(model_id, token=oauth_token.token)["_class_name"].lower()
+            if "xl" in class_name:
+                task = "stable-diffusion-xl"
+            elif "consistency" in class_name:
+                task = "latent-consistency"
+            else:
+                task = "stable-diffusion"
+        else:
+            task = TasksManager.infer_task_from_model(model_id, token=oauth_token.token)
+
+        if task == "text2text-generation":
+            return "Export of Seq2Seq models is currently disabled."
+
+        if task not in _HEAD_TO_AUTOMODELS:
+            return f"The task '{task}' is not supported, only {_HEAD_TO_AUTOMODELS.keys()} tasks are supported"
+
+        auto_model_class = _HEAD_TO_AUTOMODELS[task]
+        ov_files = _find_files_matching_pattern(
+            model_id,
+            pattern=r"(.*)?openvino(.*)?\_model.xml",
+            use_auth_token=oauth_token.token,
+        )
+        export = len(ov_files) == 0
+
+        is_int8 = dtype == "int8"
+        if library_name == "diffusers":
+            quant_method = "hybrid"
+        elif not is_int8:
+            quant_method = "awq"
+        else:
+            quant_method = "default"
+
+        quantization_config = OVWeightQuantizationConfig(
+            bits=8 if is_int8 else 4,
+            quant_method=quant_method,
+            dataset=None if quant_method == "default" else calibration_dataset,
+            ratio=1.0 if is_int8 else ratio,
+        )
+
+        api = HfApi(token=oauth_token.token)
+        if api.repo_exists(new_repo_id) and not overwritte:
+            return f"Model {new_repo_id} already exists, please set overwritte=True to push to an existing repo"
+
+        with TemporaryDirectory() as d:
+            folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
+            os.makedirs(folder)
+
+            try:
+                api.snapshot_download(repo_id=model_id, local_dir=folder, allow_patterns=["*.json"])
+                ov_model = eval(auto_model_class).from_pretrained(
+                    model_id,
+                    export=export,
+                    cache_dir=folder,
+                    token=oauth_token.token,
+                    quantization_config=quantization_config,
+                )
+                ov_model.save_pretrained(folder)
+                new_repo_url = api.create_repo(repo_id=new_repo_id, exist_ok=True, private=private_repo)
+                new_repo_id = new_repo_url.repo_id
+                print("Repo created successfully!", new_repo_url)
+
+                folder = Path(folder)
+                for dir_name in (
+                    "",
+                    "vae_encoder",
+                    "vae_decoder",
+                    "text_encoder",
+                    "text_encoder_2",
+                    "unet",
+                    "tokenizer",
+                    "tokenizer_2",
+                    "scheduler",
+                    "feature_extractor",
+                ):
+                    if not (folder / dir_name).is_dir():
+                        continue
+                    for file_path in (folder / dir_name).iterdir():
+                        if file_path.is_file():
+                            try:
+                                api.upload_file(
+                                    path_or_fileobj=file_path,
+                                    path_in_repo=os.path.join(dir_name, file_path.name),
+                                    repo_id=new_repo_id,
+                                )
+                            except Exception as e:
+                                return f"Error uploading file {file_path}: {e}"
+
+                try:
+                    card = ModelCard.load(model_id, token=oauth_token.token)
+                except:
+                    card = ModelCard("")
+
+                if card.data.tags is None:
+                    card.data.tags = []
+                card.data.tags.append("openvino")
+                card.data.base_model = model_id
+                card.text = dedent(
+                    f"""
+                    This model is a quantized version of [`{model_id}`](https://huggingface.co/{model_id}) and was exported to the OpenVINO format using [optimum-intel](https://github.com/huggingface/optimum-intel) via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space.

                    First make sure you have optimum-intel installed:

                    ```bash
                    pip install optimum[openvino]
                    ```

                    To load your model you can do as follows:

                    ```python
                    from optimum.intel import {auto_model_class}

                    model_id = "{new_repo_id}"
                    model = {auto_model_class}.from_pretrained(model_id)
                    ```
                    """
+                )
+                card_path = os.path.join(folder, "README.md")
+                card.save(card_path)
+
+                api.upload_file(
+                    path_or_fileobj=card_path,
+                    path_in_repo="README.md",
+                    repo_id=new_repo_id,
+                )
+                return f"This model was successfully quantized, find it under your repo {new_repo_url}"
+            finally:
+                shutil.rmtree(folder, ignore_errors=True)
+    except Exception as e:
+        return f"### Error: {e}"
 
 
 DESCRIPTION = """
 This Space uses [Optimum Intel](https://huggingface.co/docs/optimum/main/en/intel/openvino/optimization) to automatically apply NNCF weight only quantization on a model hosted on the [Hub](https://huggingface.co/models) and convert it to the [OpenVINO format](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) if not already.
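
For reference, the core of what the Space does for a single model can be reproduced locally with a few lines of optimum-intel. Below is a minimal sketch of the int8 path (8-bit weight-only quantization with the default quantization method and no calibration dataset), assuming a recent optimum-intel release; the model id `"gpt2"` and the output directory are illustrative placeholders:

```python
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# 8-bit weight-only quantization, matching the Space's int8 branch
# (bits=8, no calibration dataset, ratio=1.0).
quantization_config = OVWeightQuantizationConfig(bits=8)

# export=True converts the original checkpoint to the OpenVINO IR format.
model = OVModelForCausalLM.from_pretrained(
    "gpt2",  # illustrative model id
    export=True,
    quantization_config=quantization_config,
)
model.save_pretrained("gpt2-openvino-int8")  # illustrative output directory
```

For the 4-bit path, the diff above instead passes `bits=4`, `quant_method="awq"`, the user-supplied calibration dataset, and the user-chosen `ratio`; for diffusers pipelines it uses `quant_method="hybrid"`.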