echarlaix committed
Commit 9506213 · 1 Parent(s): 93de0d3
Files changed (3):
  1. README.md +9 -4
  2. app.py +202 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -1,11 +1,16 @@
 ---
-title: Nncf Quantization
-emoji: 🔥
-colorFrom: red
-colorTo: indigo
+title: OpenVINO NNCF quantization
+emoji: 🦀
+colorFrom: pink
+colorTo: blue
 sdk: gradio
 sdk_version: 4.37.2
 app_file: app.py
+hf_oauth: true
+hf_oauth_scopes:
+- read-repos
+- write-repos
+- manage-repos
 pinned: false
 license: apache-2.0
 ---
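The new `hf_oauth` front matter enables Hugging Face OAuth for the Space, and the three requested scopes let it read, write, and manage repos on the logged-in user's behalf. As a minimal sketch of the mechanism (illustrative only, not part of this commit), Gradio injects the session token into any event handler that declares a `gr.OAuthToken` parameter:

```python
import gradio as gr


def check_login(oauth_token: gr.OAuthToken | None) -> str:
    # Gradio fills `oauth_token` from the Space's OAuth session;
    # it is None when the visitor has not logged in.
    if oauth_token is None:
        return "Please log in first."
    return "Logged in: the token can be passed to Hub API calls."


with gr.Blocks() as demo:
    gr.LoginButton()
    output = gr.Markdown()
    gr.Button("Check login").click(check_login, inputs=None, outputs=output)

demo.launch()
```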
app.py ADDED
@@ -0,0 +1,202 @@
+import os
+import shutil
+from tempfile import TemporaryDirectory
+from textwrap import dedent
+
+import gradio as gr
+from gradio_huggingfacehub_search import HuggingfaceHubSearch
+from huggingface_hub import HfApi, ModelCard, whoami
+from huggingface_hub.file_download import repo_folder_name
+
+from optimum.exporters.tasks import TasksManager
+from optimum.intel.utils.constant import _TASK_ALIASES
+from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS
+from optimum.intel.utils.modeling_utils import _find_files_matching_pattern
+from optimum.intel import (
+    OVModelForAudioClassification,
+    OVModelForCausalLM,
+    OVModelForFeatureExtraction,
+    OVModelForImageClassification,
+    OVModelForMaskedLM,
+    OVModelForQuestionAnswering,
+    OVModelForSeq2SeqLM,
+    OVModelForSequenceClassification,
+    OVModelForTokenClassification,
+    OVStableDiffusionPipeline,
+    OVStableDiffusionXLPipeline,
+    OVLatentConsistencyModelPipeline,
+    OVModelForPix2Struct,
+    OVWeightQuantizationConfig,
+)
+
+HF_TOKEN = os.environ.get("HF_TOKEN")
+
+
+def process_model(
+    model_id: str,
+    dtype: str,
+    private_repo: bool,
+    task: str,
+    calibration_dataset: str,
+    oauth_token: gr.OAuthToken,
+):
+    if oauth_token.token is None:
+        raise ValueError("You must be logged in to use this space")
+
+    model_name = model_id.split("/")[-1]
+    username = whoami(oauth_token.token)["name"]
+    new_repo_id = f"{username}/{model_name}-openvino-{dtype}"
+
+    # Resolve the task, inferring it from the model when left to "auto"
+    task = TasksManager.map_from_synonym(task)
+    if task == "auto":
+        try:
+            task = TasksManager.infer_task_from_model(model_id)
+        except Exception as e:
+            raise ValueError(
+                "The task could not be automatically inferred. "
+                f"Please pass the task explicitly, choosing from {', '.join(TasksManager.get_all_tasks())}. {e}"
+            )
+
+    task = _TASK_ALIASES.get(task, task)
+    if task not in _HEAD_TO_AUTOMODELS:
+        raise ValueError(
+            f"The task '{task}' is not supported, only {', '.join(_HEAD_TO_AUTOMODELS.keys())} tasks are supported"
+        )
+
+    if task == "text2text-generation":
+        raise ValueError("Export of Seq2Seq models is currently disabled.")
+
+    auto_model_class = _HEAD_TO_AUTOMODELS[task]
+    # Only export the model if the repo does not already contain OpenVINO IR files
+    pattern = r"(.*)?openvino(.*)?_model.xml"
+    ov_files = _find_files_matching_pattern(
+        model_id, pattern, use_auth_token=oauth_token.token
+    )
+    export = len(ov_files) == 0
+    quantization_config = OVWeightQuantizationConfig(bits=8 if dtype == "int8" else 4)
+    api = HfApi(token=oauth_token.token)
+
+    with TemporaryDirectory() as d:
+        folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
+        os.makedirs(folder)
+        try:
+            api.snapshot_download(repo_id=model_id, local_dir=folder, allow_patterns=["*.json"])
+
+            # `auto_model_class` is a class name string, resolved here to the
+            # optimum-intel class imported above; loading applies the quantization
+            ov_model = eval(auto_model_class).from_pretrained(
+                model_id, export=export, quantization_config=quantization_config
+            )
+            ov_model.save_pretrained(folder)
+
+            new_repo_url = api.create_repo(
+                repo_id=new_repo_id, exist_ok=True, private=private_repo
+            )
+            new_repo_id = new_repo_url.repo_id
+            print("Repo created successfully!", new_repo_url)
+
+            file_names = (f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f)))
+
+            for file in file_names:
+                file_path = os.path.join(folder, file)
+                try:
+                    api.upload_file(
+                        path_or_fileobj=file_path,
+                        path_in_repo=file,
+                        repo_id=new_repo_id,
+                    )
+                except Exception as e:
+                    raise Exception(f"Error uploading file {file_path}: {e}")
+
+            # Reuse the original model card when available, otherwise start from an empty one
+            try:
+                card = ModelCard.load(model_id, token=oauth_token.token)
+            except Exception:
+                card = ModelCard("")
+
+            if card.data.tags is None:
+                card.data.tags = []
+            card.data.tags.append("openvino")
+            card.data.base_model = model_id
+            card.text = dedent(
+                f"""
+                This model was exported to OpenVINO from [`{model_id}`](https://huggingface.co/{model_id}) using [optimum-intel](https://github.com/huggingface/optimum-intel) via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space.
+
+                Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
+
+                First make sure you have optimum-intel installed:
+
+                ```bash
+                pip install optimum[openvino]
+                ```
+
+                To load your model you can do as follows:
+
+                ```python
+                from optimum.intel import {auto_model_class}
+
+                model_id = "{new_repo_id}"
+                model = {auto_model_class}.from_pretrained(model_id)
+                ```
+                """
+            )
+            card_path = os.path.join(folder, "README.md")
+            card.save(card_path)
+
+            api.upload_file(
+                path_or_fileobj=card_path,
+                path_in_repo="README.md",
+                repo_id=new_repo_id,
+            )
+            return f"Uploaded successfully with {dtype} option! Find your repo <a href='{new_repo_url}' target='_blank'>here</a>"
+        finally:
+            shutil.rmtree(folder, ignore_errors=True)
+
+
+with gr.Blocks() as demo:
+    gr.Markdown("You must be logged in to use this space")
+    gr.LoginButton(min_width=250)
+
+    model_id = HuggingfaceHubSearch(
+        label="Hub Model ID",
+        placeholder="Search for model id on the hub",
+        search_type="model",
+    )
+    dtype = gr.Dropdown(
+        ["int8", "int4"],
+        value="int8",
+        label="Precision data type",
+        filterable=False,
+        visible=True,
+    )
+    private_repo = gr.Checkbox(
+        value=False,
+        label="Private Repo",
+        info="Create a private repo under your username",
+    )
+    task = gr.Textbox(
+        value="auto",
+        label="Task: can be left to auto and will then be automatically inferred",
+        max_lines=1,
+    )
+    calibration_dataset = gr.Textbox(label="Calibration dataset", value="", visible=False)
+    interface = gr.Interface(
+        fn=process_model,
+        inputs=[
+            model_id,
+            dtype,
+            private_repo,
+            task,
+            calibration_dataset,
+        ],
+        outputs=[
+            gr.Markdown(label="output"),
+        ],
+        title="Quantize your model with OpenVINO NNCF ⚡!",
+        description="The space takes an HF repo as input, quantizes it, exports it to OpenVINO, and pushes the result to a repo under your HF user namespace.",
+        api_name=False,
+    )
+
+    interface.render()
+
+demo.launch()
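Once the Space has pushed a quantized model, loading it requires only optimum-intel. A brief usage sketch, assuming a text-generation model and a hypothetical repo id following the `{username}/{model_name}-openvino-{dtype}` naming scheme the app generates:

```python
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

# Hypothetical repo id following the Space's naming scheme.
repo_id = "username/model-openvino-int8"

model = OVModelForCausalLM.from_pretrained(repo_id)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

inputs = tokenizer("Hello, world!", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```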
requirements.txt ADDED
@@ -0,0 +1,5 @@
+huggingface_hub==0.23.4
+optimum[diffusers]==1.20.0
+optimum-intel[openvino]==1.18.0
+gradio[oauth]>=4.28.0
+gradio_huggingfacehub_search==0.0.6
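To try the app locally against the same pinned environment, the usual install-and-launch steps should suffice (note that the OAuth login flow is only fully functional when running on Spaces):

```bash
pip install -r requirements.txt
python app.py
```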