echarlaix committed
Commit 9506213 · 1 Parent(s): 93de0d3
Files changed (3):
  1. README.md +9 -4
  2. app.py +202 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -1,11 +1,16 @@
 ---
-title: Nncf Quantization
-emoji: 🔥
-colorFrom: red
-colorTo: indigo
+title: OpenVINO NNCF quantization
+emoji: 🦀
+colorFrom: pink
+colorTo: blue
 sdk: gradio
 sdk_version: 4.37.2
 app_file: app.py
+hf_oauth: true
+hf_oauth_scopes:
+- read-repos
+- write-repos
+- manage-repos
 pinned: false
 license: apache-2.0
 ---
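The new `hf_oauth` front matter enables Hugging Face OAuth for the Space, and the three requested scopes let it read, write, and manage repos on the logged-in user's behalf. As a minimal sketch of the mechanism (illustrative only, not part of this commit), Gradio injects the session token into any event handler that declares a `gr.OAuthToken` parameter:

```python
import gradio as gr


def check_login(oauth_token: gr.OAuthToken | None) -> str:
    # Gradio fills `oauth_token` from the Space's OAuth session;
    # it is None when the visitor has not logged in.
    if oauth_token is None:
        return "Please log in first."
    return "Logged in: the token can be passed to Hub API calls."


with gr.Blocks() as demo:
    gr.LoginButton()
    output = gr.Markdown()
    gr.Button("Check login").click(check_login, inputs=None, outputs=output)

demo.launch()
```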
app.py ADDED
@@ -0,0 +1,202 @@
+import os
+import shutil
+from tempfile import TemporaryDirectory
+from textwrap import dedent
+
+import gradio as gr
+from gradio_huggingfacehub_search import HuggingfaceHubSearch
+from huggingface_hub import HfApi, ModelCard, whoami
+from huggingface_hub.file_download import repo_folder_name
+
+from optimum.exporters.tasks import TasksManager
+from optimum.intel.utils.constant import _TASK_ALIASES
+from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS
+from optimum.intel.utils.modeling_utils import _find_files_matching_pattern
+from optimum.intel import (
+    OVModelForAudioClassification,
+    OVModelForCausalLM,
+    OVModelForFeatureExtraction,
+    OVModelForImageClassification,
+    OVModelForMaskedLM,
+    OVModelForQuestionAnswering,
+    OVModelForSeq2SeqLM,
+    OVModelForSequenceClassification,
+    OVModelForTokenClassification,
+    OVStableDiffusionPipeline,
+    OVStableDiffusionXLPipeline,
+    OVLatentConsistencyModelPipeline,
+    OVModelForPix2Struct,
+    OVWeightQuantizationConfig,
+)
+
+HF_TOKEN = os.environ.get("HF_TOKEN")
+
+
+def process_model(
+    model_id: str,
+    dtype: str,
+    private_repo: bool,
+    task: str,
+    calibration_dataset: str,
+    oauth_token: gr.OAuthToken,
+):
+    if oauth_token.token is None:
+        raise ValueError("You must be logged in to use this space")
+
+    model_name = model_id.split("/")[-1]
+    username = whoami(oauth_token.token)["name"]
+    new_repo_id = f"{username}/{model_name}-openvino-{dtype}"
+
+    # Resolve the task, inferring it from the model when left to "auto"
+    task = TasksManager.map_from_synonym(task)
+    if task == "auto":
+        try:
+            task = TasksManager.infer_task_from_model(model_id)
+        except Exception as e:
+            raise ValueError(
+                "The task could not be automatically inferred. "
+                f"Please pass the task explicitly, choosing from {', '.join(TasksManager.get_all_tasks())}. {e}"
+            )
+
+    task = _TASK_ALIASES.get(task, task)
+    if task not in _HEAD_TO_AUTOMODELS:
+        raise ValueError(
+            f"The task '{task}' is not supported, only {', '.join(_HEAD_TO_AUTOMODELS.keys())} tasks are supported"
+        )
+
+    if task == "text2text-generation":
+        raise ValueError("Export of Seq2Seq models is currently disabled.")
+
+    auto_model_class = _HEAD_TO_AUTOMODELS[task]
+    # Only export the model if the repo does not already contain OpenVINO IR files
+    pattern = r"(.*)?openvino(.*)?_model.xml"
+    ov_files = _find_files_matching_pattern(
+        model_id, pattern, use_auth_token=oauth_token.token
+    )
+    export = len(ov_files) == 0
+    quantization_config = OVWeightQuantizationConfig(bits=8 if dtype == "int8" else 4)
+    api = HfApi(token=oauth_token.token)
+
+    with TemporaryDirectory() as d:
+        folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
+        os.makedirs(folder)
+        try:
+            api.snapshot_download(repo_id=model_id, local_dir=folder, allow_patterns=["*.json"])
+
+            # `auto_model_class` is a class name string, resolved here to the
+            # optimum-intel class imported above; loading applies the quantization
+            ov_model = eval(auto_model_class).from_pretrained(
+                model_id, export=export, quantization_config=quantization_config
+            )
+            ov_model.save_pretrained(folder)
+
+            new_repo_url = api.create_repo(
+                repo_id=new_repo_id, exist_ok=True, private=private_repo
+            )
+            new_repo_id = new_repo_url.repo_id
+            print("Repo created successfully!", new_repo_url)
+
+            file_names = (f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f)))
+
+            for file in file_names:
+                file_path = os.path.join(folder, file)
+                try:
+                    api.upload_file(
+                        path_or_fileobj=file_path,
+                        path_in_repo=file,
+                        repo_id=new_repo_id,
+                    )
+                except Exception as e:
+                    raise Exception(f"Error uploading file {file_path}: {e}")
+
+            # Reuse the original model card when available, otherwise start from an empty one
+            try:
+                card = ModelCard.load(model_id, token=oauth_token.token)
+            except Exception:
+                card = ModelCard("")
+
+            if card.data.tags is None:
+                card.data.tags = []
+            card.data.tags.append("openvino")
+            card.data.base_model = model_id
+            card.text = dedent(
+                f"""
+                This model was exported to OpenVINO from [`{model_id}`](https://huggingface.co/{model_id}) using [optimum-intel](https://github.com/huggingface/optimum-intel) via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space.
+
+                Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
+
+                First make sure you have optimum-intel installed:
+
+                ```bash
+                pip install optimum[openvino]
+                ```
+
+                To load your model you can do as follows:
+
+                ```python
+                from optimum.intel import {auto_model_class}
+
+                model_id = "{new_repo_id}"
+                model = {auto_model_class}.from_pretrained(model_id)
+                ```
+                """
+            )
+            card_path = os.path.join(folder, "README.md")
+            card.save(card_path)
+
+            api.upload_file(
+                path_or_fileobj=card_path,
+                path_in_repo="README.md",
+                repo_id=new_repo_id,
+            )
+            return f"Uploaded successfully with {dtype} option! Find your repo <a href='{new_repo_url}' target='_blank'>here</a>"
+        finally:
+            shutil.rmtree(folder, ignore_errors=True)
+
+
+with gr.Blocks() as demo:
+    gr.Markdown("You must be logged in to use this space")
+    gr.LoginButton(min_width=250)
+
+    model_id = HuggingfaceHubSearch(
+        label="Hub Model ID",
+        placeholder="Search for model id on the hub",
+        search_type="model",
+    )
+    dtype = gr.Dropdown(
+        ["int8", "int4"],
+        value="int8",
+        label="Precision data type",
+        filterable=False,
+        visible=True,
+    )
+    private_repo = gr.Checkbox(
+        value=False,
+        label="Private Repo",
+        info="Create a private repo under your username",
+    )
+    task = gr.Textbox(
+        value="auto",
+        label="Task: can be left to auto and will then be automatically inferred",
+        max_lines=1,
+    )
+    calibration_dataset = gr.Textbox(label="Calibration dataset", value="", visible=False)
+    interface = gr.Interface(
+        fn=process_model,
+        inputs=[
+            model_id,
+            dtype,
+            private_repo,
+            task,
+            calibration_dataset,
+        ],
+        outputs=[
+            gr.Markdown(label="output"),
+        ],
+        title="Quantize your model with OpenVINO NNCF ⚡!",
+        description="The space takes an HF repo as input, quantizes it, exports it to OpenVINO, and pushes the result to a repo under your HF user namespace.",
+        api_name=False,
+    )
+
+    interface.render()
+
+demo.launch()
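Once the Space has pushed a quantized model, loading it requires only optimum-intel. A brief usage sketch, assuming a text-generation model and a hypothetical repo id following the `{username}/{model_name}-openvino-{dtype}` naming scheme the app generates:

```python
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

# Hypothetical repo id following the Space's naming scheme.
repo_id = "username/model-openvino-int8"

model = OVModelForCausalLM.from_pretrained(repo_id)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

inputs = tokenizer("Hello, world!", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```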
requirements.txt ADDED
@@ -0,0 +1,5 @@
+huggingface_hub==0.23.4
+optimum[diffusers]==1.20.0
+optimum-intel[openvino]==1.18.0
+gradio[oauth]>=4.28.0
+gradio_huggingfacehub_search==0.0.6
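To try the app locally against the same pinned environment, the usual install-and-launch steps should suffice (note that the OAuth login flow is only fully functional when running on Spaces):

```bash
pip install -r requirements.txt
python app.py
```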