Spaces:

OpenVINO
/

nncf-quantization

Running

App Files Community

echarlaix HF Staff committed on Jul 21, 2024

Commit

f5301ba

1 Parent(s): edf5256

fix

Browse files

Files changed (1) hide show

app.py +129 -129

app.py CHANGED Viewed

@@ -30,17 +30,17 @@ from optimum.intel import (
 from diffusers import ConfigMixin
 _HEAD_TO_AUTOMODELS = {
-    "feature-extraction": OVModelForFeatureExtraction,
-    "fill-mask": OVModelForMaskedLM,
-    "text-generation": OVModelForCausalLM,
-    "text-classification": OVModelForSequenceClassification,
-    "token-classification": OVModelForTokenClassification,
-    "question-answering": OVModelForQuestionAnswering,
-    "image-classification": OVModelForImageClassification,
-    "audio-classification": OVModelForAudioClassification,
-    "stable-diffusion": OVStableDiffusionPipeline,
-    "stable-diffusion-xl": OVStableDiffusionXLPipeline,
-    "latent-consistency": OVLatentConsistencyModelPipeline,
 }
 def quantize_model(
@@ -58,143 +58,143 @@ def quantize_model(
     if not model_id:
         return f"### Invalid input 🐞 Please specify a model name, got {model_id}"
-    model_name = model_id.split("/")[-1]
-    username = whoami(oauth_token.token)["name"]
-    new_repo_id = f"{username}/{model_name}-openvino-{dtype}"
-    library_name = TasksManager.infer_library_from_model(model_id, token=oauth_token.token)
-    if library_name == "diffusers":
-        ConfigMixin.config_name = "model_index.json"
-        class_name = ConfigMixin.load_config(model_id, token=oauth_token.token)["_class_name"].lower()
-        if "xl" in class_name:
-            task = "stable-diffusion-xl"
-        elif "consistency" in class_name:
-            task = "latent-consistency"
         else:
-            task = "stable-diffusion"
-    else:
-        task = TasksManager.infer_task_from_model(model_id, token=oauth_token.token)
-    if task == "text2text-generation":
-        return "Export of Seq2Seq models is currently disabled."
-    if task not in _HEAD_TO_AUTOMODELS:
-        return f"The task '{task}' is not supported, only {_HEAD_TO_AUTOMODELS.keys()} tasks are supported"
-    auto_model_class = _HEAD_TO_AUTOMODELS[task]
-    ov_files = _find_files_matching_pattern(
-        model_id,
-        pattern=r"(.*)?openvino(.*)?\_model.xml",
-        use_auth_token=oauth_token.token,
-    )
-    export = len(ov_files) == 0
-    is_int8 = dtype == "int8"
-    if library_name == "diffusers":
-        quant_method = "hybrid"
-    elif not is_int8:
-        quant_method = "awq"
-    else:
-        quant_method = "default"
-    quantization_config = OVWeightQuantizationConfig(
-        bits=8 if is_int8 else 4,
-        quant_method=quant_method,
-        dataset=None if quant_method=="default" else calibration_dataset,
-        ratio=1.0 if is_int8 else ratio,
-    )
-    api = HfApi(token=oauth_token.token)
-    if api.repo_exists(new_repo_id) and not overwritte:
-        return f"Model {new_repo_id} already exist, please set overwritte=True to push on an existing repo"
-    with TemporaryDirectory() as d:
-        folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
-        os.makedirs(folder)
-        try:
-            api.snapshot_download(repo_id=model_id, local_dir=folder, allow_patterns=["*.json"])
-            ov_model = eval(auto_model_class).from_pretrained(
-                model_id,
-                export=export,
-                cache_dir=folder,
-                token=oauth_token.token,
-                quantization_config=quantization_config
-            )
-            ov_model.save_pretrained(folder)
-            new_repo_url = api.create_repo(repo_id=new_repo_id, exist_ok=True, private=private_repo)
-            new_repo_id = new_repo_url.repo_id
-            print("Repo created successfully!", new_repo_url)
-            folder = Path(folder)
-            for dir_name in (
-                "",
-                "vae_encoder",
-                "vae_decoder",
-                "text_encoder",
-                "text_encoder_2",
-                "unet",
-                "tokenizer",
-                "tokenizer_2",
-                "scheduler",
-                "feature_extractor",
-            ):
-                if not (folder / dir_name).is_dir():
-                    continue
-                for file_path in (folder / dir_name).iterdir():
-                    if file_path.is_file():
-                        try:
-                            api.upload_file(
-                                path_or_fileobj=file_path,
-                                path_in_repo=os.path.join(dir_name, file_path.name),
-                                repo_id=new_repo_id,
-                            )
-                        except Exception as e:
-                            return f"Error uploading file {file_path}: {e}"
-            try:
-                card = ModelCard.load(model_id, token=oauth_token.token)
-            except:
-                card = ModelCard("")
-            if card.data.tags is None:
-                card.data.tags = []
-            card.data.tags.append("openvino")
-            card.data.base_model = model_id
-            card.text = dedent(
-                f"""
-                This model is a quantized version of [`{model_id}`](https://huggingface.co/{model_id}) and was exported to the OpenVINO format using [optimum-intel](https://github.com/huggingface/optimum-intel) via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space.
-                First make sure you have optimum-intel installed:
-                ```bash
-                pip install optimum[openvino]
-                ```
-                To load your model you can do as follows:
-                ```python
-                from optimum.intel import {auto_model_class}
-                model_id = "{new_repo_id}"
-                model = {auto_model_class}.from_pretrained(model_id)
-                ```
-                """
-            )
-            card_path = os.path.join(folder, "README.md")
-            card.save(card_path)
-            api.upload_file(
-                path_or_fileobj=card_path,
-                path_in_repo="README.md",
-                repo_id=new_repo_id,
-            )
-            return f"This model was successfully quantized, find it under your repo {new_repo_url}'"
-        except Exception as e:
-            return f"### Error: {e}"
-        finally:
-            shutil.rmtree(folder, ignore_errors=True)
 DESCRIPTION = """
 This Space uses [Optimum Intel](https://huggingface.co/docs/optimum/main/en/intel/openvino/optimization) to automatically apply NNCF weight only quantization on a model hosted on the [Hub](https://huggingface.co/models) and convert it to the [OpenVINO format](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) if not already.

 from diffusers import ConfigMixin
 _HEAD_TO_AUTOMODELS = {
+    "feature-extraction": "OVModelForFeatureExtraction",
+    "fill-mask": "OVModelForMaskedLM",
+    "text-generation": "OVModelForCausalLM",
+    "text-classification": "OVModelForSequenceClassification",
+    "token-classification": "OVModelForTokenClassification",
+    "question-answering": "OVModelForQuestionAnswering",
+    "image-classification": "OVModelForImageClassification",
+    "audio-classification": "OVModelForAudioClassification",
+    "stable-diffusion": "OVStableDiffusionPipeline",
+    "stable-diffusion-xl": "OVStableDiffusionXLPipeline",
+    "latent-consistency": "OVLatentConsistencyModelPipeline",
 }
 def quantize_model(
     if not model_id:
         return f"### Invalid input 🐞 Please specify a model name, got {model_id}"
+    try:
+        model_name = model_id.split("/")[-1]
+        username = whoami(oauth_token.token)["name"]
+        new_repo_id = f"{username}/{model_name}-openvino-{dtype}"
+        library_name = TasksManager.infer_library_from_model(model_id, token=oauth_token.token)
+        if library_name == "diffusers":
+            ConfigMixin.config_name = "model_index.json"
+            class_name = ConfigMixin.load_config(model_id, token=oauth_token.token)["_class_name"].lower()
+            if "xl" in class_name:
+                task = "stable-diffusion-xl"
+            elif "consistency" in class_name:
+                task = "latent-consistency"
+            else:
+                task = "stable-diffusion"
         else:
+            task = TasksManager.infer_task_from_model(model_id, token=oauth_token.token)
+        if task == "text2text-generation":
+            return "Export of Seq2Seq models is currently disabled."
+        if task not in _HEAD_TO_AUTOMODELS:
+            return f"The task '{task}' is not supported, only {_HEAD_TO_AUTOMODELS.keys()} tasks are supported"
+        auto_model_class = _HEAD_TO_AUTOMODELS[task]
+        ov_files = _find_files_matching_pattern(
+            model_id,
+            pattern=r"(.*)?openvino(.*)?\_model.xml",
+            use_auth_token=oauth_token.token,
+        )
+        export = len(ov_files) == 0
+        is_int8 = dtype == "int8"
+        if library_name == "diffusers":
+            quant_method = "hybrid"
+        elif not is_int8:
+            quant_method = "awq"
+        else:
+            quant_method = "default"
+        quantization_config = OVWeightQuantizationConfig(
+            bits=8 if is_int8 else 4,
+            quant_method=quant_method,
+            dataset=None if quant_method=="default" else calibration_dataset,
+            ratio=1.0 if is_int8 else ratio,
+        )
+        api = HfApi(token=oauth_token.token)
+        if api.repo_exists(new_repo_id) and not overwritte:
+            return f"Model {new_repo_id} already exist, please set overwritte=True to push on an existing repo"
+        with TemporaryDirectory() as d:
+            folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
+            os.makedirs(folder)
+            try:
+                api.snapshot_download(repo_id=model_id, local_dir=folder, allow_patterns=["*.json"])
+                ov_model = eval(auto_model_class).from_pretrained(
+                    model_id,
+                    export=export,
+                    cache_dir=folder,
+                    token=oauth_token.token,
+                    quantization_config=quantization_config
+                )
+                ov_model.save_pretrained(folder)
+                new_repo_url = api.create_repo(repo_id=new_repo_id, exist_ok=True, private=private_repo)
+                new_repo_id = new_repo_url.repo_id
+                print("Repo created successfully!", new_repo_url)
+                folder = Path(folder)
+                for dir_name in (
+                    "",
+                    "vae_encoder",
+                    "vae_decoder",
+                    "text_encoder",
+                    "text_encoder_2",
+                    "unet",
+                    "tokenizer",
+                    "tokenizer_2",
+                    "scheduler",
+                    "feature_extractor",
+                ):
+                    if not (folder / dir_name).is_dir():
+                        continue
+                    for file_path in (folder / dir_name).iterdir():
+                        if file_path.is_file():
+                            try:
+                                api.upload_file(
+                                    path_or_fileobj=file_path,
+                                    path_in_repo=os.path.join(dir_name, file_path.name),
+                                    repo_id=new_repo_id,
+                                )
+                            except Exception as e:
+                                return f"Error uploading file {file_path}: {e}"
+                try:
+                    card = ModelCard.load(model_id, token=oauth_token.token)
+                except:
+                    card = ModelCard("")
+                if card.data.tags is None:
+                    card.data.tags = []
+                card.data.tags.append("openvino")
+                card.data.base_model = model_id
+                card.text = dedent(
+                    f"""
+                    This model is a quantized version of [`{model_id}`](https://huggingface.co/{model_id}) and was exported to the OpenVINO format using [optimum-intel](https://github.com/huggingface/optimum-intel) via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space.
+                    First make sure you have optimum-intel installed:
+                    ```bash
+                    pip install optimum[openvino]
+                    ```
+                    To load your model you can do as follows:
+                    ```python
+                    from optimum.intel import {auto_model_class}
+                    model_id = "{new_repo_id}"
+                    model = {auto_model_class}.from_pretrained(model_id)
+                    ```
+                    """
+                )
+                card_path = os.path.join(folder, "README.md")
+                card.save(card_path)
+                api.upload_file(
+                    path_or_fileobj=card_path,
+                    path_in_repo="README.md",
+                    repo_id=new_repo_id,
+                )
+                return f"This model was successfully quantized, find it under your repo {new_repo_url}'"
+            finally:
+                shutil.rmtree(folder, ignore_errors=True)
+    except Exception as e:
+        return f"### Error: {e}"
 DESCRIPTION = """
 This Space uses [Optimum Intel](https://huggingface.co/docs/optimum/main/en/intel/openvino/optimization) to automatically apply NNCF weight only quantization on a model hosted on the [Hub](https://huggingface.co/models) and convert it to the [OpenVINO format](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) if not already.