varun4 commited on
Commit
81e28a1
1 Parent(s): 0606100

removed unneeded files

Browse files
data.json CHANGED
@@ -6,12 +6,12 @@
6
  86.67845249176025
7
  ],
8
  "Score": [
9
- 5
10
  ],
11
  "q8 Model Size (MB)": [
12
- 55.91230869293213
13
  ],
14
  "q8 Score": [
15
- 0.26228089622461903
16
  ]
17
- }
 
6
  86.67845249176025
7
  ],
8
  "Score": [
9
+ 0.5057898558837041
10
  ],
11
  "q8 Model Size (MB)": [
12
+ 21.908215522766113
13
  ],
14
  "q8 Score": [
15
+ 0.48741800571641297
16
  ]
17
+ }
ort.py DELETED
@@ -1,14 +0,0 @@
1
- from transformers import AutoTokenizer
2
- from optimum.onnxruntime import ORTModelForFeatureExtraction
3
- import torch
4
-
5
- tokenizer = AutoTokenizer.from_pretrained("./models/optimum/all-MiniLM-L6-v2")
6
- model = ORTModelForFeatureExtraction.from_pretrained("./models/optimum/all-MiniLM-L6-v2")
7
-
8
- inputs = tokenizer("My name is Philipp and I live in Germany.", return_tensors="np")
9
-
10
- outputs = model(**inputs)
11
- last_hidden_state = outputs.last_hidden_state
12
-
13
- # no mean pooling
14
- print(list(last_hidden_state.shape))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ort_sentence_transformers.py DELETED
@@ -1,44 +0,0 @@
1
- from sentence_transformers import SentenceTransformer
2
- from scipy.spatial import distance
3
-
4
-
5
- device = "mps"
6
- # Load the models
7
- # so close, and yet, .! so far ~!~ ~
8
- providers = [
9
- ('CoreMLExecutionProvider', {
10
- 'device_id': 0,
11
- }),
12
- 'CPUExecutionProvider',
13
- ]
14
-
15
- model1 = SentenceTransformer('./models/optimum/all-MiniLM-L6-v2', device=device,
16
- model_args={
17
- "providers": providers
18
- })
19
- print("\033[91m", model1.modules(), "\033[0m")
20
- # model2 = SentenceTransformer('./models/all-MiniLM-L6-v2', device=device)
21
-
22
- sentences = [
23
- 'This framework generates embeddings for each input sentence',
24
- 'Sentences are passed as a list of string.',
25
- 'The quick brown fox jumps over the lazy dog.'
26
- ]
27
-
28
-
29
-
30
- # Get embeddings for each sentence from both models
31
- embeddings1 = model1.encode(sentences)
32
- # embeddings2 = model2.encode(sentences)
33
-
34
-
35
- # Compute and print the cosine similarity for each sentence's embeddings from the two models
36
- for sentence, emb1, emb2 in zip(sentences, embeddings1, range(3)):
37
- sim = 1 - distance.cosine(emb1, emb2) # Cosine similarity is the complement of cosine distance
38
- print(f"Sentence: {sentence}")
39
- print(f"Cosine Similarity: {sim:.4f}")
40
- print("")
41
-
42
- # print(model2.device)
43
-
44
- # should be working perfectly :))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
qMTEB.yml ADDED
@@ -0,0 +1,468 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: base
2
+ channels:
3
+ - pytorch-nightly
4
+ - anaconda
5
+ - conda-forge
6
+ - defaults
7
+ dependencies:
8
+ - backports=1.0=pyhd8ed1ab_3
9
+ - backports.lzma=0.0.14=py39hd5dad98_3
10
+ - blas=1.0=mkl
11
+ - blosc=1.21.0=h8346a28_1
12
+ - brotli-bin=1.0.9=hca72f7f_7
13
+ - brotlipy=0.7.0=py39h9ed2024_1003
14
+ - brunsli=0.1=h23ab428_0
15
+ - bzip2=1.0.8=h1de35cc_0
16
+ - c-ares=1.18.1=hca72f7f_0
17
+ - ca-certificates=2022.10.11=hecd8cb5_0
18
+ - certifi=2022.12.7=py39hecd8cb5_0
19
+ - cffi=1.14.4=py39h2125817_0
20
+ - cfitsio=3.470=hbd21bf8_7
21
+ - chardet=3.0.4=py39hecd8cb5_1003
22
+ - charls=2.2.0=h23ab428_0
23
+ - cloudpickle=2.0.0=pyhd3eb1b0_0
24
+ - conda=22.11.1=py39hecd8cb5_4
25
+ - conda-package-handling=1.7.2=py39h9ed2024_1
26
+ - cryptography=38.0.4=py39hf6deb26_0
27
+ - dask-core=2022.7.0=py39hecd8cb5_0
28
+ - expat=2.5.0=hf0c8a7f_0
29
+ - ffmpeg=4.2.2=h97e5cf8_0
30
+ - fftw=3.3.9=h9ed2024_1
31
+ - freetype=2.12.1=hd8bbffd_0
32
+ - fsspec=2022.11.0=py39hecd8cb5_0
33
+ - gettext=0.21.0=h7535e17_0
34
+ - giflib=5.2.1=haf1e3a3_0
35
+ - gmp=6.2.1=he9d5cce_3
36
+ - gmpy2=2.1.2=py39hd5de756_0
37
+ - gnutls=3.6.15=hed9c0bf_0
38
+ - icu=58.2=h0a44026_3
39
+ - idna=2.10=py_0
40
+ - imagecodecs=2021.8.26=py39h0f85e6e_1
41
+ - intel-openmp=2021.4.0=hecd8cb5_3538
42
+ - jpeg=9e=hca72f7f_0
43
+ - jxrlib=1.1=haf1e3a3_2
44
+ - krb5=1.19.2=hcd88c3b_0
45
+ - lame=3.100=h1de35cc_0
46
+ - lcms2=2.12=hf1fd2bf_0
47
+ - lerc=3.0=he9d5cce_0
48
+ - libaec=1.0.4=hb1e8313_1
49
+ - libbrotlicommon=1.0.9=hca72f7f_7
50
+ - libbrotlidec=1.0.9=hca72f7f_7
51
+ - libbrotlienc=1.0.9=hca72f7f_7
52
+ - libcurl=7.86.0=ha585b31_0
53
+ - libcxx=14.0.6=h9765a3e_0
54
+ - libdeflate=1.8=h9ed2024_5
55
+ - libedit=3.1.20221030=h6c40b1e_0
56
+ - libev=4.33=h9ed2024_1
57
+ - libffi=3.3=hb1e8313_2
58
+ - libgfortran=5.0.0=11_3_0_hecd8cb5_28
59
+ - libgfortran5=11.3.0=h9dfd629_28
60
+ - libiconv=1.16=hca72f7f_2
61
+ - libidn2=2.3.2=h9ed2024_0
62
+ - libnghttp2=1.46.0=ha29bfda_0
63
+ - libopus=1.3.1=h1de35cc_0
64
+ - libpng=1.6.37=ha441bb4_0
65
+ - libssh2=1.10.0=h0a4fc7d_0
66
+ - libtasn1=4.16.0=h9ed2024_0
67
+ - libtiff=4.4.0=h2ef1027_0
68
+ - libunistring=0.9.10=h9ed2024_0
69
+ - libvpx=1.7.0=h378b8a2_0
70
+ - libwebp=1.2.4=h56c3ce4_0
71
+ - libwebp-base=1.2.4=hca72f7f_0
72
+ - libxml2=2.9.14=hbf8cd5e_0
73
+ - libzopfli=1.0.3=hb1e8313_0
74
+ - llvm-openmp=14.0.6=h0dcd299_0
75
+ - locket=1.0.0=py39hecd8cb5_0
76
+ - lz4-c=1.9.3=h23ab428_1
77
+ - mkl=2021.4.0=hecd8cb5_637
78
+ - mkl-service=2.4.0=py39h9ed2024_0
79
+ - mkl_fft=1.3.1=py39h4ab4a9b_0
80
+ - mkl_random=1.2.2=py39hb2f4e1b_0
81
+ - mpc=1.1.0=h6ef4df4_1
82
+ - mpfr=4.0.2=h9066e36_1
83
+ - mpmath=1.2.1=py39hecd8cb5_0
84
+ - ncurses=6.3=hca72f7f_3
85
+ - nettle=3.7.3=h230ac6f_1
86
+ - openh264=2.1.1=h8346a28_0
87
+ - openjpeg=2.4.0=h66ea3da_0
88
+ - openssl=1.1.1s=hca72f7f_0
89
+ - partd=1.2.0=pyhd3eb1b0_1
90
+ - pathlib2=2.3.7.post1=py39h6e9494a_2
91
+ - pluggy=1.0.0=py39hecd8cb5_1
92
+ - progress=1.5=py39hecd8cb5_0
93
+ - pthread-stubs=0.4=hc929b4f_1001
94
+ - pycosat=0.6.3=py39h9ed2024_0
95
+ - pycparser=2.20=py_2
96
+ - pyopenssl=20.0.0=pyhd3eb1b0_1
97
+ - pypubsub=4.0.3=py_0
98
+ - pysocks=1.7.1=py39hecd8cb5_0
99
+ - python=3.9.1=h88f2d9e_2
100
+ - python.app=3=py39h9ed2024_0
101
+ - python_abi=3.9=2_cp39
102
+ - pywavelets=1.4.1=py39h6c40b1e_0
103
+ - pyyaml=6.0=py39h6c40b1e_1
104
+ - readline=8.0=h1de35cc_0
105
+ - requests=2.28.1=py39hecd8cb5_0
106
+ - ruamel.yaml=0.16.12=py39h9ed2024_1
107
+ - ruamel.yaml.clib=0.2.6=py39hca72f7f_0
108
+ - ruamel_yaml=0.15.80=py39h9ed2024_0
109
+ - scikit-image=0.19.3=py39hcec6c5f_1
110
+ - six=1.15.0=py39hecd8cb5_0
111
+ - snappy=1.1.9=he9d5cce_0
112
+ - sqlite=3.33.0=hffcf06c_0
113
+ - sympy=1.11.1=py39hecd8cb5_0
114
+ - tifffile=2021.7.2=pyhd3eb1b0_2
115
+ - tk=8.6.10=hb0a8c7a_0
116
+ - torchaudio=0.14.0.dev20221214=py39_cpu
117
+ - torchvision=0.15.0.dev20221214=py39_cpu
118
+ - typing_extensions=4.3.0=py39hecd8cb5_0
119
+ - tzdata=2020d=h14c3975_0
120
+ - wheel=0.36.1=pyhd3eb1b0_0
121
+ - x264=1!157.20191217=h1de35cc_0
122
+ - xz=5.2.8=h6c40b1e_0
123
+ - yaml=0.2.5=haf1e3a3_0
124
+ - zfp=0.5.5=he9d5cce_6
125
+ - zlib=1.2.13=h4dc903c_0
126
+ - zstd=1.5.2=hcb37349_0
127
+ - pip:
128
+ - absl-py==1.4.0
129
+ - addict==2.4.0
130
+ - aiohttp==3.8.3
131
+ - aioice==0.7.6
132
+ - aiortc==1.3.2
133
+ - aiosignal==1.3.1
134
+ - antlr4-python3-runtime==4.9.3
135
+ - anyio==3.6.2
136
+ - appdirs==1.4.4
137
+ - appnope==0.1.3
138
+ - argon2-cffi==21.3.0
139
+ - argon2-cffi-bindings==21.2.0
140
+ - arrow==1.2.3
141
+ - asttokens==2.2.1
142
+ - astunparse==1.6.3
143
+ - async-generator==1.10
144
+ - async-timeout==4.0.2
145
+ - attrs==22.1.0
146
+ - audioread==3.0.0
147
+ - autobahn==22.7.1
148
+ - av==9.2.0
149
+ - babel==2.11.0
150
+ - backcall==0.2.0
151
+ - beautifulsoup4==4.10.0
152
+ - beir==2.0.0
153
+ - bidict==0.22.1
154
+ - bitstring==4.0.1
155
+ - bleach==5.0.1
156
+ - blis==0.7.4
157
+ - brotli==1.0.9
158
+ - bs4==0.0.1
159
+ - cachetools==4.2.4
160
+ - catalogue==2.0.6
161
+ - charset-normalizer==2.1.1
162
+ - click==7.1.2
163
+ - clip==1.0
164
+ - coloredlogs==15.0.1
165
+ - colour-science==0.4.2
166
+ - comm==0.1.2
167
+ - configargparse==1.5.3
168
+ - contourpy==1.0.7
169
+ - courlan==0.9.3
170
+ - cycler==0.10.0
171
+ - cymem==2.0.5
172
+ - cython==0.29.35
173
+ - cytoolz==0.11.0
174
+ - dash==2.7.1
175
+ - dash-core-components==2.0.0
176
+ - dash-html-components==2.0.0
177
+ - dash-table==5.0.0
178
+ - datasets==2.13.1
179
+ - dateparser==1.1.8
180
+ - debugpy==1.6.4
181
+ - decorator==4.4.2
182
+ - deep-learning==0.0.2
183
+ - defusedxml==0.7.1
184
+ - demucs==4.0.0
185
+ - descartes==1.1.0
186
+ - diffq==0.2.4
187
+ - dill==0.3.6
188
+ - dnspython==2.2.1
189
+ - docker-pycreds==0.4.0
190
+ - docstring-parser==0.14.1
191
+ - dora-search==0.1.12
192
+ - einops==0.6.1
193
+ - elasticsearch==7.9.1
194
+ - entrypoints==0.4
195
+ - exceptiongroup==1.0.0
196
+ - executing==1.2.0
197
+ - faiss-cpu==1.7.4
198
+ - fastjsonschema==2.16.2
199
+ - ffmpeg-python==0.2.0
200
+ - filelock==3.8.2
201
+ - fire==0.5.0
202
+ - flask==2.2.2
203
+ - flatbuffers==23.1.21
204
+ - fonttools==4.38.0
205
+ - fqdn==1.5.1
206
+ - frozendict==2.3.4
207
+ - frozenlist==1.3.3
208
+ - ftfy==6.1.1
209
+ - functorch==0.2.1
210
+ - future==0.18.3
211
+ - gast==0.4.0
212
+ - gdown==4.6.0
213
+ - gitdb==4.0.10
214
+ - gitpython==3.1.30
215
+ - google-auth==2.16.0
216
+ - google-auth-oauthlib==0.4.6
217
+ - google-crc32c==1.5.0
218
+ - google-pasta==0.2.0
219
+ - grpcio==1.51.1
220
+ - h11==0.12.0
221
+ - h2==4.1.0
222
+ - h5py==3.7.0
223
+ - hpack==4.0.0
224
+ - htmldate==1.4.3
225
+ - httpcore==0.13.7
226
+ - httpx==0.19.0
227
+ - huggingface-hub==0.11.1
228
+ - humanfriendly==10.0
229
+ - humanize==4.4.0
230
+ - hyperframe==6.0.1
231
+ - hyperlink==21.0.0
232
+ - igraph==0.10.5
233
+ - imageio==2.24.0
234
+ - importlib-metadata==5.1.0
235
+ - importlib-resources==6.0.0
236
+ - install==1.3.5
237
+ - instructorembedding==1.0.1
238
+ - ipykernel==6.19.2
239
+ - ipython==8.7.0
240
+ - ipython-genutils==0.2.0
241
+ - ipywidgets==8.0.3
242
+ - isoduration==20.11.0
243
+ - itsdangerous==2.1.2
244
+ - jedi==0.18.2
245
+ - jellyfish==0.8.8
246
+ - jinja2==3.1.2
247
+ - joblib==1.3.2
248
+ - json5==0.9.11
249
+ - jsonlines==3.1.0
250
+ - jsonpointer==2.3
251
+ - jsonschema==4.17.3
252
+ - julius==0.2.7
253
+ - jupyter==1.0.0
254
+ - jupyter-client==7.4.8
255
+ - jupyter-console==6.4.4
256
+ - jupyter-core==5.1.0
257
+ - jupyter-events==0.5.0
258
+ - jupyter-server==2.0.1
259
+ - jupyter-server-proxy==3.2.2
260
+ - jupyter-server-terminals==0.4.2
261
+ - jupyterlab==3.5.2
262
+ - jupyterlab-pygments==0.2.2
263
+ - jupyterlab-server==2.19.0
264
+ - jupyterlab-widgets==3.0.4
265
+ - justext==3.0.0
266
+ - keras==2.11.0
267
+ - kiwisolver==1.3.1
268
+ - lameenc==1.4.2
269
+ - langcodes==3.3.0
270
+ - libclang==15.0.6.1
271
+ - librosa==0.8.1
272
+ - llvmlite==0.38.1
273
+ - lpips==0.1.4
274
+ - lxml==4.9.3
275
+ - markdown==3.4.1
276
+ - markdown-it-py==2.1.0
277
+ - markupsafe==2.1.2
278
+ - matplotlib==3.7.2
279
+ - matplotlib-inline==0.1.6
280
+ - mdurl==0.1.2
281
+ - mediapy==1.1.4
282
+ - mistune==2.0.4
283
+ - msgpack==1.0.4
284
+ - msgpack-numpy==0.4.8
285
+ - mteb==0.0.2
286
+ - multidict==6.0.3
287
+ - multiprocess==0.70.14
288
+ - murmurhash==1.0.5
289
+ - mutagen==1.46.0
290
+ - mypy==0.991
291
+ - mypy-extensions==0.4.3
292
+ - nbclassic==0.4.8
293
+ - nbclient==0.7.2
294
+ - nbconvert==7.2.6
295
+ - nbformat==5.5.0
296
+ - nerfacc==0.3.3
297
+ - nerfstudio==0.1.15
298
+ - nest-asyncio==1.5.6
299
+ - netifaces==0.11.0
300
+ - networkx==2.5.1
301
+ - ninja==1.11.1
302
+ - nltk==3.6.5
303
+ - norbert==0.2.1
304
+ - notebook==6.5.2
305
+ - notebook-shim==0.2.2
306
+ - numba==0.55.2
307
+ - numpy==1.22.4
308
+ - nuscenes-devkit==1.1.9
309
+ - oauthlib==3.2.2
310
+ - omegaconf==2.3.0
311
+ - onnxruntime==1.15.1
312
+ - open3d==0.16.1
313
+ - openai==0.27.0
314
+ - opencv-python==4.6.0.66
315
+ - openunmix==1.2.1
316
+ - opt-einsum==3.3.0
317
+ - outcome==1.2.0
318
+ - packaging==21.0
319
+ - pandas==1.3.4
320
+ - pandocfilters==1.5.0
321
+ - parso==0.8.3
322
+ - pathtools==0.1.2
323
+ - pathy==0.6.0
324
+ - pexpect==4.8.0
325
+ - pickleshare==0.7.5
326
+ - pillow==8.2.0
327
+ - pip==22.3.1
328
+ - platformdirs==2.6.0
329
+ - plotly==5.12.0
330
+ - pooch==1.7.0
331
+ - preshed==3.0.5
332
+ - prometheus-client==0.15.0
333
+ - prompt-toolkit==3.0.36
334
+ - protobuf==3.19.6
335
+ - psutil==5.9.4
336
+ - ptyprocess==0.7.0
337
+ - pure-eval==0.2.2
338
+ - pyarrow==12.0.1
339
+ - pyasn1==0.4.8
340
+ - pyasn1-modules==0.2.8
341
+ - pyaudio==0.2.13
342
+ - pybind11==2.10.3
343
+ - pycairo==1.24.0
344
+ - pycocotools==2.0.6
345
+ - pycryptodomex==3.18.0
346
+ - pydantic==1.8.2
347
+ - pyee==9.0.4
348
+ - pygments==2.13.0
349
+ - pylibsrtp==0.7.1
350
+ - pymeshlab==2022.2.post2
351
+ - pyngrok==5.2.1
352
+ - pyopengl==3.1.6
353
+ - pyopengl-accelerate==3.1.5
354
+ - pypandoc==1.10
355
+ - pyparsing==2.4.7
356
+ - pyphen==0.11.0
357
+ - pyquaternion==0.9.9
358
+ - pyrsistent==0.19.2
359
+ - pysimplegui==4.60.5
360
+ - python-dateutil==2.8.2
361
+ - python-engineio==4.3.4
362
+ - python-igraph==0.10.5
363
+ - python-json-logger==2.0.4
364
+ - python-socketio==5.7.2
365
+ - pytrec-eval==0.5
366
+ - pytube==15.0.0
367
+ - pytz==2021.3
368
+ - pyzmq==24.0.1
369
+ - qtconsole==5.4.0
370
+ - qtpy==2.3.0
371
+ - regex==2021.10.8
372
+ - requests-oauthlib==1.3.1
373
+ - resampy==0.4.2
374
+ - retrying==1.3.4
375
+ - rfc3339-validator==0.1.4
376
+ - rfc3986==1.5.0
377
+ - rfc3986-validator==0.1.1
378
+ - rich==13.2.0
379
+ - rsa==4.9
380
+ - scikit-learn==1.3.0
381
+ - scipy==1.7.1
382
+ - seaborn==0.11.2
383
+ - segtok==1.5.10
384
+ - selenium==4.5.0
385
+ - send2trash==1.8.0
386
+ - sentence-transformers==2.2.1
387
+ - sentencepiece==0.1.99
388
+ - sentry-sdk==1.13.0
389
+ - setproctitle==1.3.2
390
+ - setuptools==66.1.1
391
+ - shapely==2.0.0
392
+ - shtab==1.5.8
393
+ - simpervisor==0.4
394
+ - smart-open==5.2.1
395
+ - smmap==5.0.0
396
+ - sniffio==1.3.0
397
+ - sortedcontainers==2.4.0
398
+ - sounddevice==0.4.6
399
+ - soundfile==0.12.1
400
+ - soupsieve==2.3.1
401
+ - spacy==3.1.3
402
+ - spacy-legacy==3.0.8
403
+ - spleeter==2.3.2
404
+ - srsly==2.4.1
405
+ - stack-data==0.6.2
406
+ - stockfish==3.28.0
407
+ - submitit==1.4.5
408
+ - tabulate==0.8.9
409
+ - tenacity==8.1.0
410
+ - tensorboard==2.11.2
411
+ - tensorboard-data-server==0.6.1
412
+ - tensorboard-plugin-wit==1.8.1
413
+ - tensorflow==2.11.0
414
+ - tensorflow-estimator==2.11.0
415
+ - tensorflow-io-gcs-filesystem==0.30.0
416
+ - termcolor==2.1.1
417
+ - terminado==0.17.1
418
+ - textacy==0.11.0
419
+ - texttable==1.6.7
420
+ - thinc==8.0.10
421
+ - threadpoolctl==3.0.0
422
+ - timm==0.6.12
423
+ - tinycss2==1.2.1
424
+ - tld==0.13
425
+ - tokenizers==0.13.2
426
+ - tomli==2.0.1
427
+ - toolz==0.11.1
428
+ - torch==1.12.1
429
+ - torch-fidelity==0.3.0
430
+ - torchmetrics==0.11.0
431
+ - torchtyping==0.1.4
432
+ - tornado==6.2
433
+ - tqdm==4.65.0
434
+ - trafilatura==1.6.1
435
+ - traitlets==5.7.1
436
+ - transformers==4.26.0
437
+ - treetable==0.2.5
438
+ - trio==0.22.0
439
+ - trio-websocket==0.9.2
440
+ - txaio==22.2.1
441
+ - typeguard==2.13.3
442
+ - typer==0.3.2
443
+ - typing-extensions==3.10.0.2
444
+ - tyro==0.3.37
445
+ - tzlocal==5.0.1
446
+ - u-msgpack-python==2.7.2
447
+ - uri-template==1.2.0
448
+ - urllib3==1.26.12
449
+ - vpython==7.6.4
450
+ - wandb==0.13.9
451
+ - wasabi==0.8.2
452
+ - wcwidth==0.2.5
453
+ - webcolors==1.12
454
+ - webencodings==0.5.1
455
+ - websocket-client==1.4.2
456
+ - websockets==11.0.3
457
+ - werkzeug==2.2.2
458
+ - widgetsnbextension==4.0.4
459
+ - wrapt==1.14.1
460
+ - wsproto==1.2.0
461
+ - wxpython==4.2.0
462
+ - xatlas==0.0.7
463
+ - xxhash==3.2.0
464
+ - yake==0.4.8
465
+ - yarl==1.8.2
466
+ - yt-dlp==2023.7.6
467
+ - zipp==3.11.0
468
+ prefix: /opt/miniconda3
quantize_bitsandbytes.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from transformers import AutoModel
3
+ from accelerate import Accelerator, init_empty_weights
4
+ from accelerate.utils import BnbQuantizationConfig, load_and_quantize_model
5
+
6
+ # Make sure transformers works offline
7
+ os.environ["TRANSFORMERS_OFFLINE"] = "1"
8
+
9
+ # 1. Initialize the empty model
10
+ model_fp32 = AutoModel.from_pretrained("./models/all-MiniLM-L6-v2")
11
+ with init_empty_weights():
12
+ empty_model = model_fp32
13
+
14
+ # 2. Get the path to the weights of your model. For now, we'll assume it's in the same folder.
15
+ weights_location = "./models/all-MiniLM-L6-v2-unquantized/pytorch_model.bin"
16
+
17
+ # 3. Set quantization configuration (8-bit for this example)
18
+ bnb_quantization_config = BnbQuantizationConfig(load_in_8bit=True, llm_int8_threshold=6)
19
+
20
+ # 4. Quantize the empty model
21
+ quantized_model = load_and_quantize_model(empty_model, weights_location=weights_location,
22
+ bnb_quantization_config=bnb_quantization_config, device_map="auto")
23
+
24
+ # 5. Save the quantized model
25
+ accelerator = Accelerator()
26
+ new_weights_location = "./models/all-MiniLM-L6-v2-unquantized-q8"
27
+ accelerator.save_model(quantized_model, new_weights_location)
quantize_fx.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch.ao.quantization import quantize_dynamic
3
+ from optimum.fx.optimization import Transformation
4
+ from transformers import AutoModel, AutoTokenizer
5
+ from transformers.utils.fx import symbolic_trace
6
+
7
+ # Define the Dynamic Quantization Transformation
8
+ class DynamicQuantization(Transformation):
9
+ def __init__(self, dtype=torch.qint8, qconfig_spec=None, mapping=None):
10
+ super().__init__()
11
+ self.dtype = dtype
12
+ self.qconfig_spec = qconfig_spec
13
+ self.mapping = mapping
14
+
15
+ def transform(self, graph_module):
16
+ # Use torch's quantize_dynamic function to quantize the module
17
+ quantized_module = quantize_dynamic(
18
+ graph_module, qconfig_spec=self.qconfig_spec, dtype=self.dtype, mapping=self.mapping, inplace=False
19
+ )
20
+ return quantized_module
21
+
22
+ # Load the model
23
+ model_path = "./models/all-MiniLM-L6-v2"
24
+ model = AutoModel.from_pretrained(model_path)
25
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
26
+
27
+ # Symbolically trace the model
28
+ # Note: For certain models, you might need to modify the input_names
29
+ input_names = ["input_ids", "attention_mask"]
30
+ traced_model = symbolic_trace(model, input_names=input_names)
31
+
32
+ # Apply dynamic quantization
33
+ transformation = DynamicQuantization(dtype=torch.qint8)
34
+ quantized_model = transformation(traced_model)
35
+
36
+ print(type(quantized_model.))
37
+ #
38
+ # # Save the quantized model
39
+ # quantized_model_path = "./models/all-MiniLM-L6-v2-unquantized-q8/"
40
+ # quantized_model.save(quantized_model_path)
41
+ # tokenizer.save_pretrained(quantized_model_path) # Save the tokenizer as well
quantize_torch.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoModel
3
+ import os
4
+ os.environ["TRANSFORMERS_OFFLINE"] = "1" # 1 for offline
5
+
6
+ model_fp32 = AutoModel.from_pretrained("./models/all-MiniLM-L6-v2")
7
+
8
+ model_int8 = torch.ao.quantization.quantize_dynamic(
9
+ model_fp32, # the original model
10
+ {torch.nn.Linear}, # a set of layers to dynamically quantize
11
+ dtype=torch.float16)
12
+
13
+ torch.save(model_int8.state_dict(), "./models/all-MiniLM-L6-v2-unquantized-q16/pytorch_model.bin")
quantized2tf.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+
4
+ import onnx
5
+ from onnx_tf.backend import prepare
6
+
7
+ def onnx_to_torch_converter(dir_name):
8
+ if not os.path.exists(dir_name):
9
+ print(f"Directory {dir_name} does not exist!")
10
+ return
11
+
12
+ onnx_model_path = os.path.join(dir_name, "onnx", "model.onnx")
13
+
14
+ if not os.path.exists(onnx_model_path):
15
+ print(f"ONNX model at {onnx_model_path} does not exist!")
16
+ return
17
+
18
+ onnx_model = onnx.load(onnx_model_path)
19
+ tf_rep = prepare(onnx_model) # prepare tf representation
20
+ tf_model_save_path = os.path.join(dir_name, "tf_model")
21
+
22
+ tf_rep.export_graph(tf_model_save_path) # export the model
23
+
24
+ print(f"PyTorch model saved at {tf_model_save_path}")
25
+
26
+
27
+ if __name__ == "__main__":
28
+ if len(sys.argv) != 2:
29
+ print("Usage: python onnx2torch.py [directory_path]")
30
+ else:
31
+ dir_name = sys.argv[1]
32
+ onnx_to_torch_converter(dir_name)
quantized2torch.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ from onnx2torch import convert
4
+ import torch
5
+
6
+ def onnx_to_torch_converter(dir_name):
7
+ if not os.path.exists(dir_name):
8
+ print(f"Directory {dir_name} does not exist!")
9
+ return
10
+
11
+ onnx_model_path = os.path.join(dir_name, "onnx", "model.onnx")
12
+
13
+ if not os.path.exists(onnx_model_path):
14
+ print(f"ONNX model at {onnx_model_path} does not exist!")
15
+ return
16
+
17
+ torch_model = convert(onnx_model_path)
18
+
19
+ torch_model_save_path = os.path.join(dir_name, "pytorch_model.bin")
20
+ torch.save(torch_model.state_dict(), torch_model_save_path)
21
+ print(f"PyTorch model saved at {torch_model_save_path}")
22
+
23
+
24
+ if __name__ == "__main__":
25
+ if len(sys.argv) != 2:
26
+ print("Usage: python onnx2torch.py [directory_path]")
27
+ else:
28
+ dir_name = sys.argv[1]
29
+ onnx_to_torch_converter(dir_name)
setup.sh CHANGED
@@ -5,19 +5,17 @@ conda create --name qMTEB python=3.9 -y
5
 
6
  source activate qMTEB
7
 
8
-
9
  conda install -c intel openmp
10
  conda install nomkl
11
-
12
- conda install pytorch torchvision -c pytorch
13
- conda install -c conda-forge sentence-transformers
14
- conda install -c huggingface transformers
15
-
16
  pip install mteb
17
-
18
- rm -rf results/
19
 
20
  source link.sh
21
 
 
 
22
  echo "Setup completed!"
23
 
 
5
 
6
  source activate qMTEB
7
 
 
8
  conda install -c intel openmp
9
  conda install nomkl
10
+ pip install torch torchvision torchaudio
11
+ pip install -e /Users/varun/documents/python/embeddings/sentence-transformers
 
 
 
12
  pip install mteb
13
+ pip install onnxruntime-silicon
14
+ python -m pip install "optimum[onnxruntime]@git+https://github.com/huggingface/optimum.git"
15
 
16
  source link.sh
17
 
18
+ source activate qMTEB
19
+
20
  echo "Setup completed!"
21