removed unneded files
Browse files- data.json +4 -4
- ort.py +0 -14
- ort_sentence_transformers.py +0 -44
- qMTEB.yml +468 -0
- quantize_bitsandbytes.py +27 -0
- quantize_fx.py +41 -0
- quantize_torch.py +13 -0
- quantized2tf.py +32 -0
- quantized2torch.py +29 -0
- setup.sh +6 -8
data.json
CHANGED
@@ -6,12 +6,12 @@
|
|
6 |
86.67845249176025
|
7 |
],
|
8 |
"Score": [
|
9 |
-
|
10 |
],
|
11 |
"q8 Model Size (MB)": [
|
12 |
-
|
13 |
],
|
14 |
"q8 Score": [
|
15 |
-
0.
|
16 |
]
|
17 |
-
}
|
|
|
6 |
86.67845249176025
|
7 |
],
|
8 |
"Score": [
|
9 |
+
0.5057898558837041
|
10 |
],
|
11 |
"q8 Model Size (MB)": [
|
12 |
+
21.908215522766113
|
13 |
],
|
14 |
"q8 Score": [
|
15 |
+
0.48741800571641297
|
16 |
]
|
17 |
+
}
|
ort.py
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
from transformers import AutoTokenizer
|
2 |
-
from optimum.onnxruntime import ORTModelForFeatureExtraction
|
3 |
-
import torch
|
4 |
-
|
5 |
-
tokenizer = AutoTokenizer.from_pretrained("./models/optimum/all-MiniLM-L6-v2")
|
6 |
-
model = ORTModelForFeatureExtraction.from_pretrained("./models/optimum/all-MiniLM-L6-v2")
|
7 |
-
|
8 |
-
inputs = tokenizer("My name is Philipp and I live in Germany.", return_tensors="np")
|
9 |
-
|
10 |
-
outputs = model(**inputs)
|
11 |
-
last_hidden_state = outputs.last_hidden_state
|
12 |
-
|
13 |
-
# no mean pooling
|
14 |
-
print(list(last_hidden_state.shape))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ort_sentence_transformers.py
DELETED
@@ -1,44 +0,0 @@
|
|
1 |
-
from sentence_transformers import SentenceTransformer
|
2 |
-
from scipy.spatial import distance
|
3 |
-
|
4 |
-
|
5 |
-
device = "mps"
|
6 |
-
# Load the models
|
7 |
-
# so close, and yet, .! so far ~!~ ~
|
8 |
-
providers = [
|
9 |
-
('CoreMLExecutionProvider', {
|
10 |
-
'device_id': 0,
|
11 |
-
}),
|
12 |
-
'CPUExecutionProvider',
|
13 |
-
]
|
14 |
-
|
15 |
-
model1 = SentenceTransformer('./models/optimum/all-MiniLM-L6-v2', device=device,
|
16 |
-
model_args={
|
17 |
-
"providers": providers
|
18 |
-
})
|
19 |
-
print("\033[91m", model1.modules(), "\033[0m")
|
20 |
-
# model2 = SentenceTransformer('./models/all-MiniLM-L6-v2', device=device)
|
21 |
-
|
22 |
-
sentences = [
|
23 |
-
'This framework generates embeddings for each input sentence',
|
24 |
-
'Sentences are passed as a list of string.',
|
25 |
-
'The quick brown fox jumps over the lazy dog.'
|
26 |
-
]
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
# Get embeddings for each sentence from both models
|
31 |
-
embeddings1 = model1.encode(sentences)
|
32 |
-
# embeddings2 = model2.encode(sentences)
|
33 |
-
|
34 |
-
|
35 |
-
# Compute and print the cosine similarity for each sentence's embeddings from the two models
|
36 |
-
for sentence, emb1, emb2 in zip(sentences, embeddings1, range(3)):
|
37 |
-
sim = 1 - distance.cosine(emb1, emb2) # Cosine similarity is the complement of cosine distance
|
38 |
-
print(f"Sentence: {sentence}")
|
39 |
-
print(f"Cosine Similarity: {sim:.4f}")
|
40 |
-
print("")
|
41 |
-
|
42 |
-
# print(model2.device)
|
43 |
-
|
44 |
-
# should be working perfectly :))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
qMTEB.yml
ADDED
@@ -0,0 +1,468 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: base
|
2 |
+
channels:
|
3 |
+
- pytorch-nightly
|
4 |
+
- anaconda
|
5 |
+
- conda-forge
|
6 |
+
- defaults
|
7 |
+
dependencies:
|
8 |
+
- backports=1.0=pyhd8ed1ab_3
|
9 |
+
- backports.lzma=0.0.14=py39hd5dad98_3
|
10 |
+
- blas=1.0=mkl
|
11 |
+
- blosc=1.21.0=h8346a28_1
|
12 |
+
- brotli-bin=1.0.9=hca72f7f_7
|
13 |
+
- brotlipy=0.7.0=py39h9ed2024_1003
|
14 |
+
- brunsli=0.1=h23ab428_0
|
15 |
+
- bzip2=1.0.8=h1de35cc_0
|
16 |
+
- c-ares=1.18.1=hca72f7f_0
|
17 |
+
- ca-certificates=2022.10.11=hecd8cb5_0
|
18 |
+
- certifi=2022.12.7=py39hecd8cb5_0
|
19 |
+
- cffi=1.14.4=py39h2125817_0
|
20 |
+
- cfitsio=3.470=hbd21bf8_7
|
21 |
+
- chardet=3.0.4=py39hecd8cb5_1003
|
22 |
+
- charls=2.2.0=h23ab428_0
|
23 |
+
- cloudpickle=2.0.0=pyhd3eb1b0_0
|
24 |
+
- conda=22.11.1=py39hecd8cb5_4
|
25 |
+
- conda-package-handling=1.7.2=py39h9ed2024_1
|
26 |
+
- cryptography=38.0.4=py39hf6deb26_0
|
27 |
+
- dask-core=2022.7.0=py39hecd8cb5_0
|
28 |
+
- expat=2.5.0=hf0c8a7f_0
|
29 |
+
- ffmpeg=4.2.2=h97e5cf8_0
|
30 |
+
- fftw=3.3.9=h9ed2024_1
|
31 |
+
- freetype=2.12.1=hd8bbffd_0
|
32 |
+
- fsspec=2022.11.0=py39hecd8cb5_0
|
33 |
+
- gettext=0.21.0=h7535e17_0
|
34 |
+
- giflib=5.2.1=haf1e3a3_0
|
35 |
+
- gmp=6.2.1=he9d5cce_3
|
36 |
+
- gmpy2=2.1.2=py39hd5de756_0
|
37 |
+
- gnutls=3.6.15=hed9c0bf_0
|
38 |
+
- icu=58.2=h0a44026_3
|
39 |
+
- idna=2.10=py_0
|
40 |
+
- imagecodecs=2021.8.26=py39h0f85e6e_1
|
41 |
+
- intel-openmp=2021.4.0=hecd8cb5_3538
|
42 |
+
- jpeg=9e=hca72f7f_0
|
43 |
+
- jxrlib=1.1=haf1e3a3_2
|
44 |
+
- krb5=1.19.2=hcd88c3b_0
|
45 |
+
- lame=3.100=h1de35cc_0
|
46 |
+
- lcms2=2.12=hf1fd2bf_0
|
47 |
+
- lerc=3.0=he9d5cce_0
|
48 |
+
- libaec=1.0.4=hb1e8313_1
|
49 |
+
- libbrotlicommon=1.0.9=hca72f7f_7
|
50 |
+
- libbrotlidec=1.0.9=hca72f7f_7
|
51 |
+
- libbrotlienc=1.0.9=hca72f7f_7
|
52 |
+
- libcurl=7.86.0=ha585b31_0
|
53 |
+
- libcxx=14.0.6=h9765a3e_0
|
54 |
+
- libdeflate=1.8=h9ed2024_5
|
55 |
+
- libedit=3.1.20221030=h6c40b1e_0
|
56 |
+
- libev=4.33=h9ed2024_1
|
57 |
+
- libffi=3.3=hb1e8313_2
|
58 |
+
- libgfortran=5.0.0=11_3_0_hecd8cb5_28
|
59 |
+
- libgfortran5=11.3.0=h9dfd629_28
|
60 |
+
- libiconv=1.16=hca72f7f_2
|
61 |
+
- libidn2=2.3.2=h9ed2024_0
|
62 |
+
- libnghttp2=1.46.0=ha29bfda_0
|
63 |
+
- libopus=1.3.1=h1de35cc_0
|
64 |
+
- libpng=1.6.37=ha441bb4_0
|
65 |
+
- libssh2=1.10.0=h0a4fc7d_0
|
66 |
+
- libtasn1=4.16.0=h9ed2024_0
|
67 |
+
- libtiff=4.4.0=h2ef1027_0
|
68 |
+
- libunistring=0.9.10=h9ed2024_0
|
69 |
+
- libvpx=1.7.0=h378b8a2_0
|
70 |
+
- libwebp=1.2.4=h56c3ce4_0
|
71 |
+
- libwebp-base=1.2.4=hca72f7f_0
|
72 |
+
- libxml2=2.9.14=hbf8cd5e_0
|
73 |
+
- libzopfli=1.0.3=hb1e8313_0
|
74 |
+
- llvm-openmp=14.0.6=h0dcd299_0
|
75 |
+
- locket=1.0.0=py39hecd8cb5_0
|
76 |
+
- lz4-c=1.9.3=h23ab428_1
|
77 |
+
- mkl=2021.4.0=hecd8cb5_637
|
78 |
+
- mkl-service=2.4.0=py39h9ed2024_0
|
79 |
+
- mkl_fft=1.3.1=py39h4ab4a9b_0
|
80 |
+
- mkl_random=1.2.2=py39hb2f4e1b_0
|
81 |
+
- mpc=1.1.0=h6ef4df4_1
|
82 |
+
- mpfr=4.0.2=h9066e36_1
|
83 |
+
- mpmath=1.2.1=py39hecd8cb5_0
|
84 |
+
- ncurses=6.3=hca72f7f_3
|
85 |
+
- nettle=3.7.3=h230ac6f_1
|
86 |
+
- openh264=2.1.1=h8346a28_0
|
87 |
+
- openjpeg=2.4.0=h66ea3da_0
|
88 |
+
- openssl=1.1.1s=hca72f7f_0
|
89 |
+
- partd=1.2.0=pyhd3eb1b0_1
|
90 |
+
- pathlib2=2.3.7.post1=py39h6e9494a_2
|
91 |
+
- pluggy=1.0.0=py39hecd8cb5_1
|
92 |
+
- progress=1.5=py39hecd8cb5_0
|
93 |
+
- pthread-stubs=0.4=hc929b4f_1001
|
94 |
+
- pycosat=0.6.3=py39h9ed2024_0
|
95 |
+
- pycparser=2.20=py_2
|
96 |
+
- pyopenssl=20.0.0=pyhd3eb1b0_1
|
97 |
+
- pypubsub=4.0.3=py_0
|
98 |
+
- pysocks=1.7.1=py39hecd8cb5_0
|
99 |
+
- python=3.9.1=h88f2d9e_2
|
100 |
+
- python.app=3=py39h9ed2024_0
|
101 |
+
- python_abi=3.9=2_cp39
|
102 |
+
- pywavelets=1.4.1=py39h6c40b1e_0
|
103 |
+
- pyyaml=6.0=py39h6c40b1e_1
|
104 |
+
- readline=8.0=h1de35cc_0
|
105 |
+
- requests=2.28.1=py39hecd8cb5_0
|
106 |
+
- ruamel.yaml=0.16.12=py39h9ed2024_1
|
107 |
+
- ruamel.yaml.clib=0.2.6=py39hca72f7f_0
|
108 |
+
- ruamel_yaml=0.15.80=py39h9ed2024_0
|
109 |
+
- scikit-image=0.19.3=py39hcec6c5f_1
|
110 |
+
- six=1.15.0=py39hecd8cb5_0
|
111 |
+
- snappy=1.1.9=he9d5cce_0
|
112 |
+
- sqlite=3.33.0=hffcf06c_0
|
113 |
+
- sympy=1.11.1=py39hecd8cb5_0
|
114 |
+
- tifffile=2021.7.2=pyhd3eb1b0_2
|
115 |
+
- tk=8.6.10=hb0a8c7a_0
|
116 |
+
- torchaudio=0.14.0.dev20221214=py39_cpu
|
117 |
+
- torchvision=0.15.0.dev20221214=py39_cpu
|
118 |
+
- typing_extensions=4.3.0=py39hecd8cb5_0
|
119 |
+
- tzdata=2020d=h14c3975_0
|
120 |
+
- wheel=0.36.1=pyhd3eb1b0_0
|
121 |
+
- x264=1!157.20191217=h1de35cc_0
|
122 |
+
- xz=5.2.8=h6c40b1e_0
|
123 |
+
- yaml=0.2.5=haf1e3a3_0
|
124 |
+
- zfp=0.5.5=he9d5cce_6
|
125 |
+
- zlib=1.2.13=h4dc903c_0
|
126 |
+
- zstd=1.5.2=hcb37349_0
|
127 |
+
- pip:
|
128 |
+
- absl-py==1.4.0
|
129 |
+
- addict==2.4.0
|
130 |
+
- aiohttp==3.8.3
|
131 |
+
- aioice==0.7.6
|
132 |
+
- aiortc==1.3.2
|
133 |
+
- aiosignal==1.3.1
|
134 |
+
- antlr4-python3-runtime==4.9.3
|
135 |
+
- anyio==3.6.2
|
136 |
+
- appdirs==1.4.4
|
137 |
+
- appnope==0.1.3
|
138 |
+
- argon2-cffi==21.3.0
|
139 |
+
- argon2-cffi-bindings==21.2.0
|
140 |
+
- arrow==1.2.3
|
141 |
+
- asttokens==2.2.1
|
142 |
+
- astunparse==1.6.3
|
143 |
+
- async-generator==1.10
|
144 |
+
- async-timeout==4.0.2
|
145 |
+
- attrs==22.1.0
|
146 |
+
- audioread==3.0.0
|
147 |
+
- autobahn==22.7.1
|
148 |
+
- av==9.2.0
|
149 |
+
- babel==2.11.0
|
150 |
+
- backcall==0.2.0
|
151 |
+
- beautifulsoup4==4.10.0
|
152 |
+
- beir==2.0.0
|
153 |
+
- bidict==0.22.1
|
154 |
+
- bitstring==4.0.1
|
155 |
+
- bleach==5.0.1
|
156 |
+
- blis==0.7.4
|
157 |
+
- brotli==1.0.9
|
158 |
+
- bs4==0.0.1
|
159 |
+
- cachetools==4.2.4
|
160 |
+
- catalogue==2.0.6
|
161 |
+
- charset-normalizer==2.1.1
|
162 |
+
- click==7.1.2
|
163 |
+
- clip==1.0
|
164 |
+
- coloredlogs==15.0.1
|
165 |
+
- colour-science==0.4.2
|
166 |
+
- comm==0.1.2
|
167 |
+
- configargparse==1.5.3
|
168 |
+
- contourpy==1.0.7
|
169 |
+
- courlan==0.9.3
|
170 |
+
- cycler==0.10.0
|
171 |
+
- cymem==2.0.5
|
172 |
+
- cython==0.29.35
|
173 |
+
- cytoolz==0.11.0
|
174 |
+
- dash==2.7.1
|
175 |
+
- dash-core-components==2.0.0
|
176 |
+
- dash-html-components==2.0.0
|
177 |
+
- dash-table==5.0.0
|
178 |
+
- datasets==2.13.1
|
179 |
+
- dateparser==1.1.8
|
180 |
+
- debugpy==1.6.4
|
181 |
+
- decorator==4.4.2
|
182 |
+
- deep-learning==0.0.2
|
183 |
+
- defusedxml==0.7.1
|
184 |
+
- demucs==4.0.0
|
185 |
+
- descartes==1.1.0
|
186 |
+
- diffq==0.2.4
|
187 |
+
- dill==0.3.6
|
188 |
+
- dnspython==2.2.1
|
189 |
+
- docker-pycreds==0.4.0
|
190 |
+
- docstring-parser==0.14.1
|
191 |
+
- dora-search==0.1.12
|
192 |
+
- einops==0.6.1
|
193 |
+
- elasticsearch==7.9.1
|
194 |
+
- entrypoints==0.4
|
195 |
+
- exceptiongroup==1.0.0
|
196 |
+
- executing==1.2.0
|
197 |
+
- faiss-cpu==1.7.4
|
198 |
+
- fastjsonschema==2.16.2
|
199 |
+
- ffmpeg-python==0.2.0
|
200 |
+
- filelock==3.8.2
|
201 |
+
- fire==0.5.0
|
202 |
+
- flask==2.2.2
|
203 |
+
- flatbuffers==23.1.21
|
204 |
+
- fonttools==4.38.0
|
205 |
+
- fqdn==1.5.1
|
206 |
+
- frozendict==2.3.4
|
207 |
+
- frozenlist==1.3.3
|
208 |
+
- ftfy==6.1.1
|
209 |
+
- functorch==0.2.1
|
210 |
+
- future==0.18.3
|
211 |
+
- gast==0.4.0
|
212 |
+
- gdown==4.6.0
|
213 |
+
- gitdb==4.0.10
|
214 |
+
- gitpython==3.1.30
|
215 |
+
- google-auth==2.16.0
|
216 |
+
- google-auth-oauthlib==0.4.6
|
217 |
+
- google-crc32c==1.5.0
|
218 |
+
- google-pasta==0.2.0
|
219 |
+
- grpcio==1.51.1
|
220 |
+
- h11==0.12.0
|
221 |
+
- h2==4.1.0
|
222 |
+
- h5py==3.7.0
|
223 |
+
- hpack==4.0.0
|
224 |
+
- htmldate==1.4.3
|
225 |
+
- httpcore==0.13.7
|
226 |
+
- httpx==0.19.0
|
227 |
+
- huggingface-hub==0.11.1
|
228 |
+
- humanfriendly==10.0
|
229 |
+
- humanize==4.4.0
|
230 |
+
- hyperframe==6.0.1
|
231 |
+
- hyperlink==21.0.0
|
232 |
+
- igraph==0.10.5
|
233 |
+
- imageio==2.24.0
|
234 |
+
- importlib-metadata==5.1.0
|
235 |
+
- importlib-resources==6.0.0
|
236 |
+
- install==1.3.5
|
237 |
+
- instructorembedding==1.0.1
|
238 |
+
- ipykernel==6.19.2
|
239 |
+
- ipython==8.7.0
|
240 |
+
- ipython-genutils==0.2.0
|
241 |
+
- ipywidgets==8.0.3
|
242 |
+
- isoduration==20.11.0
|
243 |
+
- itsdangerous==2.1.2
|
244 |
+
- jedi==0.18.2
|
245 |
+
- jellyfish==0.8.8
|
246 |
+
- jinja2==3.1.2
|
247 |
+
- joblib==1.3.2
|
248 |
+
- json5==0.9.11
|
249 |
+
- jsonlines==3.1.0
|
250 |
+
- jsonpointer==2.3
|
251 |
+
- jsonschema==4.17.3
|
252 |
+
- julius==0.2.7
|
253 |
+
- jupyter==1.0.0
|
254 |
+
- jupyter-client==7.4.8
|
255 |
+
- jupyter-console==6.4.4
|
256 |
+
- jupyter-core==5.1.0
|
257 |
+
- jupyter-events==0.5.0
|
258 |
+
- jupyter-server==2.0.1
|
259 |
+
- jupyter-server-proxy==3.2.2
|
260 |
+
- jupyter-server-terminals==0.4.2
|
261 |
+
- jupyterlab==3.5.2
|
262 |
+
- jupyterlab-pygments==0.2.2
|
263 |
+
- jupyterlab-server==2.19.0
|
264 |
+
- jupyterlab-widgets==3.0.4
|
265 |
+
- justext==3.0.0
|
266 |
+
- keras==2.11.0
|
267 |
+
- kiwisolver==1.3.1
|
268 |
+
- lameenc==1.4.2
|
269 |
+
- langcodes==3.3.0
|
270 |
+
- libclang==15.0.6.1
|
271 |
+
- librosa==0.8.1
|
272 |
+
- llvmlite==0.38.1
|
273 |
+
- lpips==0.1.4
|
274 |
+
- lxml==4.9.3
|
275 |
+
- markdown==3.4.1
|
276 |
+
- markdown-it-py==2.1.0
|
277 |
+
- markupsafe==2.1.2
|
278 |
+
- matplotlib==3.7.2
|
279 |
+
- matplotlib-inline==0.1.6
|
280 |
+
- mdurl==0.1.2
|
281 |
+
- mediapy==1.1.4
|
282 |
+
- mistune==2.0.4
|
283 |
+
- msgpack==1.0.4
|
284 |
+
- msgpack-numpy==0.4.8
|
285 |
+
- mteb==0.0.2
|
286 |
+
- multidict==6.0.3
|
287 |
+
- multiprocess==0.70.14
|
288 |
+
- murmurhash==1.0.5
|
289 |
+
- mutagen==1.46.0
|
290 |
+
- mypy==0.991
|
291 |
+
- mypy-extensions==0.4.3
|
292 |
+
- nbclassic==0.4.8
|
293 |
+
- nbclient==0.7.2
|
294 |
+
- nbconvert==7.2.6
|
295 |
+
- nbformat==5.5.0
|
296 |
+
- nerfacc==0.3.3
|
297 |
+
- nerfstudio==0.1.15
|
298 |
+
- nest-asyncio==1.5.6
|
299 |
+
- netifaces==0.11.0
|
300 |
+
- networkx==2.5.1
|
301 |
+
- ninja==1.11.1
|
302 |
+
- nltk==3.6.5
|
303 |
+
- norbert==0.2.1
|
304 |
+
- notebook==6.5.2
|
305 |
+
- notebook-shim==0.2.2
|
306 |
+
- numba==0.55.2
|
307 |
+
- numpy==1.22.4
|
308 |
+
- nuscenes-devkit==1.1.9
|
309 |
+
- oauthlib==3.2.2
|
310 |
+
- omegaconf==2.3.0
|
311 |
+
- onnxruntime==1.15.1
|
312 |
+
- open3d==0.16.1
|
313 |
+
- openai==0.27.0
|
314 |
+
- opencv-python==4.6.0.66
|
315 |
+
- openunmix==1.2.1
|
316 |
+
- opt-einsum==3.3.0
|
317 |
+
- outcome==1.2.0
|
318 |
+
- packaging==21.0
|
319 |
+
- pandas==1.3.4
|
320 |
+
- pandocfilters==1.5.0
|
321 |
+
- parso==0.8.3
|
322 |
+
- pathtools==0.1.2
|
323 |
+
- pathy==0.6.0
|
324 |
+
- pexpect==4.8.0
|
325 |
+
- pickleshare==0.7.5
|
326 |
+
- pillow==8.2.0
|
327 |
+
- pip==22.3.1
|
328 |
+
- platformdirs==2.6.0
|
329 |
+
- plotly==5.12.0
|
330 |
+
- pooch==1.7.0
|
331 |
+
- preshed==3.0.5
|
332 |
+
- prometheus-client==0.15.0
|
333 |
+
- prompt-toolkit==3.0.36
|
334 |
+
- protobuf==3.19.6
|
335 |
+
- psutil==5.9.4
|
336 |
+
- ptyprocess==0.7.0
|
337 |
+
- pure-eval==0.2.2
|
338 |
+
- pyarrow==12.0.1
|
339 |
+
- pyasn1==0.4.8
|
340 |
+
- pyasn1-modules==0.2.8
|
341 |
+
- pyaudio==0.2.13
|
342 |
+
- pybind11==2.10.3
|
343 |
+
- pycairo==1.24.0
|
344 |
+
- pycocotools==2.0.6
|
345 |
+
- pycryptodomex==3.18.0
|
346 |
+
- pydantic==1.8.2
|
347 |
+
- pyee==9.0.4
|
348 |
+
- pygments==2.13.0
|
349 |
+
- pylibsrtp==0.7.1
|
350 |
+
- pymeshlab==2022.2.post2
|
351 |
+
- pyngrok==5.2.1
|
352 |
+
- pyopengl==3.1.6
|
353 |
+
- pyopengl-accelerate==3.1.5
|
354 |
+
- pypandoc==1.10
|
355 |
+
- pyparsing==2.4.7
|
356 |
+
- pyphen==0.11.0
|
357 |
+
- pyquaternion==0.9.9
|
358 |
+
- pyrsistent==0.19.2
|
359 |
+
- pysimplegui==4.60.5
|
360 |
+
- python-dateutil==2.8.2
|
361 |
+
- python-engineio==4.3.4
|
362 |
+
- python-igraph==0.10.5
|
363 |
+
- python-json-logger==2.0.4
|
364 |
+
- python-socketio==5.7.2
|
365 |
+
- pytrec-eval==0.5
|
366 |
+
- pytube==15.0.0
|
367 |
+
- pytz==2021.3
|
368 |
+
- pyzmq==24.0.1
|
369 |
+
- qtconsole==5.4.0
|
370 |
+
- qtpy==2.3.0
|
371 |
+
- regex==2021.10.8
|
372 |
+
- requests-oauthlib==1.3.1
|
373 |
+
- resampy==0.4.2
|
374 |
+
- retrying==1.3.4
|
375 |
+
- rfc3339-validator==0.1.4
|
376 |
+
- rfc3986==1.5.0
|
377 |
+
- rfc3986-validator==0.1.1
|
378 |
+
- rich==13.2.0
|
379 |
+
- rsa==4.9
|
380 |
+
- scikit-learn==1.3.0
|
381 |
+
- scipy==1.7.1
|
382 |
+
- seaborn==0.11.2
|
383 |
+
- segtok==1.5.10
|
384 |
+
- selenium==4.5.0
|
385 |
+
- send2trash==1.8.0
|
386 |
+
- sentence-transformers==2.2.1
|
387 |
+
- sentencepiece==0.1.99
|
388 |
+
- sentry-sdk==1.13.0
|
389 |
+
- setproctitle==1.3.2
|
390 |
+
- setuptools==66.1.1
|
391 |
+
- shapely==2.0.0
|
392 |
+
- shtab==1.5.8
|
393 |
+
- simpervisor==0.4
|
394 |
+
- smart-open==5.2.1
|
395 |
+
- smmap==5.0.0
|
396 |
+
- sniffio==1.3.0
|
397 |
+
- sortedcontainers==2.4.0
|
398 |
+
- sounddevice==0.4.6
|
399 |
+
- soundfile==0.12.1
|
400 |
+
- soupsieve==2.3.1
|
401 |
+
- spacy==3.1.3
|
402 |
+
- spacy-legacy==3.0.8
|
403 |
+
- spleeter==2.3.2
|
404 |
+
- srsly==2.4.1
|
405 |
+
- stack-data==0.6.2
|
406 |
+
- stockfish==3.28.0
|
407 |
+
- submitit==1.4.5
|
408 |
+
- tabulate==0.8.9
|
409 |
+
- tenacity==8.1.0
|
410 |
+
- tensorboard==2.11.2
|
411 |
+
- tensorboard-data-server==0.6.1
|
412 |
+
- tensorboard-plugin-wit==1.8.1
|
413 |
+
- tensorflow==2.11.0
|
414 |
+
- tensorflow-estimator==2.11.0
|
415 |
+
- tensorflow-io-gcs-filesystem==0.30.0
|
416 |
+
- termcolor==2.1.1
|
417 |
+
- terminado==0.17.1
|
418 |
+
- textacy==0.11.0
|
419 |
+
- texttable==1.6.7
|
420 |
+
- thinc==8.0.10
|
421 |
+
- threadpoolctl==3.0.0
|
422 |
+
- timm==0.6.12
|
423 |
+
- tinycss2==1.2.1
|
424 |
+
- tld==0.13
|
425 |
+
- tokenizers==0.13.2
|
426 |
+
- tomli==2.0.1
|
427 |
+
- toolz==0.11.1
|
428 |
+
- torch==1.12.1
|
429 |
+
- torch-fidelity==0.3.0
|
430 |
+
- torchmetrics==0.11.0
|
431 |
+
- torchtyping==0.1.4
|
432 |
+
- tornado==6.2
|
433 |
+
- tqdm==4.65.0
|
434 |
+
- trafilatura==1.6.1
|
435 |
+
- traitlets==5.7.1
|
436 |
+
- transformers==4.26.0
|
437 |
+
- treetable==0.2.5
|
438 |
+
- trio==0.22.0
|
439 |
+
- trio-websocket==0.9.2
|
440 |
+
- txaio==22.2.1
|
441 |
+
- typeguard==2.13.3
|
442 |
+
- typer==0.3.2
|
443 |
+
- typing-extensions==3.10.0.2
|
444 |
+
- tyro==0.3.37
|
445 |
+
- tzlocal==5.0.1
|
446 |
+
- u-msgpack-python==2.7.2
|
447 |
+
- uri-template==1.2.0
|
448 |
+
- urllib3==1.26.12
|
449 |
+
- vpython==7.6.4
|
450 |
+
- wandb==0.13.9
|
451 |
+
- wasabi==0.8.2
|
452 |
+
- wcwidth==0.2.5
|
453 |
+
- webcolors==1.12
|
454 |
+
- webencodings==0.5.1
|
455 |
+
- websocket-client==1.4.2
|
456 |
+
- websockets==11.0.3
|
457 |
+
- werkzeug==2.2.2
|
458 |
+
- widgetsnbextension==4.0.4
|
459 |
+
- wrapt==1.14.1
|
460 |
+
- wsproto==1.2.0
|
461 |
+
- wxpython==4.2.0
|
462 |
+
- xatlas==0.0.7
|
463 |
+
- xxhash==3.2.0
|
464 |
+
- yake==0.4.8
|
465 |
+
- yarl==1.8.2
|
466 |
+
- yt-dlp==2023.7.6
|
467 |
+
- zipp==3.11.0
|
468 |
+
prefix: /opt/miniconda3
|
quantize_bitsandbytes.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from transformers import AutoModel
|
3 |
+
from accelerate import Accelerator, init_empty_weights
|
4 |
+
from accelerate.utils import BnbQuantizationConfig, load_and_quantize_model
|
5 |
+
|
6 |
+
# Make sure transformers works offline
|
7 |
+
os.environ["TRANSFORMERS_OFFLINE"] = "1"
|
8 |
+
|
9 |
+
# 1. Initialize the empty model
|
10 |
+
model_fp32 = AutoModel.from_pretrained("./models/all-MiniLM-L6-v2")
|
11 |
+
with init_empty_weights():
|
12 |
+
empty_model = model_fp32
|
13 |
+
|
14 |
+
# 2. Get the path to the weights of your model. For now, we'll assume it's in the same folder.
|
15 |
+
weights_location = "./models/all-MiniLM-L6-v2-unquantized/pytorch_model.bin"
|
16 |
+
|
17 |
+
# 3. Set quantization configuration (8-bit for this example)
|
18 |
+
bnb_quantization_config = BnbQuantizationConfig(load_in_8bit=True, llm_int8_threshold=6)
|
19 |
+
|
20 |
+
# 4. Quantize the empty model
|
21 |
+
quantized_model = load_and_quantize_model(empty_model, weights_location=weights_location,
|
22 |
+
bnb_quantization_config=bnb_quantization_config, device_map="auto")
|
23 |
+
|
24 |
+
# 5. Save the quantized model
|
25 |
+
accelerator = Accelerator()
|
26 |
+
new_weights_location = "./models/all-MiniLM-L6-v2-unquantized-q8"
|
27 |
+
accelerator.save_model(quantized_model, new_weights_location)
|
quantize_fx.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from torch.ao.quantization import quantize_dynamic
|
3 |
+
from optimum.fx.optimization import Transformation
|
4 |
+
from transformers import AutoModel, AutoTokenizer
|
5 |
+
from transformers.utils.fx import symbolic_trace
|
6 |
+
|
7 |
+
# Define the Dynamic Quantization Transformation
|
8 |
+
class DynamicQuantization(Transformation):
|
9 |
+
def __init__(self, dtype=torch.qint8, qconfig_spec=None, mapping=None):
|
10 |
+
super().__init__()
|
11 |
+
self.dtype = dtype
|
12 |
+
self.qconfig_spec = qconfig_spec
|
13 |
+
self.mapping = mapping
|
14 |
+
|
15 |
+
def transform(self, graph_module):
|
16 |
+
# Use torch's quantize_dynamic function to quantize the module
|
17 |
+
quantized_module = quantize_dynamic(
|
18 |
+
graph_module, qconfig_spec=self.qconfig_spec, dtype=self.dtype, mapping=self.mapping, inplace=False
|
19 |
+
)
|
20 |
+
return quantized_module
|
21 |
+
|
22 |
+
# Load the model
|
23 |
+
model_path = "./models/all-MiniLM-L6-v2"
|
24 |
+
model = AutoModel.from_pretrained(model_path)
|
25 |
+
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
26 |
+
|
27 |
+
# Symbolically trace the model
|
28 |
+
# Note: For certain models, you might need to modify the input_names
|
29 |
+
input_names = ["input_ids", "attention_mask"]
|
30 |
+
traced_model = symbolic_trace(model, input_names=input_names)
|
31 |
+
|
32 |
+
# Apply dynamic quantization
|
33 |
+
transformation = DynamicQuantization(dtype=torch.qint8)
|
34 |
+
quantized_model = transformation(traced_model)
|
35 |
+
|
36 |
+
print(type(quantized_model.))
|
37 |
+
#
|
38 |
+
# # Save the quantized model
|
39 |
+
# quantized_model_path = "./models/all-MiniLM-L6-v2-unquantized-q8/"
|
40 |
+
# quantized_model.save(quantized_model_path)
|
41 |
+
# tokenizer.save_pretrained(quantized_model_path) # Save the tokenizer as well
|
quantize_torch.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from transformers import AutoModel
|
3 |
+
import os
|
4 |
+
os.environ["TRANSFORMERS_OFFLINE"] = "1" # 1 for offline
|
5 |
+
|
6 |
+
model_fp32 = AutoModel.from_pretrained("./models/all-MiniLM-L6-v2")
|
7 |
+
|
8 |
+
model_int8 = torch.ao.quantization.quantize_dynamic(
|
9 |
+
model_fp32, # the original model
|
10 |
+
{torch.nn.Linear}, # a set of layers to dynamically quantize
|
11 |
+
dtype=torch.float16)
|
12 |
+
|
13 |
+
torch.save(model_int8.state_dict(), "./models/all-MiniLM-L6-v2-unquantized-q16/pytorch_model.bin")
|
quantized2tf.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
import os
|
3 |
+
|
4 |
+
import onnx
|
5 |
+
from onnx_tf.backend import prepare
|
6 |
+
|
7 |
+
def onnx_to_torch_converter(dir_name):
|
8 |
+
if not os.path.exists(dir_name):
|
9 |
+
print(f"Directory {dir_name} does not exist!")
|
10 |
+
return
|
11 |
+
|
12 |
+
onnx_model_path = os.path.join(dir_name, "onnx", "model.onnx")
|
13 |
+
|
14 |
+
if not os.path.exists(onnx_model_path):
|
15 |
+
print(f"ONNX model at {onnx_model_path} does not exist!")
|
16 |
+
return
|
17 |
+
|
18 |
+
onnx_model = onnx.load(onnx_model_path)
|
19 |
+
tf_rep = prepare(onnx_model) # prepare tf representation
|
20 |
+
tf_model_save_path = os.path.join(dir_name, "tf_model")
|
21 |
+
|
22 |
+
tf_rep.export_graph(tf_model_save_path) # export the model
|
23 |
+
|
24 |
+
print(f"PyTorch model saved at {tf_model_save_path}")
|
25 |
+
|
26 |
+
|
27 |
+
if __name__ == "__main__":
|
28 |
+
if len(sys.argv) != 2:
|
29 |
+
print("Usage: python onnx2torch.py [directory_path]")
|
30 |
+
else:
|
31 |
+
dir_name = sys.argv[1]
|
32 |
+
onnx_to_torch_converter(dir_name)
|
quantized2torch.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
import os
|
3 |
+
from onnx2torch import convert
|
4 |
+
import torch
|
5 |
+
|
6 |
+
def onnx_to_torch_converter(dir_name):
|
7 |
+
if not os.path.exists(dir_name):
|
8 |
+
print(f"Directory {dir_name} does not exist!")
|
9 |
+
return
|
10 |
+
|
11 |
+
onnx_model_path = os.path.join(dir_name, "onnx", "model.onnx")
|
12 |
+
|
13 |
+
if not os.path.exists(onnx_model_path):
|
14 |
+
print(f"ONNX model at {onnx_model_path} does not exist!")
|
15 |
+
return
|
16 |
+
|
17 |
+
torch_model = convert(onnx_model_path)
|
18 |
+
|
19 |
+
torch_model_save_path = os.path.join(dir_name, "pytorch_model.bin")
|
20 |
+
torch.save(torch_model.state_dict(), torch_model_save_path)
|
21 |
+
print(f"PyTorch model saved at {torch_model_save_path}")
|
22 |
+
|
23 |
+
|
24 |
+
if __name__ == "__main__":
|
25 |
+
if len(sys.argv) != 2:
|
26 |
+
print("Usage: python onnx2torch.py [directory_path]")
|
27 |
+
else:
|
28 |
+
dir_name = sys.argv[1]
|
29 |
+
onnx_to_torch_converter(dir_name)
|
setup.sh
CHANGED
@@ -5,19 +5,17 @@ conda create --name qMTEB python=3.9 -y
|
|
5 |
|
6 |
source activate qMTEB
|
7 |
|
8 |
-
|
9 |
conda install -c intel openmp
|
10 |
conda install nomkl
|
11 |
-
|
12 |
-
|
13 |
-
conda install -c conda-forge sentence-transformers
|
14 |
-
conda install -c huggingface transformers
|
15 |
-
|
16 |
pip install mteb
|
17 |
-
|
18 |
-
|
19 |
|
20 |
source link.sh
|
21 |
|
|
|
|
|
22 |
echo "Setup completed!"
|
23 |
|
|
|
5 |
|
6 |
source activate qMTEB
|
7 |
|
|
|
8 |
conda install -c intel openmp
|
9 |
conda install nomkl
|
10 |
+
pip install torch torchvision torchaudio
|
11 |
+
pip install -e /Users/varun/documents/python/embeddings/sentence-transformers
|
|
|
|
|
|
|
12 |
pip install mteb
|
13 |
+
pip install onnxruntime-silicon
|
14 |
+
python -m pip install "optimum[onnxruntime]@git+https://github.com/huggingface/optimum.git"
|
15 |
|
16 |
source link.sh
|
17 |
|
18 |
+
source activate qMTEB
|
19 |
+
|
20 |
echo "Setup completed!"
|
21 |
|