NobodySpecial
committed on
Commit
•
b47deb3
1
Parent(s):
eef34c4
Update README.md
Browse files
README.md
CHANGED
@@ -1,3 +1,317 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
base_model:
|
3 |
- TheDrummer/Lazarus-2407-100B
|
|
|
1 |
+
# Quantization Notes:
|
2 |
+
|
3 |
+
bpw: 5
|
4 |
+
hb: 6
|
5 |
+
calibration_length: 8192
|
6 |
+
measurement_length: 8192
|
7 |
+
|
8 |
+
## Quantization Code:
|
9 |
+
|
10 |
+
Posting this here for convenience in case anyone is interested or finds it useful. I run this code in a conda Python 3.12 environment on WSL 2 Ubuntu. To run it: create the conda env and install/upgrade exllamav2, log in to Hugging Face with the `huggingface-cli login` terminal command, fill in the config.yaml file, then run the Python script.
|
11 |
+
|
12 |
+
```config.yaml
|
13 |
+
base_model_name: "Endurance-100B-v1"
|
14 |
+
input_model: "~/models/TheDrummer_Endurance-100B-v1"
|
15 |
+
download_output_dir: "~/models"
|
16 |
+
output_base_path: "~/models/exl2-converted"
|
17 |
+
hf_username: "NobodySpecial"
|
18 |
+
default_hb: 6 # Default head bits value
|
19 |
+
exllama_path: "~/exllamav2"
|
20 |
+
|
21 |
+
quantizations:
|
22 |
+
- bpw: 5
|
23 |
+
calibration_length: 8192 # Optional: specify calibration length in tokens
|
24 |
+
measurement_length: 8192 # Optional: specify measurement length in tokens
|
25 |
+
- bpw: 6
|
26 |
+
hb: 8 # Optional
|
27 |
+
calibration_length: 8192 # Optional: specify calibration length in tokens
|
28 |
+
measurement_length: 8192 # Optional: specify measurement length in tokens
|
29 |
+
```
|
30 |
+
|
31 |
+
```script.py
|
32 |
+
import yaml
|
33 |
+
import os
|
34 |
+
import sys
|
35 |
+
import subprocess
|
36 |
+
import logging
|
37 |
+
import re
|
38 |
+
from tqdm import tqdm
|
39 |
+
from pathlib import Path
|
40 |
+
from huggingface_hub import HfApi, create_repo, login, hf_hub_download
|
41 |
+
|
42 |
+
# Set up logging
|
43 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
44 |
+
|
45 |
+
def run_command(command_list, timeout=300):
    """Run an external command and stream its standard output to the log.

    Standard error is collected on a helper thread while standard output is
    being read, so a child that writes a lot to stderr cannot fill that pipe
    and stall forever.

    Args:
        command_list: Program and arguments, handed to ``subprocess.Popen``
            as a list (no shell is involved).
        timeout: Accepted for backward compatibility but NOT enforced. The
            previous implementation never applied it either, and enforcing
            the default would kill long-running jobs that callers start
            without overriding it.

    Returns:
        True if the command exited with status 0; False if it exited with a
        non-zero status, could not be started, or its output could not be
        read.
    """
    # Local import: threading is not part of the module-level import block.
    import threading

    printable = ' '.join(str(part) for part in command_list)
    stderr_lines = []

    def _drain_stderr(stream):
        # Runs on a helper thread so stderr is consumed concurrently with
        # the stdout loop below.
        try:
            stderr_lines.extend(stream)
        except (OSError, ValueError):
            # The pipe was closed underneath us during error cleanup;
            # whatever was collected so far is still reported.
            pass

    try:
        with subprocess.Popen(
            command_list,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            bufsize=1,
        ) as process:
            drainer = threading.Thread(
                target=_drain_stderr, args=(process.stderr,), daemon=True
            )
            drainer.start()
            try:
                for line in process.stdout:
                    logging.info(line.strip())
                rc = process.wait()
            except BaseException:
                # Never leave the child running (or block in Popen.__exit__)
                # when reading its output failed or the user interrupted us.
                process.kill()
                raise
            finally:
                drainer.join()
    except Exception as e:
        logging.error(f"Unexpected error executing command: {printable}")
        logging.error(f"Error: {str(e)}")
        return False

    if rc != 0:
        logging.error(f"Error executing command: {printable}")
        logging.error(f"Error output: {''.join(stderr_lines)}")
        return False

    logging.info(f"Command executed successfully: {printable}")
    return True
|
80 |
+
|
81 |
+
def validate_config(config):
    """Check the loaded configuration before any work starts.

    Verifies that the configuration is a mapping, that all required keys are
    present, that ``quantizations`` is a list of mappings, and that
    ``exllama_path`` is an existing directory. ``output_base_path`` is
    created if it does not exist yet.

    Args:
        config: The object produced by loading the YAML configuration file.
            An empty file loads as ``None``, which is rejected here.

    Returns:
        True if the configuration is usable, False otherwise. Every failure
        is logged with the reason.
    """
    if not isinstance(config, dict):
        logging.error(
            "Configuration must be a mapping of settings; "
            f"got {type(config).__name__}."
        )
        return False

    required_keys = [
        'exllama_path',
        'base_model_name',
        'input_model',
        'output_base_path',
        'hf_username',
        'quantizations'
    ]
    missing_keys = [key for key in required_keys if key not in config]
    if missing_keys:
        logging.error(f"Missing required configuration keys: {', '.join(missing_keys)}")
        return False

    # Checked before touching the filesystem so an invalid configuration
    # never leaves a freshly created output directory behind.
    quantizations = config['quantizations']
    if not isinstance(quantizations, list) or not all(
        isinstance(entry, dict) for entry in quantizations
    ):
        logging.error("'quantizations' must be a list of quantization settings.")
        return False

    # Validate exllama_path
    if not os.path.isdir(os.path.expanduser(config['exllama_path'])):
        logging.error(f"exllama_path does not exist or is not a directory: {config['exllama_path']}")
        return False

    # Validate output_base_path
    output_base_path = os.path.expanduser(config['output_base_path'])
    if not os.path.isdir(output_base_path):
        try:
            os.makedirs(output_base_path, exist_ok=True)
            logging.info(f"Created output_base_path directory: {output_base_path}")
        except OSError as e:
            logging.error(f"Failed to create output_base_path directory: {str(e)}")
            return False

    return True
|
111 |
+
|
112 |
+
def authenticate_hf():
    """Log in to Hugging Face with the locally available access token.

    The token is looked up in this order:

    1. the ``HF_TOKEN`` environment variable;
    2. the token file named by ``HF_TOKEN_PATH``;
    3. ``$HF_HOME/token``;
    4. ``~/.cache/huggingface/token`` (the only location previously read).

    Returns:
        True if login succeeded, False otherwise (the reason is logged).
    """
    try:
        hf_token = os.environ.get("HF_TOKEN", "").strip()
        if not hf_token:
            # Read the token from the local cache file
            hf_home = os.environ.get("HF_HOME") or "~/.cache/huggingface"
            token_path = os.path.expanduser(
                os.environ.get("HF_TOKEN_PATH") or os.path.join(hf_home, "token")
            )
            with open(token_path, "r") as token_file:
                hf_token = token_file.read().strip()

        if not hf_token:
            # An empty token cannot authenticate; report it as such instead
            # of surfacing a less obvious error from the login call.
            logging.error("Failed to authenticate with Hugging Face: stored token is empty")
            return False

        # Use the token to login
        login(token=hf_token)
        logging.info("Authenticated with Hugging Face successfully.")
    except Exception as e:
        logging.error(f"Failed to authenticate with Hugging Face: {str(e)}")
        return False
    return True
|
126 |
+
|
127 |
+
def sanitize_model_and_branch_names(model, branch):
    """Normalize a model reference into a ``(repo_id, branch)`` pair.

    Accepts a bare repository id (``user/model``), a full
    ``https://huggingface.co/`` URL, and an optional ``:branch`` suffix on
    either form. A branch given as a suffix takes precedence over the
    ``branch`` argument.

    Args:
        model: Repository id or URL, optionally followed by ``:branch``.
        branch: Branch to use when ``model`` carries no suffix; ``None``
            selects ``"main"``.

    Returns:
        Tuple ``(model, branch)`` with the URL prefix, trailing slashes and
        branch suffix removed from ``model``.

    Raises:
        ValueError: If the branch contains anything other than letters,
            digits, period, underscore or dash.
    """
    # Remove trailing slash if present
    model = model.rstrip('/')

    # Remove base URL if present. This must happen before splitting on ':'
    # because the URL scheme itself contains a colon.
    base_url = "https://huggingface.co/"
    if model.startswith(base_url):
        model = model[len(base_url):]

    # Split model and branch if provided in model name
    model_parts = model.split(":")
    model = model_parts[0]
    if len(model_parts) > 1:
        branch = model_parts[1]

    # Use 'main' as default branch if not specified
    if branch is None:
        branch = "main"

    # Validate branch name. fullmatch is required here: with re.match, a
    # trailing '$' also matches just before a final newline, so "main\n"
    # would have been accepted.
    if not re.fullmatch(r"[a-zA-Z0-9._-]+", branch):
        raise ValueError("Invalid branch name. Only alphanumeric characters, period, underscore and dash are allowed.")

    return model, branch
|
149 |
+
|
150 |
+
def download_model(model_name, branch_name, output_dir):
    """Download every file of a Hugging Face repository revision.

    Files are stored under ``output_dir`` in a folder named after the last
    two path components of the repository id joined by ``_``; for any branch
    other than ``main`` the branch name is appended as ``_<branch>``.

    Args:
        model_name: Repository id or URL, optionally with a ``:branch``
            suffix.
        branch_name: Branch to download, or ``None`` for the default.
        output_dir: Parent directory for the download; ``~`` is expanded.

    Returns:
        ``pathlib.Path`` of the download folder, or ``None`` if the
        repository could not be listed or any file failed to download. An
        incomplete folder is deliberately not reported as success, because
        the caller would otherwise start a long quantization run on a
        broken model.
    """
    # Sanitize model and branch names
    model_name, branch_name = sanitize_model_and_branch_names(model_name, branch_name)

    # Expand user directory if needed
    output_dir = os.path.expanduser(output_dir)

    # Initialize Hugging Face API
    api = HfApi()

    # Work out the output directory
    folder_name = '_'.join(model_name.split('/')[-2:])
    if branch_name != "main":
        folder_name = f"{folder_name}_{branch_name}"
    output_folder = Path(output_dir) / folder_name

    # Get file list first, so an unreachable repository does not leave an
    # empty folder behind.
    try:
        files = api.list_repo_files(model_name, revision=branch_name)
    except Exception as e:
        logging.error(f"Error accessing repository: {e}")
        return None

    output_folder.mkdir(parents=True, exist_ok=True)

    # Download files, remembering which ones failed
    failed_files = []
    for file in tqdm(files, desc="Downloading files"):
        try:
            hf_hub_download(
                repo_id=model_name,
                filename=file,
                revision=branch_name,
                local_dir=output_folder,
                local_dir_use_symlinks=False
            )
        except Exception as e:
            logging.error(f"Error downloading {file}: {e}")
            failed_files.append(file)

    if failed_files:
        logging.error(
            f"{len(failed_files)} of {len(files)} files failed to download: "
            f"{', '.join(failed_files)}"
        )
        return None

    logging.info(f"Model downloaded to {output_folder}")
    return output_folder
|
188 |
+
|
189 |
+
def resolve_input_model(config):
    """Return a local directory that holds the model to quantize.

    When ``config['input_model']`` (after ``~`` expansion) is an existing
    directory it is returned unchanged. Otherwise the value is treated as a
    Hugging Face repository and downloaded below
    ``config['download_output_dir']`` (default ``./models``).

    The process exits with status 1 if the download directory cannot be
    created or the download fails.
    """
    candidate = os.path.expanduser(config['input_model'])

    # Local directory: nothing to fetch.
    if os.path.isdir(candidate):
        logging.info(f"Using local model directory: {candidate}")
        return candidate

    # Anything else is assumed to name a Hugging Face repository.
    logging.info(f"Input model is a Hugging Face model: {candidate}")
    target_root = os.path.expanduser(config.get('download_output_dir', './models'))
    if not os.path.isdir(target_root):
        try:
            os.makedirs(target_root, exist_ok=True)
            logging.info(f"Created download_output_dir directory: {target_root}")
        except OSError as e:
            logging.error(f"Failed to create download_output_dir directory: {str(e)}")
            sys.exit(1)

    repo_id, revision = sanitize_model_and_branch_names(candidate, branch=None)
    downloaded = download_model(repo_id, revision, target_root)
    if downloaded is None:
        logging.error("Failed to download the model.")
        sys.exit(1)
    return str(downloaded)
|
212 |
+
|
213 |
+
def quantize_and_upload(config, input_model_path):
    """Quantize the model once per configured entry and upload each result.

    For every entry in ``config['quantizations']`` this runs exllamav2's
    ``convert.py`` and then uploads the output folder to the Hugging Face
    repository ``<hf_username>/<base_model_name>-exl2-<bpw>bpw``. A failure
    in one entry is logged and the loop moves on to the next; an upload
    failure leaves the quantized model available locally.

    Args:
        config: Validated configuration mapping. Reads ``exllama_path``,
            ``base_model_name``, ``output_base_path``, ``hf_username``,
            ``quantizations`` and the optional ``default_hb`` (default 8).
        input_model_path: Local directory of the model to convert.

    Each quantization entry requires ``bpw`` and may set ``hb`` (defaults to
    ``default_hb``), ``calibration_length`` (default 2048) and
    ``measurement_length`` (defaults to ``calibration_length``).
    """
    exllama_path = os.path.expanduser(config['exllama_path'])
    base_model_name = config['base_model_name']
    output_base_path = os.path.expanduser(config['output_base_path'])
    hf_username = config['hf_username']
    default_hb = config.get('default_hb', 8)

    for quant_config in config['quantizations']:
        # A malformed (non-mapping) entry is skipped like one without 'bpw'
        # rather than aborting the remaining quantizations.
        if not isinstance(quant_config, dict) or 'bpw' not in quant_config:
            logging.warning("Skipping quantization config without 'bpw'.")
            continue

        bpw = quant_config['bpw']
        hb = quant_config.get('hb', default_hb)
        calibration_length = quant_config.get('calibration_length', 2048)
        measurement_length = quant_config.get('measurement_length', calibration_length)

        # bool is a subclass of int, so YAML true/false must be rejected
        # explicitly or it would reach the command line as "True"/"False".
        lengths = (calibration_length, measurement_length)
        if any(isinstance(value, bool) or not isinstance(value, int) for value in lengths):
            logging.error("Invalid calibration_length or measurement_length values. Expected integers.")
            continue

        if calibration_length <= 0 or measurement_length <= 0:
            logging.error("Invalid calibration_length or measurement_length values. Must be positive integers.")
            continue

        quant_name = f"{base_model_name}-exl2-{bpw}bpw"
        work_dir = os.path.join(output_base_path, base_model_name, f"{quant_name}-work")
        output_dir = os.path.join(output_base_path, base_model_name, quant_name)

        try:
            os.makedirs(work_dir, exist_ok=True)
            os.makedirs(output_dir, exist_ok=True)
            logging.info(f"Directories created or already exist: {work_dir}, {output_dir}")
        except OSError as e:
            logging.error(f"Failed to create directories for {quant_name}: {str(e)}")
            continue

        # Run quantization with the interpreter running this script, so the
        # converter sees the same environment even when no "python"
        # executable is on PATH.
        command_list = [
            sys.executable or "python", os.path.join(exllama_path, "convert.py"),
            "-i", input_model_path,
            "-o", work_dir,
            "-cf", output_dir,
            "-b", str(bpw),
            "-hb", str(hb),
            "-l", str(calibration_length),
            "-ml", str(measurement_length)
        ]
        if not run_command(command_list):
            logging.error(f"Quantization failed for {quant_name}. Skipping upload.")
            continue

        logging.info(f"Quantization completed for {quant_name}")

        # Try to upload to Hugging Face
        repo_name = f"{hf_username}/{quant_name}"
        try:
            create_repo(repo_name, repo_type="model", exist_ok=True)
            logging.info(f"Repository '{repo_name}' is ready.")
            api = HfApi()
            api.upload_folder(
                folder_path=output_dir,
                repo_id=repo_name,
                repo_type="model"
            )
            logging.info(f"Successfully uploaded {quant_name} to Hugging Face")
        except Exception as e:
            logging.error(f"Failed to upload {quant_name} to Hugging Face: {str(e)}")
            logging.info(f"Quantized model is still available locally at {output_dir}")

        logging.info(f"Completed processing for {quant_name}")
|
284 |
+
|
285 |
+
if __name__ == "__main__":
    # Entry point: load config.yaml from the current directory, validate it,
    # authenticate, resolve the input model, then quantize and upload.
    config_path = "config.yaml"
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f)
        logging.info(f"Configuration loaded from {config_path}")
    except yaml.YAMLError as e:
        logging.error(f"Error parsing {config_path}: {str(e)}")
        sys.exit(1)
    except FileNotFoundError:
        logging.error(f"{config_path} not found. Please create a config file.")
        sys.exit(1)

    # An empty file loads as None and a scalar-only file as a plain value;
    # neither can be validated as a settings mapping.
    if not isinstance(config, dict):
        logging.error(f"{config_path} is empty or is not a mapping of settings.")
        sys.exit(1)

    if not validate_config(config):
        logging.error("Configuration validation failed. Exiting.")
        sys.exit(1)

    if not authenticate_hf():
        logging.error("Hugging Face authentication failed. Exiting.")
        sys.exit(1)

    input_model_path = resolve_input_model(config)
    if not input_model_path:
        logging.error("Failed to resolve input model path. Exiting.")
        sys.exit(1)

    quantize_and_upload(config, input_model_path)
    logging.info("Script execution completed.")
|
313 |
+
```
|
314 |
+
|
315 |
---
|
316 |
base_model:
|
317 |
- TheDrummer/Lazarus-2407-100B
|