# Quantization Notes:

- bpw: 5
- hb: 6
- calibration_length: 8192
- measurement_length: 8192
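
These settings map directly onto the arguments that exllamav2's `convert.py` receives in the script below: `bpw` → `-b`, `hb` → `-hb`, `calibration_length` → `-l`, `measurement_length` → `-ml`. For this quant the script ends up running roughly the following (a sketch only; the paths here are illustrative placeholders, not the exact directories I used):

```python
import os
import subprocess

# Roughly the command the script below builds for bpw=5, hb=6,
# calibration_length=8192, measurement_length=8192. Paths are placeholders.
subprocess.run([
    "python", os.path.expanduser("~/exllamav2/convert.py"),
    "-i", os.path.expanduser("~/models/TheDrummer_Lazarus-2407-100B"),   # unquantized input model
    "-o", os.path.expanduser("~/models/exl2-work"),                      # working / scratch directory
    "-cf", os.path.expanduser("~/models/Lazarus-2407-100B-exl2-5bpw"),   # compiled output directory
    "-b", "5",      # bits per weight
    "-hb", "6",     # head bits
    "-l", "8192",   # calibration row length in tokens
    "-ml", "8192",  # measurement row length in tokens
], check=True)
```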
## Quantization Code:

Posting this here for convenience in case anyone is interested or finds it useful. I run this code in a conda Python 3.12 environment under WSL 2 (Ubuntu). To run it: create the conda env and install/upgrade exllamav2, log in to Hugging Face with the `huggingface-cli login` terminal command, configure `config.yaml`, then run the Python script.
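
Before kicking off a long quantization run, it can be worth confirming that the token cached by `huggingface-cli login` is actually being picked up. A minimal check (assumes `huggingface_hub` is installed, which the script below needs anyway):

```python
from huggingface_hub import HfApi

# whoami() uses the token cached by `huggingface-cli login`
# (the same ~/.cache/huggingface/token file the script reads).
user = HfApi().whoami()
print(f"Authenticated to Hugging Face as: {user['name']}")
```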

```config.yaml
base_model_name: "Endurance-100B-v1"
input_model: "~/models/TheDrummer_Endurance-100B-v1"
download_output_dir: "~/models"
output_base_path: "~/models/exl2-converted"
hf_username: "NobodySpecial"
default_hb: 6 # Default head bits value
exllama_path: "~/exllamav2"

quantizations:
  - bpw: 5
    calibration_length: 8192 # Optional: specify calibration length in tokens
    measurement_length: 8192 # Optional: specify measurement length in tokens
  - bpw: 6
    hb: 8 # Optional
    calibration_length: 8192 # Optional: specify calibration length in tokens
    measurement_length: 8192 # Optional: specify measurement length in tokens
```
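
Each entry under `quantizations` gets its own working directory, output directory, and Hugging Face repo named `<base_model_name>-exl2-<bpw>bpw`, with `hb` falling back to `default_hb`, `calibration_length` to 2048, and `measurement_length` to `calibration_length` when omitted. A quick sketch of the repo names the example config above would produce:

```python
import yaml

with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

# Mirrors the naming used in the script below:
# quant_name = f"{base_model_name}-exl2-{bpw}bpw", repo = f"{hf_username}/{quant_name}"
for q in cfg["quantizations"]:
    print(f"{cfg['hf_username']}/{cfg['base_model_name']}-exl2-{q['bpw']}bpw")
# -> NobodySpecial/Endurance-100B-v1-exl2-5bpw
# -> NobodySpecial/Endurance-100B-v1-exl2-6bpw
```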

```script.py
import yaml
import os
import sys
import subprocess
import logging
import re
from tqdm import tqdm
from pathlib import Path
from huggingface_hub import HfApi, create_repo, login, hf_hub_download

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def run_command(command_list, timeout=300):
    try:
        process = subprocess.Popen(
            command_list,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            bufsize=1,
            universal_newlines=True
        )

        while True:
            output = process.stdout.readline()
            if output == '' and process.poll() is not None:
                break
            if output:
                logging.info(output.strip())

        rc = process.poll()
        if rc != 0:
            error_output = process.stderr.read()
            logging.error(f"Error executing command: {' '.join(command_list)}")
            logging.error(f"Error output: {error_output}")
            return False

        logging.info(f"Command executed successfully: {' '.join(command_list)}")
        return True
    except subprocess.TimeoutExpired:
        logging.error(f"Command timed out: {' '.join(command_list)}")
        process.kill()
        return False
    except Exception as e:
        logging.error(f"Unexpected error executing command: {' '.join(command_list)}")
        logging.error(f"Error: {str(e)}")
        return False

def validate_config(config):
    required_keys = [
        'exllama_path',
        'base_model_name',
        'input_model',
        'output_base_path',
        'hf_username',
        'quantizations'
    ]
    missing_keys = [key for key in required_keys if key not in config]
    if missing_keys:
        logging.error(f"Missing required configuration keys: {', '.join(missing_keys)}")
        return False

    # Validate exllama_path
    if not os.path.isdir(os.path.expanduser(config['exllama_path'])):
        logging.error(f"exllama_path does not exist or is not a directory: {config['exllama_path']}")
        return False

    # Validate output_base_path
    output_base_path = os.path.expanduser(config['output_base_path'])
    if not os.path.isdir(output_base_path):
        try:
            os.makedirs(output_base_path, exist_ok=True)
            logging.info(f"Created output_base_path directory: {output_base_path}")
        except OSError as e:
            logging.error(f"Failed to create output_base_path directory: {str(e)}")
            return False

    return True

def authenticate_hf():
    try:
        # Read the token from the local cache file
        token_path = os.path.expanduser("~/.cache/huggingface/token")
        with open(token_path, "r") as token_file:
            hf_token = token_file.read().strip()

        # Use the token to login
        login(token=hf_token)
        logging.info("Authenticated with Hugging Face successfully.")
    except Exception as e:
        logging.error(f"Failed to authenticate with Hugging Face: {str(e)}")
        return False
    return True

def sanitize_model_and_branch_names(model, branch):
    # Remove trailing slash if present
    model = model.rstrip('/')

    # Remove base URL if present
    if model.startswith("https://huggingface.co/"):
        model = model[len("https://huggingface.co/"):]

    # Split model and branch if provided in model name
    model_parts = model.split(":")
    model = model_parts[0]
    branch = model_parts[1] if len(model_parts) > 1 else branch

    # Use 'main' as default branch if not specified
    if branch is None:
        branch = "main"

    # Validate branch name
    if not re.match(r"^[a-zA-Z0-9._-]+$", branch):
        raise ValueError("Invalid branch name. Only alphanumeric characters, period, underscore and dash are allowed.")

    return model, branch

def download_model(model_name, branch_name, output_dir):
    # Sanitize model and branch names
    model_name, branch_name = sanitize_model_and_branch_names(model_name, branch_name)

    # Expand user directory if needed
    output_dir = os.path.expanduser(output_dir)

    # Initialize Hugging Face API
    api = HfApi()

    # Create output directory
    output_folder = Path(output_dir) / f"{'_'.join(model_name.split('/')[-2:])}"
    if branch_name != "main":
        output_folder = output_folder.with_name(f"{output_folder.name}_{branch_name}")
    output_folder.mkdir(parents=True, exist_ok=True)

    # Get file list
    try:
        files = api.list_repo_files(model_name, revision=branch_name)
    except Exception as e:
        logging.error(f"Error accessing repository: {e}")
        return None

    # Download files
    for file in tqdm(files, desc="Downloading files"):
        try:
            hf_hub_download(
                repo_id=model_name,
                filename=file,
                revision=branch_name,
                local_dir=output_folder,
                local_dir_use_symlinks=False
            )
        except Exception as e:
            logging.error(f"Error downloading {file}: {e}")

    logging.info(f"Model downloaded to {output_folder}")
    return output_folder

def resolve_input_model(config):
    input_model = os.path.expanduser(config['input_model'])
    if os.path.isdir(input_model):
        # Input model is a local directory
        logging.info(f"Using local model directory: {input_model}")
        return input_model
    else:
        # Input model is a Hugging Face repository
        logging.info(f"Input model is a Hugging Face model: {input_model}")
        download_output_dir = os.path.expanduser(config.get('download_output_dir', './models'))
        if not os.path.isdir(download_output_dir):
            try:
                os.makedirs(download_output_dir, exist_ok=True)
                logging.info(f"Created download_output_dir directory: {download_output_dir}")
            except OSError as e:
                logging.error(f"Failed to create download_output_dir directory: {str(e)}")
                sys.exit(1)
        model_name, branch_name = sanitize_model_and_branch_names(input_model, branch=None)
        output_folder = download_model(model_name, branch_name, download_output_dir)
        if output_folder is None:
            logging.error("Failed to download the model.")
            sys.exit(1)
        return str(output_folder)

def quantize_and_upload(config, input_model_path):
    exllama_path = os.path.expanduser(config['exllama_path'])
    base_model_name = config['base_model_name']
    output_base_path = os.path.expanduser(config['output_base_path'])
    hf_username = config['hf_username']
    default_hb = config.get('default_hb', 8)

    for quant_config in config['quantizations']:
        if 'bpw' not in quant_config:
            logging.warning("Skipping quantization config without 'bpw'.")
            continue

        bpw = quant_config['bpw']
        hb = quant_config.get('hb', default_hb)
        calibration_length = quant_config.get('calibration_length', 2048)
        measurement_length = quant_config.get('measurement_length', calibration_length)

        if not isinstance(calibration_length, int) or not isinstance(measurement_length, int):
            logging.error(f"Invalid calibration_length or measurement_length values. Expected integers.")
            continue

        if calibration_length <= 0 or measurement_length <= 0:
            logging.error(f"Invalid calibration_length or measurement_length values. Must be positive integers.")
            continue

        quant_name = f"{base_model_name}-exl2-{bpw}bpw"
        work_dir = os.path.join(output_base_path, base_model_name, f"{quant_name}-work")
        output_dir = os.path.join(output_base_path, base_model_name, quant_name)

        try:
            os.makedirs(work_dir, exist_ok=True)
            os.makedirs(output_dir, exist_ok=True)
            logging.info(f"Directories created or already exist: {work_dir}, {output_dir}")
        except OSError as e:
            logging.error(f"Failed to create directories for {quant_name}: {str(e)}")
            continue

        # Run quantization
        command_list = [
            "python", os.path.join(exllama_path, "convert.py"),
            "-i", input_model_path,
            "-o", work_dir,
            "-cf", output_dir,
            "-b", str(bpw),
            "-hb", str(hb),
            "-l", str(calibration_length),
            "-ml", str(measurement_length)
        ]
        if not run_command(command_list):
            logging.error(f"Quantization failed for {quant_name}. Skipping upload.")
            continue

        logging.info(f"Quantization completed for {quant_name}")

        # Try to upload to Hugging Face
        repo_name = f"{hf_username}/{quant_name}"
        try:
            create_repo(repo_name, repo_type="model", exist_ok=True)
            logging.info(f"Repository '{repo_name}' is ready.")
            api = HfApi()
            api.upload_folder(
                folder_path=output_dir,
                repo_id=repo_name,
                repo_type="model"
            )
            logging.info(f"Successfully uploaded {quant_name} to Hugging Face")
        except Exception as e:
            logging.error(f"Failed to upload {quant_name} to Hugging Face: {str(e)}")
            logging.info(f"Quantized model is still available locally at {output_dir}")

        logging.info(f"Completed processing for {quant_name}")

if __name__ == "__main__":
    config_path = "config.yaml"
    try:
        with open(config_path, "r") as f:
            config = yaml.safe_load(f)
        logging.info(f"Configuration loaded from {config_path}")
    except yaml.YAMLError as e:
        logging.error(f"Error parsing {config_path}: {str(e)}")
        sys.exit(1)
    except FileNotFoundError:
        logging.error(f"{config_path} not found. Please create a config file.")
        sys.exit(1)

    if not validate_config(config):
        logging.error("Configuration validation failed. Exiting.")
        sys.exit(1)

    if not authenticate_hf():
        logging.error("Hugging Face authentication failed. Exiting.")
        sys.exit(1)

    input_model_path = resolve_input_model(config)
    if not input_model_path:
        logging.error("Failed to resolve input model path. Exiting.")
        sys.exit(1)

    quantize_and_upload(config, input_model_path)
    logging.info("Script execution completed.")
```
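
Once a quant finishes, a quick load test before (or after) uploading is a reasonable sanity check. Something along these lines should work with a reasonably recent exllamav2 build, though the exact API can shift between versions, and the path below is a placeholder for one of the compiled output directories:

```python
import os
from exllamav2 import ExLlamaV2, ExLlamaV2Cache, ExLlamaV2Config, ExLlamaV2Tokenizer

# Placeholder: point this at one of the compiled output directories produced above.
model_dir = os.path.expanduser("~/models/exl2-converted/Lazarus-2407-100B/Lazarus-2407-100B-exl2-5bpw")

config = ExLlamaV2Config(model_dir)
model = ExLlamaV2(config)
cache = ExLlamaV2Cache(model, lazy=True)  # lazy cache so weights can be auto-split across GPUs
model.load_autosplit(cache)
tokenizer = ExLlamaV2Tokenizer(config)
print(f"Loaded {model_dir}")
```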
---
base_model:
- TheDrummer/Lazarus-2407-100B