|
# Quantization Notes: |
|
|
|
bpw: 5 |
|
hb: 6 |
|
calibration_length: 8192 |
|
measurement_length: 8192 |
|
|
|
## Quantization Code: |
|
|
|
Posting this here for convenience in case anyone is interested or finds it useful. I run this code in a conda Python 3.12 environment on WSL 2 Ubuntu. To run it: create the conda env and install or upgrade exllamav2, log in to Hugging Face with the `huggingface-cli login` terminal command, fill in the config.yaml file, then run the Python script. |
|
|
|
```config.yaml |
|
base_model_name: "Endurance-100B-v1"
input_model: "~/models/TheDrummer_Endurance-100B-v1"
download_output_dir: "~/models"
output_base_path: "~/models/exl2-converted"
hf_username: "NobodySpecial"
default_hb: 6 # Default head bits value
exllama_path: "~/exllamav2"

quantizations:
  - bpw: 5
    calibration_length: 8192 # Optional: specify calibration length in tokens
    measurement_length: 8192 # Optional: specify measurement length in tokens
  - bpw: 6
    hb: 8 # Optional
    calibration_length: 8192 # Optional: specify calibration length in tokens
    measurement_length: 8192 # Optional: specify measurement length in tokens
|
``` |
|
|
|
```script.py |
|
import yaml |
|
import os |
|
import sys |
|
import subprocess |
|
import logging |
|
import re |
|
from tqdm import tqdm |
|
from pathlib import Path |
|
from huggingface_hub import HfApi, create_repo, login, hf_hub_download |
|
|
|
# Set up logging |
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') |
|
|
|
def run_command(command_list, timeout=300):
    """Run an external command, streaming its stdout into the log.

    Args:
        command_list: Program and arguments as a list (executed without a
            shell).
        timeout: Kept for interface compatibility only; it is NOT enforced.
            The original code never applied it either, and enforcing the
            300-second default would abort long conversions started by
            callers that do not override it.

    Returns:
        True if the process exits with status 0. False if it exits with a
        non-zero status, cannot be started, or any other error occurs; the
        details are logged.
    """
    import threading  # Local import so the file's import block stays untouched.

    command_text = ' '.join(map(str, command_list))
    process = None
    try:
        process = subprocess.Popen(
            command_list,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            bufsize=1,
        )

        # Drain stderr on a helper thread while stdout is streamed below.
        # Reading only stdout lets a chatty child fill the stderr pipe and
        # block forever, which in turn blocks this loop (classic deadlock).
        stderr_chunks = []

        def _drain_stderr():
            stderr_chunks.append(process.stderr.read())

        stderr_reader = threading.Thread(target=_drain_stderr, daemon=True)
        stderr_reader.start()

        # Iterating the pipe yields lines until the child closes stdout.
        for line in process.stdout:
            logging.info(line.strip())

        rc = process.wait()
        stderr_reader.join()

        if rc != 0:
            error_output = ''.join(stderr_chunks)
            logging.error(f"Error executing command: {command_text}")
            logging.error(f"Error output: {error_output}")
            return False

        logging.info(f"Command executed successfully: {command_text}")
        return True
    except Exception as e:
        logging.error(f"Unexpected error executing command: {command_text}")
        logging.error(f"Error: {str(e)}")
        # Popen may have failed before `process` was bound; only clean up a
        # child that actually exists and is still running.
        if process is not None and process.poll() is None:
            process.kill()
        return False
|
|
|
def validate_config(config):
    """Check that the loaded configuration is usable before any work starts.

    Verifies that ``config`` is a mapping containing every required key,
    that ``exllama_path`` is an existing directory, and that
    ``output_base_path`` exists (it is created when missing).

    Args:
        config: The object produced by parsing the YAML config file.

    Returns:
        True when the configuration passes all checks, otherwise False.
        Every failure is logged.
    """
    # An empty YAML document parses to None (and a top-level list or scalar
    # is also possible); the membership tests below need a mapping.
    if not isinstance(config, dict):
        logging.error(
            f"Configuration must be a mapping of settings, got {type(config).__name__}"
        )
        return False

    required_keys = [
        'exllama_path',
        'base_model_name',
        'input_model',
        'output_base_path',
        'hf_username',
        'quantizations',
    ]
    missing_keys = [key for key in required_keys if key not in config]
    if missing_keys:
        logging.error(f"Missing required configuration keys: {', '.join(missing_keys)}")
        return False

    # Validate exllama_path
    if not os.path.isdir(os.path.expanduser(config['exllama_path'])):
        logging.error(f"exllama_path does not exist or is not a directory: {config['exllama_path']}")
        return False

    # Validate output_base_path, creating it on demand
    output_base_path = os.path.expanduser(config['output_base_path'])
    if not os.path.isdir(output_base_path):
        try:
            os.makedirs(output_base_path, exist_ok=True)
            logging.info(f"Created output_base_path directory: {output_base_path}")
        except OSError as e:
            logging.error(f"Failed to create output_base_path directory: {str(e)}")
            return False

    return True
|
|
|
def authenticate_hf():
    """Log in to Hugging Face using the token cached on disk.

    The token is read from ``~/.cache/huggingface/token`` and passed to
    ``login``.

    Returns:
        True when the login call completes, False (after logging the error)
        when the token cannot be read or the login raises.
    """
    try:
        cached_token_file = os.path.expanduser("~/.cache/huggingface/token")
        with open(cached_token_file, "r") as handle:
            cached_token = handle.read().strip()

        # Authenticate this process with the cached token.
        login(token=cached_token)
        logging.info("Authenticated with Hugging Face successfully.")
        return True
    except Exception as e:
        logging.error(f"Failed to authenticate with Hugging Face: {str(e)}")
        return False
|
|
|
def sanitize_model_and_branch_names(model, branch):
    """Normalise a model reference into a ``(model, branch)`` pair.

    Trailing slashes and a leading ``https://huggingface.co/`` are removed
    from ``model``. A ``model:branch`` form overrides the ``branch``
    argument; when no branch is given at all, ``"main"`` is used.

    Args:
        model: Repository id, optionally a full URL and/or ``:branch`` suffix.
        branch: Branch name to use when ``model`` carries none; may be None.

    Returns:
        Tuple ``(model, branch)``.

    Raises:
        ValueError: If the resulting branch name contains characters other
            than letters, digits, period, underscore and dash.
    """
    hub_prefix = "https://huggingface.co/"

    repo = model.rstrip('/')
    if repo.startswith(hub_prefix):
        repo = repo[len(hub_prefix):]

    # The first segment is the repo id; the second, if any, names the branch.
    repo, *suffix = repo.split(":")
    if suffix:
        branch = suffix[0]

    if branch is None:
        branch = "main"

    if re.match(r"^[a-zA-Z0-9._-]+$", branch) is None:
        raise ValueError("Invalid branch name. Only alphanumeric characters, period, underscore and dash are allowed.")

    return repo, branch
|
|
|
def download_model(model_name, branch_name, output_dir):
    """Download every file of a Hugging Face repo revision to a local folder.

    The target folder is ``output_dir`` joined with the last two ``/``-separated
    parts of the model name (joined by ``_``), with ``_<branch>`` appended for
    any branch other than ``main``.

    Args:
        model_name: Repository id (URL and ``:branch`` forms are accepted).
        branch_name: Revision to download; None selects ``main``.
        output_dir: Parent directory for the download; ``~`` is expanded.

    Returns:
        Path of the folder holding the download, or None when the repository
        cannot be listed or when any file fails to download. An incomplete
        model is reported as a failure so that callers do not try to
        quantize it.
    """
    # Sanitize model and branch names
    model_name, branch_name = sanitize_model_and_branch_names(model_name, branch_name)

    # Expand user directory if needed
    output_dir = os.path.expanduser(output_dir)

    # Initialize Hugging Face API
    api = HfApi()

    # Create output directory
    output_folder = Path(output_dir) / f"{'_'.join(model_name.split('/')[-2:])}"
    if branch_name != "main":
        output_folder = output_folder.with_name(f"{output_folder.name}_{branch_name}")
    output_folder.mkdir(parents=True, exist_ok=True)

    # Get file list
    try:
        files = api.list_repo_files(model_name, revision=branch_name)
    except Exception as e:
        logging.error(f"Error accessing repository: {e}")
        return None

    # Download files, remembering which ones failed so the whole download
    # can be reported as unsuccessful instead of silently incomplete.
    failed_files = []
    for file in tqdm(files, desc="Downloading files"):
        try:
            hf_hub_download(
                repo_id=model_name,
                filename=file,
                revision=branch_name,
                local_dir=output_folder,
                local_dir_use_symlinks=False
            )
        except Exception as e:
            logging.error(f"Error downloading {file}: {e}")
            failed_files.append(file)

    if failed_files:
        logging.error(
            f"{len(failed_files)} of {len(files)} files failed to download; "
            f"partial download left in {output_folder}"
        )
        return None

    logging.info(f"Model downloaded to {output_folder}")
    return output_folder
|
|
|
def resolve_input_model(config):
    """Return a local directory containing the model named in the config.

    If ``config['input_model']`` (after ``~`` expansion) is an existing
    directory it is returned as-is. Otherwise the value is treated as a
    Hugging Face repository and downloaded under ``download_output_dir``
    (default ``./models``).

    Args:
        config: Parsed configuration mapping.

    Returns:
        Path of the local model directory as a string.

    Exits the process with status 1 if the download directory cannot be
    created or the download fails.
    """
    input_model = os.path.expanduser(config['input_model'])

    # Guard clause: a local directory needs no further work.
    if os.path.isdir(input_model):
        logging.info(f"Using local model directory: {input_model}")
        return input_model

    # Anything that is not a local directory is treated as a Hub repository.
    logging.info(f"Input model is a Hugging Face model: {input_model}")

    download_output_dir = os.path.expanduser(config.get('download_output_dir', './models'))
    if not os.path.isdir(download_output_dir):
        try:
            os.makedirs(download_output_dir, exist_ok=True)
            logging.info(f"Created download_output_dir directory: {download_output_dir}")
        except OSError as e:
            logging.error(f"Failed to create download_output_dir directory: {str(e)}")
            sys.exit(1)

    model_name, branch_name = sanitize_model_and_branch_names(input_model, branch=None)
    output_folder = download_model(model_name, branch_name, download_output_dir)
    if output_folder is None:
        logging.error("Failed to download the model.")
        sys.exit(1)

    return str(output_folder)
|
|
|
def quantize_and_upload(config, input_model_path):
    """Run every configured quantization and upload each result.

    For each entry in ``config['quantizations']`` this runs exllamav2's
    ``convert.py`` on ``input_model_path`` and then uploads the output folder
    to the ``<hf_username>/<base_model_name>-exl2-<bpw>bpw`` repository.
    Entries that are invalid, or whose conversion fails, are logged and
    skipped. An upload failure is logged and the local output is kept.

    Args:
        config: Parsed configuration mapping.
        input_model_path: Local directory of the model to convert.

    Returns:
        None.
    """
    exllama_path = os.path.expanduser(config['exllama_path'])
    base_model_name = config['base_model_name']
    output_base_path = os.path.expanduser(config['output_base_path'])
    hf_username = config['hf_username']
    default_hb = config.get('default_hb', 8)

    # Run the converter with the interpreter executing this script so it
    # sees the same environment; fall back to "python" if it is unknown.
    python_executable = sys.executable or "python"
    convert_script = os.path.join(exllama_path, "convert.py")

    # A "quantizations:" key with nothing under it parses to None.
    for quant_config in config['quantizations'] or []:
        if not isinstance(quant_config, dict) or 'bpw' not in quant_config:
            logging.warning("Skipping quantization config without 'bpw'.")
            continue

        bpw = quant_config['bpw']
        hb = quant_config.get('hb', default_hb)
        calibration_length = quant_config.get('calibration_length', 2048)
        measurement_length = quant_config.get('measurement_length', calibration_length)

        # bool is a subclass of int, so reject it explicitly: "-l True" is
        # not a meaningful token count.
        lengths = (calibration_length, measurement_length)
        if any(isinstance(v, bool) or not isinstance(v, int) for v in lengths):
            logging.error("Invalid calibration_length or measurement_length values. Expected integers.")
            continue

        if calibration_length <= 0 or measurement_length <= 0:
            logging.error("Invalid calibration_length or measurement_length values. Must be positive integers.")
            continue

        quant_name = f"{base_model_name}-exl2-{bpw}bpw"
        work_dir = os.path.join(output_base_path, base_model_name, f"{quant_name}-work")
        output_dir = os.path.join(output_base_path, base_model_name, quant_name)

        try:
            os.makedirs(work_dir, exist_ok=True)
            os.makedirs(output_dir, exist_ok=True)
            logging.info(f"Directories created or already exist: {work_dir}, {output_dir}")
        except OSError as e:
            logging.error(f"Failed to create directories for {quant_name}: {str(e)}")
            continue

        # Run quantization
        command_list = [
            python_executable, convert_script,
            "-i", input_model_path,
            "-o", work_dir,
            "-cf", output_dir,
            "-b", str(bpw),
            "-hb", str(hb),
            "-l", str(calibration_length),
            "-ml", str(measurement_length),
        ]
        if not run_command(command_list):
            logging.error(f"Quantization failed for {quant_name}. Skipping upload.")
            continue

        logging.info(f"Quantization completed for {quant_name}")

        # Try to upload to Hugging Face; a failure here keeps the local copy.
        repo_name = f"{hf_username}/{quant_name}"
        try:
            create_repo(repo_name, repo_type="model", exist_ok=True)
            logging.info(f"Repository '{repo_name}' is ready.")
            api = HfApi()
            api.upload_folder(
                folder_path=output_dir,
                repo_id=repo_name,
                repo_type="model"
            )
            logging.info(f"Successfully uploaded {quant_name} to Hugging Face")
        except Exception as e:
            logging.error(f"Failed to upload {quant_name} to Hugging Face: {str(e)}")
            logging.info(f"Quantized model is still available locally at {output_dir}")

        logging.info(f"Completed processing for {quant_name}")
|
|
|
if __name__ == "__main__":
    # Script entry point: load config.yaml, validate it, authenticate with
    # Hugging Face, locate (or download) the input model, then quantize and
    # upload. Each failed stage logs an error and exits with status 1.
    config_path = "config.yaml"

    try:
        with open(config_path, "r") as config_file:
            config = yaml.safe_load(config_file)
        logging.info(f"Configuration loaded from {config_path}")
    except FileNotFoundError:
        logging.error(f"{config_path} not found. Please create a config file.")
        sys.exit(1)
    except yaml.YAMLError as e:
        logging.error(f"Error parsing {config_path}: {str(e)}")
        sys.exit(1)

    config_ok = validate_config(config)
    if not config_ok:
        logging.error("Configuration validation failed. Exiting.")
        sys.exit(1)

    # Authentication is attempted only once the configuration is known good.
    authenticated = authenticate_hf()
    if not authenticated:
        logging.error("Hugging Face authentication failed. Exiting.")
        sys.exit(1)

    input_model_path = resolve_input_model(config)
    if not input_model_path:
        logging.error("Failed to resolve input model path. Exiting.")
        sys.exit(1)

    quantize_and_upload(config, input_model_path)
    logging.info("Script execution completed.")
|
``` |
|
|
|
--- |
|
base_model: |
|
- TheDrummer/Lazarus-2407-100B |
|
--- |
|
# Join our Discord! https://discord.gg/Nbv9pQ88Xb |
|
## 2500+ members strong 💪 Now with more channels! A hub for users and makers alike! |
|
--- |
|
# Endurance 100B v1 🎡 |
|
|
|
*A finetune of [Lazarus 2407 100B](https://huggingface.co/TheDrummer/Lazarus-2407-100B), a pruned Mistral Large 2407 123B!* |
|
|
|
![image/png](https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/R2dDPDShY2VEhRzbJr-Go.png) |
|
|
|
> [Do not go gentle into that good night. Rage, rage against the dying of the light!](https://www.youtube.com/watch?v=ESWzPhZWYeI) |
|
|
|
--- |
|
|
|
## Links |
|
- Original: https://huggingface.co/TheDrummer/Endurance-100B-v1 |
|
- GGUF: https://huggingface.co/TheDrummer/Endurance-100B-v1-GGUF |
|
- iMatrix: https://huggingface.co/bartowski/Endurance-100B-v1-GGUF (recommended for smaller quants) |
|
|
|
## Arsenal (Supported Chat Templates) |
|
- Metharme (Pygmalion in ST) |
|
- Creative, unhinged, unique |
|
|
|
### Favorite RP Format |
|
`*action* Dialogue *thoughts* Dialogue *narration*` in 1st person PoV |
|
|
|
### Favorite Card |
|
![image/png](https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/Ebi-X8DEw5LclXCFuL_H7.png) |
|
[Audrey by thecooler](https://characterhub.org/characters/thecooler/audrey-5f995ef8) |
|
|
|
--- |
|
|
|
## Technical Details |
|
|
|
*Refer to [Lazarus 2407 100B](https://huggingface.co/TheDrummer/Lazarus-2407-100B) for pruning details.* |
|
|
|
Endurance used the same hyperparameters as Behemoth. Training loss indicates that they are exactly the same albeit with lower confidence. |
|
|
|
![image/png](https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/s0uELhSkSSwseyBrFzw7q.png) |
|
|
|
Notes on Lazarus 100B (base model for Endurance): |
|
- Testers have noted that 100B seemed nearly identical to 123B. |
|
- One tester said that only one minor mistake was made by the model, requiring a rewrite for failing to pick up on the nuance. |
|
- Another tester went through a satisfying 32K playthrough without any issues. |
|
|
|
Endurance 100B v1.0 has gone through additional RP & Instruct training. |