# Quantization Notes

- bpw: 5
- hb: 6
- calibration_length: 8192
- measurement_length: 8192

## Quantization Code

Posting this here for convenience in case anyone is interested or finds it useful. I run this code in a conda Python 3.12 environment under WSL 2 Ubuntu. The steps are:

1. Create the conda environment and install or upgrade exllamav2.
2. Log in to Hugging Face with the `huggingface-cli login` terminal command.
3. Fill in `config.yaml`.
4. Run the Python script.
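A minimal sketch of those steps is below. Treat the environment name, the exllamav2 repo URL, and the install method as examples rather than fixed choices; the only hard assumption from the config is that an exllamav2 checkout lives at `~/exllamav2` so the script can call its `convert.py`.

```bash
# Sketch of the setup steps above (names and paths are examples, not fixed).
conda create -n exl2 python=3.12 -y
conda activate exl2

# The config below expects an exllamav2 checkout at ~/exllamav2 (convert.py is run from there).
git clone https://github.com/turboderp/exllamav2 ~/exllamav2
pip install -r ~/exllamav2/requirements.txt
pip install ~/exllamav2          # or: pip install --upgrade exllamav2

# Also needed by the script itself for downloads/uploads and config parsing.
pip install huggingface_hub pyyaml tqdm

# Caches a token under ~/.cache/huggingface/token, which the script reads.
huggingface-cli login

python script.py
```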
logging.error(f"Failed to authenticate with Hugging Face: {str(e)}") return False return True def sanitize_model_and_branch_names(model, branch): # Remove trailing slash if present model = model.rstrip('/') # Remove base URL if present if model.startswith("https://huggingface.co/"): model = model[len("https://huggingface.co/"):] # Split model and branch if provided in model name model_parts = model.split(":") model = model_parts[0] branch = model_parts[1] if len(model_parts) > 1 else branch # Use 'main' as default branch if not specified if branch is None: branch = "main" # Validate branch name if not re.match(r"^[a-zA-Z0-9._-]+$", branch): raise ValueError("Invalid branch name. Only alphanumeric characters, period, underscore and dash are allowed.") return model, branch def download_model(model_name, branch_name, output_dir): # Sanitize model and branch names model_name, branch_name = sanitize_model_and_branch_names(model_name, branch_name) # Expand user directory if needed output_dir = os.path.expanduser(output_dir) # Initialize Hugging Face API api = HfApi() # Create output directory output_folder = Path(output_dir) / f"{'_'.join(model_name.split('/')[-2:])}" if branch_name != "main": output_folder = output_folder.with_name(f"{output_folder.name}_{branch_name}") output_folder.mkdir(parents=True, exist_ok=True) # Get file list try: files = api.list_repo_files(model_name, revision=branch_name) except Exception as e: logging.error(f"Error accessing repository: {e}") return None # Download files for file in tqdm(files, desc="Downloading files"): try: hf_hub_download( repo_id=model_name, filename=file, revision=branch_name, local_dir=output_folder, local_dir_use_symlinks=False ) except Exception as e: logging.error(f"Error downloading {file}: {e}") logging.info(f"Model downloaded to {output_folder}") return output_folder def resolve_input_model(config): input_model = os.path.expanduser(config['input_model']) if os.path.isdir(input_model): # Input model is a local directory logging.info(f"Using local model directory: {input_model}") return input_model else: # Input model is a Hugging Face repository logging.info(f"Input model is a Hugging Face model: {input_model}") download_output_dir = os.path.expanduser(config.get('download_output_dir', './models')) if not os.path.isdir(download_output_dir): try: os.makedirs(download_output_dir, exist_ok=True) logging.info(f"Created download_output_dir directory: {download_output_dir}") except OSError as e: logging.error(f"Failed to create download_output_dir directory: {str(e)}") sys.exit(1) model_name, branch_name = sanitize_model_and_branch_names(input_model, branch=None) output_folder = download_model(model_name, branch_name, download_output_dir) if output_folder is None: logging.error("Failed to download the model.") sys.exit(1) return str(output_folder) def quantize_and_upload(config, input_model_path): exllama_path = os.path.expanduser(config['exllama_path']) base_model_name = config['base_model_name'] output_base_path = os.path.expanduser(config['output_base_path']) hf_username = config['hf_username'] default_hb = config.get('default_hb', 8) for quant_config in config['quantizations']: if 'bpw' not in quant_config: logging.warning("Skipping quantization config without 'bpw'.") continue bpw = quant_config['bpw'] hb = quant_config.get('hb', default_hb) calibration_length = quant_config.get('calibration_length', 2048) measurement_length = quant_config.get('measurement_length', calibration_length) if not isinstance(calibration_length, int) or not 
```script.py
import yaml
import os
import sys
import subprocess
import logging
import re
from tqdm import tqdm
from pathlib import Path
from huggingface_hub import HfApi, create_repo, login, hf_hub_download

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def run_command(command_list, timeout=300):
    try:
        process = subprocess.Popen(
            command_list,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            bufsize=1,
            universal_newlines=True
        )

        while True:
            output = process.stdout.readline()
            if output == '' and process.poll() is not None:
                break
            if output:
                logging.info(output.strip())

        rc = process.poll()
        if rc != 0:
            error_output = process.stderr.read()
            logging.error(f"Error executing command: {' '.join(command_list)}")
            logging.error(f"Error output: {error_output}")
            return False

        logging.info(f"Command executed successfully: {' '.join(command_list)}")
        return True
    except subprocess.TimeoutExpired:
        logging.error(f"Command timed out: {' '.join(command_list)}")
        process.kill()
        return False
    except Exception as e:
        logging.error(f"Unexpected error executing command: {' '.join(command_list)}")
        logging.error(f"Error: {str(e)}")
        return False


def validate_config(config):
    required_keys = [
        'exllama_path', 'base_model_name', 'input_model',
        'output_base_path', 'hf_username', 'quantizations'
    ]
    missing_keys = [key for key in required_keys if key not in config]
    if missing_keys:
        logging.error(f"Missing required configuration keys: {', '.join(missing_keys)}")
        return False

    # Validate exllama_path
    if not os.path.isdir(os.path.expanduser(config['exllama_path'])):
        logging.error(f"exllama_path does not exist or is not a directory: {config['exllama_path']}")
        return False

    # Validate output_base_path
    output_base_path = os.path.expanduser(config['output_base_path'])
    if not os.path.isdir(output_base_path):
        try:
            os.makedirs(output_base_path, exist_ok=True)
            logging.info(f"Created output_base_path directory: {output_base_path}")
        except OSError as e:
            logging.error(f"Failed to create output_base_path directory: {str(e)}")
            return False

    return True


def authenticate_hf():
    try:
        # Read the token from the local cache file
        token_path = os.path.expanduser("~/.cache/huggingface/token")
        with open(token_path, "r") as token_file:
            hf_token = token_file.read().strip()

        # Use the token to login
        login(token=hf_token)
        logging.info("Authenticated with Hugging Face successfully.")
    except Exception as e:
        logging.error(f"Failed to authenticate with Hugging Face: {str(e)}")
        return False
    return True


def sanitize_model_and_branch_names(model, branch):
    # Remove trailing slash if present
    model = model.rstrip('/')

    # Remove base URL if present
    if model.startswith("https://huggingface.co/"):
        model = model[len("https://huggingface.co/"):]

    # Split model and branch if provided in model name
    model_parts = model.split(":")
    model = model_parts[0]
    branch = model_parts[1] if len(model_parts) > 1 else branch

    # Use 'main' as default branch if not specified
    if branch is None:
        branch = "main"

    # Validate branch name
    if not re.match(r"^[a-zA-Z0-9._-]+$", branch):
        raise ValueError("Invalid branch name. Only alphanumeric characters, period, underscore and dash are allowed.")

    return model, branch


def download_model(model_name, branch_name, output_dir):
    # Sanitize model and branch names
    model_name, branch_name = sanitize_model_and_branch_names(model_name, branch_name)

    # Expand user directory if needed
    output_dir = os.path.expanduser(output_dir)

    # Initialize Hugging Face API
    api = HfApi()

    # Create output directory
    output_folder = Path(output_dir) / f"{'_'.join(model_name.split('/')[-2:])}"
    if branch_name != "main":
        output_folder = output_folder.with_name(f"{output_folder.name}_{branch_name}")
    output_folder.mkdir(parents=True, exist_ok=True)

    # Get file list
    try:
        files = api.list_repo_files(model_name, revision=branch_name)
    except Exception as e:
        logging.error(f"Error accessing repository: {e}")
        return None

    # Download files
    for file in tqdm(files, desc="Downloading files"):
        try:
            hf_hub_download(
                repo_id=model_name,
                filename=file,
                revision=branch_name,
                local_dir=output_folder,
                local_dir_use_symlinks=False
            )
        except Exception as e:
            logging.error(f"Error downloading {file}: {e}")

    logging.info(f"Model downloaded to {output_folder}")
    return output_folder


def resolve_input_model(config):
    input_model = os.path.expanduser(config['input_model'])
    if os.path.isdir(input_model):
        # Input model is a local directory
        logging.info(f"Using local model directory: {input_model}")
        return input_model
    else:
        # Input model is a Hugging Face repository
        logging.info(f"Input model is a Hugging Face model: {input_model}")
        download_output_dir = os.path.expanduser(config.get('download_output_dir', './models'))
        if not os.path.isdir(download_output_dir):
            try:
                os.makedirs(download_output_dir, exist_ok=True)
                logging.info(f"Created download_output_dir directory: {download_output_dir}")
            except OSError as e:
                logging.error(f"Failed to create download_output_dir directory: {str(e)}")
                sys.exit(1)
        model_name, branch_name = sanitize_model_and_branch_names(input_model, branch=None)
        output_folder = download_model(model_name, branch_name, download_output_dir)
        if output_folder is None:
            logging.error("Failed to download the model.")
            sys.exit(1)
        return str(output_folder)


def quantize_and_upload(config, input_model_path):
    exllama_path = os.path.expanduser(config['exllama_path'])
    base_model_name = config['base_model_name']
    output_base_path = os.path.expanduser(config['output_base_path'])
    hf_username = config['hf_username']
    default_hb = config.get('default_hb', 8)

    for quant_config in config['quantizations']:
        if 'bpw' not in quant_config:
            logging.warning("Skipping quantization config without 'bpw'.")
            continue

        bpw = quant_config['bpw']
        hb = quant_config.get('hb', default_hb)
        calibration_length = quant_config.get('calibration_length', 2048)
        measurement_length = quant_config.get('measurement_length', calibration_length)

        if not isinstance(calibration_length, int) or not isinstance(measurement_length, int):
            logging.error("Invalid calibration_length or measurement_length values. Expected integers.")
            continue
        if calibration_length <= 0 or measurement_length <= 0:
            logging.error("Invalid calibration_length or measurement_length values. Must be positive integers.")
            continue

        quant_name = f"{base_model_name}-exl2-{bpw}bpw"
        work_dir = os.path.join(output_base_path, base_model_name, f"{quant_name}-work")
        output_dir = os.path.join(output_base_path, base_model_name, quant_name)

        try:
            os.makedirs(work_dir, exist_ok=True)
            os.makedirs(output_dir, exist_ok=True)
            logging.info(f"Directories created or already exist: {work_dir}, {output_dir}")
        except OSError as e:
            logging.error(f"Failed to create directories for {quant_name}: {str(e)}")
            continue

        # Run quantization
        command_list = [
            "python", os.path.join(exllama_path, "convert.py"),
            "-i", input_model_path,
            "-o", work_dir,
            "-cf", output_dir,
            "-b", str(bpw),
            "-hb", str(hb),
            "-l", str(calibration_length),
            "-ml", str(measurement_length)
        ]

        if not run_command(command_list):
            logging.error(f"Quantization failed for {quant_name}. Skipping upload.")
            continue

        logging.info(f"Quantization completed for {quant_name}")

        # Try to upload to Hugging Face
        repo_name = f"{hf_username}/{quant_name}"
        try:
            create_repo(repo_name, repo_type="model", exist_ok=True)
            logging.info(f"Repository '{repo_name}' is ready.")

            api = HfApi()
            api.upload_folder(
                folder_path=output_dir,
                repo_id=repo_name,
                repo_type="model"
            )
            logging.info(f"Successfully uploaded {quant_name} to Hugging Face")
        except Exception as e:
            logging.error(f"Failed to upload {quant_name} to Hugging Face: {str(e)}")
            logging.info(f"Quantized model is still available locally at {output_dir}")

        logging.info(f"Completed processing for {quant_name}")


if __name__ == "__main__":
    config_path = "config.yaml"
    try:
        with open(config_path, "r") as f:
            config = yaml.safe_load(f)
        logging.info(f"Configuration loaded from {config_path}")
    except yaml.YAMLError as e:
        logging.error(f"Error parsing {config_path}: {str(e)}")
        sys.exit(1)
    except FileNotFoundError:
        logging.error(f"{config_path} not found. Please create a config file.")
        sys.exit(1)

    if not validate_config(config):
        logging.error("Configuration validation failed. Exiting.")
        sys.exit(1)

    if not authenticate_hf():
        logging.error("Hugging Face authentication failed. Exiting.")
        sys.exit(1)

    input_model_path = resolve_input_model(config)
    if not input_model_path:
        logging.error("Failed to resolve input model path. Exiting.")
        sys.exit(1)

    quantize_and_upload(config, input_model_path)
    logging.info("Script execution completed.")
```
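When the upload step succeeds, each quant lands in a repo named `<hf_username>/<base_model_name>-exl2-<bpw>bpw`. As a rough sketch (assuming a recent huggingface_hub CLI; the repo name below just follows that pattern), a finished quant can be pulled down elsewhere like this:

```bash
# Repo name follows the script's <hf_username>/<quant_name> pattern.
huggingface-cli download NobodySpecial/Endurance-100B-v1-exl2-5bpw \
  --local-dir ~/models/Endurance-100B-v1-exl2-5bpw
```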
---
base_model:
- TheDrummer/Lazarus-2407-100B
---

# Join our Discord! https://discord.gg/Nbv9pQ88Xb

## 2500+ members strong 💪

Now with more channels! A hub for users and makers alike!

---

# Endurance 100B v1 🎡

*A finetune of [Lazarus 2407 100B](https://huggingface.co/TheDrummer/Lazarus-2407-100B), a pruned Mistral Large 2407 123B!*

![image/png](https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/R2dDPDShY2VEhRzbJr-Go.png)

> [Do not go gentle into that good night. Rage, rage against the dying of the light!](https://www.youtube.com/watch?v=ESWzPhZWYeI)

---

## Links
- Original: https://huggingface.co/TheDrummer/Endurance-100B-v1
- GGUF: https://huggingface.co/TheDrummer/Endurance-100B-v1-GGUF
- iMatrix: https://huggingface.co/bartowski/Endurance-100B-v1-GGUF (recommended for smaller quants)

## Arsenal (Supported Chat Templates)
- Metharme (Pygmalion in ST)
  - Creative, unhinged, unique

### Favorite RP Format
`*action* Dialogue *thoughts* Dialogue *narration*` in 1st person PoV

### Favorite Card
![image/png](https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/Ebi-X8DEw5LclXCFuL_H7.png)

[Audrey by thecooler](https://characterhub.org/characters/thecooler/audrey-5f995ef8)

---

## Technical Details

*Refer to [Lazarus 2407 100B](https://huggingface.co/TheDrummer/Lazarus-2407-100B) for pruning details.*

Endurance used the same hyperparameters as Behemoth. Training loss indicates that the two are nearly identical, albeit with lower confidence.

![image/png](https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/s0uELhSkSSwseyBrFzw7q.png)

Notes on Lazarus 100B (the base model for Endurance):
- Testers noted that the 100B seemed nearly identical to the 123B.
- One tester reported only a single minor mistake, which required a rewrite after the model failed to pick up on a nuance.
- Another tester went through a satisfying 32K playthrough without any issues.

Endurance 100B v1.0 has gone through additional RP & Instruct training.