NobodySpecial
/

Endurance-100B-v1-exl2-5bpw

Safetensors

mistral

5-bit

exl2

Model card Files Files and versions Community

NobodySpecial committed on Dec 4, 2024

Commit

b47deb3

•

1 Parent(s): eef34c4

Update README.md

Browse files

Files changed (1) hide show

README.md +314 -0

README.md CHANGED Viewed

@@ -1,3 +1,317 @@
 ---
 base_model:
 - TheDrummer/Lazarus-2407-100B

+# Quantization Notes:
+bpw: 5
+hb: 6
+calibration_length: 8192
+measurement_length: 8192
+## Quantization Code:
+Posting this here for convenience in case anyone is interested or finds it useful. I run this code in a conda environment with Python 3.12 on Ubuntu under WSL 2. To run it: create the conda environment and install or upgrade exllamav2, log in to Hugging Face with the "huggingface-cli login" terminal command, fill in the config.yaml file, then run the Python script.
+```config.yaml
+base_model_name: "Endurance-100B-v1"
+input_model: "~/models/TheDrummer_Endurance-100B-v1"
+download_output_dir: "~/models"
+output_base_path: "~/models/exl2-converted"
+hf_username: "NobodySpecial"
+default_hb: 6  # Default head bits value
+exllama_path: "~/exllamav2"
+quantizations:
+  - bpw: 5
+    calibration_length: 8192  # Optional: specify calibration length in tokens
+    measurement_length: 8192  # Optional: specify measurement length in tokens
+  - bpw: 6
+    hb: 8 # Optional
+    calibration_length: 8192  # Optional: specify calibration length in tokens
+    measurement_length: 8192  # Optional: specify measurement length in tokens
+```
+```script.py
+import yaml
+import os
+import sys
+import subprocess
+import logging
+import re
+from tqdm import tqdm
+from pathlib import Path
+from huggingface_hub import HfApi, create_repo, login, hf_hub_download
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+def run_command(command_list, timeout=300):
+    try:
+        process = subprocess.Popen(
+            command_list,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            bufsize=1,
+            universal_newlines=True
+        )
+        while True:
+            output = process.stdout.readline()
+            if output == '' and process.poll() is not None:
+                break
+            if output:
+                logging.info(output.strip())
+        rc = process.poll()
+        if rc != 0:
+            error_output = process.stderr.read()
+            logging.error(f"Error executing command: {' '.join(command_list)}")
+            logging.error(f"Error output: {error_output}")
+            return False
+        logging.info(f"Command executed successfully: {' '.join(command_list)}")
+        return True
+    except subprocess.TimeoutExpired:
+        logging.error(f"Command timed out: {' '.join(command_list)}")
+        process.kill()
+        return False
+    except Exception as e:
+        logging.error(f"Unexpected error executing command: {' '.join(command_list)}")
+        logging.error(f"Error: {str(e)}")
+        return False
+def validate_config(config):
+    required_keys = [
+        'exllama_path',
+        'base_model_name',
+        'input_model',
+        'output_base_path',
+        'hf_username',
+        'quantizations'
+    ]
+    missing_keys = [key for key in required_keys if key not in config]
+    if missing_keys:
+        logging.error(f"Missing required configuration keys: {', '.join(missing_keys)}")
+        return False
+    # Validate exllama_path
+    if not os.path.isdir(os.path.expanduser(config['exllama_path'])):
+        logging.error(f"exllama_path does not exist or is not a directory: {config['exllama_path']}")
+        return False
+    # Validate output_base_path
+    output_base_path = os.path.expanduser(config['output_base_path'])
+    if not os.path.isdir(output_base_path):
+        try:
+            os.makedirs(output_base_path, exist_ok=True)
+            logging.info(f"Created output_base_path directory: {output_base_path}")
+        except OSError as e:
+            logging.error(f"Failed to create output_base_path directory: {str(e)}")
+            return False
+    return True
+def authenticate_hf():
+    try:
+        # Read the token from the local cache file
+        token_path = os.path.expanduser("~/.cache/huggingface/token")
+        with open(token_path, "r") as token_file:
+            hf_token = token_file.read().strip()
+        # Use the token to login
+        login(token=hf_token)
+        logging.info("Authenticated with Hugging Face successfully.")
+    except Exception as e:
+        logging.error(f"Failed to authenticate with Hugging Face: {str(e)}")
+        return False
+    return True
+def sanitize_model_and_branch_names(model, branch):
+    # Remove trailing slash if present
+    model = model.rstrip('/')
+    # Remove base URL if present
+    if model.startswith("https://huggingface.co/"):
+        model = model[len("https://huggingface.co/"):]
+    # Split model and branch if provided in model name
+    model_parts = model.split(":")
+    model = model_parts[0]
+    branch = model_parts[1] if len(model_parts) > 1 else branch
+    # Use 'main' as default branch if not specified
+    if branch is None:
+        branch = "main"
+    # Validate branch name
+    if not re.match(r"^[a-zA-Z0-9._-]+$", branch):
+        raise ValueError("Invalid branch name. Only alphanumeric characters, period, underscore and dash are allowed.")
+    return model, branch
+def download_model(model_name, branch_name, output_dir):
+    # Sanitize model and branch names
+    model_name, branch_name = sanitize_model_and_branch_names(model_name, branch_name)
+    # Expand user directory if needed
+    output_dir = os.path.expanduser(output_dir)
+    # Initialize Hugging Face API
+    api = HfApi()
+    # Create output directory
+    output_folder = Path(output_dir) / f"{'_'.join(model_name.split('/')[-2:])}"
+    if branch_name != "main":
+        output_folder = output_folder.with_name(f"{output_folder.name}_{branch_name}")
+    output_folder.mkdir(parents=True, exist_ok=True)
+    # Get file list
+    try:
+        files = api.list_repo_files(model_name, revision=branch_name)
+    except Exception as e:
+        logging.error(f"Error accessing repository: {e}")
+        return None
+    # Download files
+    for file in tqdm(files, desc="Downloading files"):
+        try:
+            hf_hub_download(
+                repo_id=model_name,
+                filename=file,
+                revision=branch_name,
+                local_dir=output_folder,
+                local_dir_use_symlinks=False
+            )
+        except Exception as e:
+            logging.error(f"Error downloading {file}: {e}")
+    logging.info(f"Model downloaded to {output_folder}")
+    return output_folder
+def resolve_input_model(config):
+    input_model = os.path.expanduser(config['input_model'])
+    if os.path.isdir(input_model):
+        # Input model is a local directory
+        logging.info(f"Using local model directory: {input_model}")
+        return input_model
+    else:
+        # Input model is a Hugging Face repository
+        logging.info(f"Input model is a Hugging Face model: {input_model}")
+        download_output_dir = os.path.expanduser(config.get('download_output_dir', './models'))
+        if not os.path.isdir(download_output_dir):
+            try:
+                os.makedirs(download_output_dir, exist_ok=True)
+                logging.info(f"Created download_output_dir directory: {download_output_dir}")
+            except OSError as e:
+                logging.error(f"Failed to create download_output_dir directory: {str(e)}")
+                sys.exit(1)
+        model_name, branch_name = sanitize_model_and_branch_names(input_model, branch=None)
+        output_folder = download_model(model_name, branch_name, download_output_dir)
+        if output_folder is None:
+            logging.error("Failed to download the model.")
+            sys.exit(1)
+        return str(output_folder)
+def quantize_and_upload(config, input_model_path):
+    exllama_path = os.path.expanduser(config['exllama_path'])
+    base_model_name = config['base_model_name']
+    output_base_path = os.path.expanduser(config['output_base_path'])
+    hf_username = config['hf_username']
+    default_hb = config.get('default_hb', 8)
+    for quant_config in config['quantizations']:
+        if 'bpw' not in quant_config:
+            logging.warning("Skipping quantization config without 'bpw'.")
+            continue
+        bpw = quant_config['bpw']
+        hb = quant_config.get('hb', default_hb)
+        calibration_length = quant_config.get('calibration_length', 2048)
+        measurement_length = quant_config.get('measurement_length', calibration_length)
+        if not isinstance(calibration_length, int) or not isinstance(measurement_length, int):
+            logging.error(f"Invalid calibration_length or measurement_length values. Expected integers.")
+            continue
+        if calibration_length <= 0 or measurement_length <= 0:
+            logging.error(f"Invalid calibration_length or measurement_length values. Must be positive integers.")
+            continue
+        quant_name = f"{base_model_name}-exl2-{bpw}bpw"
+        work_dir = os.path.join(output_base_path, base_model_name, f"{quant_name}-work")
+        output_dir = os.path.join(output_base_path, base_model_name, quant_name)
+        try:
+            os.makedirs(work_dir, exist_ok=True)
+            os.makedirs(output_dir, exist_ok=True)
+            logging.info(f"Directories created or already exist: {work_dir}, {output_dir}")
+        except OSError as e:
+            logging.error(f"Failed to create directories for {quant_name}: {str(e)}")
+            continue
+        # Run quantization
+        command_list = [
+            "python", os.path.join(exllama_path, "convert.py"),
+            "-i", input_model_path,
+            "-o", work_dir,
+            "-cf", output_dir,
+            "-b", str(bpw),
+            "-hb", str(hb),
+            "-l", str(calibration_length),
+            "-ml", str(measurement_length)
+        ]
+        if not run_command(command_list):
+            logging.error(f"Quantization failed for {quant_name}. Skipping upload.")
+            continue
+        logging.info(f"Quantization completed for {quant_name}")
+        # Try to upload to Hugging Face
+        repo_name = f"{hf_username}/{quant_name}"
+        try:
+            create_repo(repo_name, repo_type="model", exist_ok=True)
+            logging.info(f"Repository '{repo_name}' is ready.")
+            api = HfApi()
+            api.upload_folder(
+                folder_path=output_dir,
+                repo_id=repo_name,
+                repo_type="model"
+            )
+            logging.info(f"Successfully uploaded {quant_name} to Hugging Face")
+        except Exception as e:
+            logging.error(f"Failed to upload {quant_name} to Hugging Face: {str(e)}")
+            logging.info(f"Quantized model is still available locally at {output_dir}")
+        logging.info(f"Completed processing for {quant_name}")
+if __name__ == "__main__":
+    config_path = "config.yaml"
+    try:
+        with open(config_path, "r") as f:
+            config = yaml.safe_load(f)
+            logging.info(f"Configuration loaded from {config_path}")
+    except yaml.YAMLError as e:
+        logging.error(f"Error parsing {config_path}: {str(e)}")
+        sys.exit(1)
+    except FileNotFoundError:
+        logging.error(f"{config_path} not found. Please create a config file.")
+        sys.exit(1)
+    if not validate_config(config):
+        logging.error("Configuration validation failed. Exiting.")
+        sys.exit(1)
+    if not authenticate_hf():
+        logging.error("Hugging Face authentication failed. Exiting.")
+        sys.exit(1)
+    input_model_path = resolve_input_model(config)
+    if not input_model_path:
+        logging.error("Failed to resolve input model path. Exiting.")
+        sys.exit(1)
+    quantize_and_upload(config, input_model_path)
+    logging.info("Script execution completed.")
+```
 ---
 base_model:
 - TheDrummer/Lazarus-2407-100B