NobodySpecial's picture
Update README.md
b47deb3 verified
|
raw
history blame
13.6 kB
# Quantization Notes:
bpw: 5
hb: 6
calibration_length: 8192
measurement_length: 8192
## Quantization Code:
Posting this here for convenience in case anyone is interested or finds it useful. I run this code in a conda Python 3.12 environment on WSL 2 Ubuntu. To run it: create the conda environment and install or upgrade exllamav2, log in to Hugging Face with the `huggingface-cli login` terminal command, fill in the config.yaml file, and then run the Python script.
```config.yaml
base_model_name: "Endurance-100B-v1"
input_model: "~/models/TheDrummer_Endurance-100B-v1"
download_output_dir: "~/models"
output_base_path: "~/models/exl2-converted"
hf_username: "NobodySpecial"
default_hb: 6 # Default head bits value
exllama_path: "~/exllamav2"
quantizations:
  - bpw: 5
    calibration_length: 8192 # Optional: specify calibration length in tokens
    measurement_length: 8192 # Optional: specify measurement length in tokens
  - bpw: 6
    hb: 8 # Optional
    calibration_length: 8192 # Optional: specify calibration length in tokens
    measurement_length: 8192 # Optional: specify measurement length in tokens
```
```script.py
import yaml
import os
import sys
import subprocess
import logging
import re
from tqdm import tqdm
from pathlib import Path
from huggingface_hub import HfApi, create_repo, login, hf_hub_download
# Set up logging: configure the root logger once at import time so that every
# logging.info/warning/error call below is emitted at INFO level or above,
# prefixed with a timestamp and the level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def run_command(command_list, timeout=300):
    """Run an external command and stream its stdout into the log.

    The child's stderr is collected on a background thread while stdout is
    being streamed. Reading the two pipes one after the other can deadlock:
    a child that writes enough to stderr blocks on a full pipe while this
    process is still waiting for more stdout.

    Args:
        command_list: Program and arguments, one list item each. The command
            is executed directly, without a shell.
        timeout: Accepted for backward compatibility but not enforced. No
            timeout was ever applied to the child by this function, and the
            conversion call in quantize_and_upload() relies on the default,
            so enforcing a limit here would change that caller's behavior.

    Returns:
        True if the command exited with status 0. False if it exited with a
        non-zero status, could not be started, or failed unexpectedly; the
        reason is logged in each case.
    """
    import threading  # local import: not part of the file's import block

    # str() each item so that logging the command can never raise on its own.
    command_text = ' '.join(str(part) for part in command_list)
    process = None
    try:
        process = subprocess.Popen(
            command_list,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            bufsize=1,
        )

        stderr_chunks = []

        def _drain_stderr():
            # Runs until the child closes stderr, i.e. until it exits.
            stderr_chunks.append(process.stderr.read())

        stderr_reader = threading.Thread(target=_drain_stderr, daemon=True)
        stderr_reader.start()

        # Forward stdout line by line so progress shows up as it happens.
        for line in process.stdout:
            logging.info(line.strip())

        rc = process.wait()
        stderr_reader.join()
        process.stdout.close()
        process.stderr.close()

        if rc != 0:
            logging.error(f"Error executing command: {command_text}")
            logging.error(f"Error output: {''.join(stderr_chunks)}")
            return False

        logging.info(f"Command executed successfully: {command_text}")
        return True
    except Exception as e:
        logging.error(f"Unexpected error executing command: {command_text}")
        logging.error(f"Error: {str(e)}")
        # Do not leave a half-run child behind if streaming failed midway.
        if process is not None and process.poll() is None:
            process.kill()
        return False
def validate_config(config):
    """Check that the loaded configuration is usable before any work starts.

    Args:
        config: The object returned by parsing the config file. An empty
            file parses to None, which is rejected here rather than raising.

    Returns:
        True if the configuration is a mapping that contains every required
        key, 'quantizations' is a list, 'exllama_path' is an existing
        directory, and 'output_base_path' exists or could be created.
        False otherwise; the reason is logged.
    """
    if not isinstance(config, dict):
        logging.error("Configuration must be a mapping of keys to values (is the config file empty?).")
        return False

    required_keys = [
        'exllama_path',
        'base_model_name',
        'input_model',
        'output_base_path',
        'hf_username',
        'quantizations'
    ]
    missing_keys = [key for key in required_keys if key not in config]
    if missing_keys:
        logging.error(f"Missing required configuration keys: {', '.join(missing_keys)}")
        return False

    # Checked before any directory is created so that an unusable config
    # leaves no side effects behind.
    if not isinstance(config['quantizations'], (list, tuple)):
        logging.error("'quantizations' must be a list of quantization settings.")
        return False

    # Validate exllama_path
    if not os.path.isdir(os.path.expanduser(config['exllama_path'])):
        logging.error(f"exllama_path does not exist or is not a directory: {config['exllama_path']}")
        return False

    # Validate output_base_path, creating it when it does not exist yet
    output_base_path = os.path.expanduser(config['output_base_path'])
    if not os.path.isdir(output_base_path):
        try:
            os.makedirs(output_base_path, exist_ok=True)
            logging.info(f"Created output_base_path directory: {output_base_path}")
        except OSError as e:
            logging.error(f"Failed to create output_base_path directory: {str(e)}")
            return False

    return True
def authenticate_hf():
    """Log in to Hugging Face with the token stored in the local cache file.

    Returns:
        True when the token file could be read and login() accepted the
        token; False when anything in that sequence raised (the error is
        logged).
    """
    # Location of the token file read by this script
    cached_token_file = os.path.expanduser("~/.cache/huggingface/token")
    try:
        with open(cached_token_file, "r") as handle:
            cached_token = handle.read().strip()
        # Hand the cached token to the hub client
        login(token=cached_token)
        logging.info("Authenticated with Hugging Face successfully.")
    except Exception as exc:
        logging.error(f"Failed to authenticate with Hugging Face: {str(exc)}")
        return False
    else:
        return True
def sanitize_model_and_branch_names(model, branch):
    """Normalize a model reference into a (repo id, branch) pair.

    Args:
        model: Repository reference. May carry a trailing slash, the
            "https://huggingface.co/" prefix, and/or a ":branch" suffix.
        branch: Branch to use when the model string does not name one.
            None means "main".

    Returns:
        A (model, branch) tuple. A branch embedded in the model string takes
        precedence over the branch argument.

    Raises:
        ValueError: If the resulting branch name contains anything other
            than alphanumeric characters, period, underscore and dash.
    """
    # Remove trailing slash if present
    model = model.rstrip('/')

    # Remove base URL if present
    base_url = "https://huggingface.co/"
    if model.startswith(base_url):
        model = model[len(base_url):]

    # Split model and branch if provided in model name
    model_parts = model.split(":")
    model = model_parts[0]
    if len(model_parts) > 1:
        branch = model_parts[1]

    # Use 'main' as default branch if not specified
    if branch is None:
        branch = "main"

    # Validate branch name. fullmatch is used because "$" in a match()
    # pattern also matches just before a trailing newline, which would let
    # a value such as "main\n" through.
    if not re.fullmatch(r"[a-zA-Z0-9._-]+", branch):
        raise ValueError("Invalid branch name. Only alphanumeric characters, period, underscore and dash are allowed.")

    return model, branch
def download_model(model_name, branch_name, output_dir):
    """Download every file of a Hugging Face repository revision to disk.

    Args:
        model_name: Repository reference; normalized with
            sanitize_model_and_branch_names() before use.
        branch_name: Revision to download, or None for the default.
        output_dir: Directory under which the model folder is created
            ("~" is expanded).

    Returns:
        The Path of the folder the files were written to, or None when the
        repository's file list could not be retrieved. A failure to download
        an individual file is logged and does not change the return value.
    """
    repo_id, revision = sanitize_model_and_branch_names(model_name, branch_name)
    download_root = os.path.expanduser(output_dir)
    hub = HfApi()

    # Folder name is built from the last two path segments of the repo id,
    # with the branch appended for anything other than "main".
    target = Path(download_root) / f"{'_'.join(repo_id.split('/')[-2:])}"
    if revision != "main":
        target = target.with_name(f"{target.name}_{revision}")
    target.mkdir(parents=True, exist_ok=True)

    # Ask the hub which files exist at this revision
    try:
        repo_files = hub.list_repo_files(repo_id, revision=revision)
    except Exception as exc:
        logging.error(f"Error accessing repository: {exc}")
        return None

    # Fetch the files one at a time; a failed file is logged and skipped
    for filename in tqdm(repo_files, desc="Downloading files"):
        try:
            hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                revision=revision,
                local_dir=target,
                local_dir_use_symlinks=False
            )
        except Exception as exc:
            logging.error(f"Error downloading {filename}: {exc}")

    logging.info(f"Model downloaded to {target}")
    return target
def resolve_input_model(config):
    """Return a local directory containing the model to quantize.

    If config['input_model'] (after "~" expansion) is an existing directory
    it is returned as-is. Otherwise the value is treated as a Hugging Face
    repository and downloaded below config['download_output_dir']
    (default './models').

    Args:
        config: Configuration mapping; must contain 'input_model'.

    Returns:
        The model directory as a string.

    Exits the process with status 1 if the download directory cannot be
    created or the download fails.
    """
    candidate = os.path.expanduser(config['input_model'])

    # Guard clause: an existing local directory needs no download.
    if os.path.isdir(candidate):
        logging.info(f"Using local model directory: {candidate}")
        return candidate

    # Anything else is treated as a Hugging Face repository reference.
    logging.info(f"Input model is a Hugging Face model: {candidate}")
    download_root = os.path.expanduser(config.get('download_output_dir', './models'))
    if not os.path.isdir(download_root):
        try:
            os.makedirs(download_root, exist_ok=True)
            logging.info(f"Created download_output_dir directory: {download_root}")
        except OSError as exc:
            logging.error(f"Failed to create download_output_dir directory: {str(exc)}")
            sys.exit(1)

    repo_id, revision = sanitize_model_and_branch_names(candidate, branch=None)
    downloaded = download_model(repo_id, revision, download_root)
    if downloaded is None:
        logging.error("Failed to download the model.")
        sys.exit(1)
    return str(downloaded)
def quantize_and_upload(config, input_model_path):
    """Run every configured quantization and upload each result.

    For each entry in config['quantizations'] this builds the work and
    output directories under output_base_path/base_model_name, runs
    exllama's convert.py through run_command(), and uploads the output
    folder to the repository "<hf_username>/<base_model_name>-exl2-<bpw>bpw".

    Entries without 'bpw', or with non-integer / non-positive
    calibration_length or measurement_length, are logged and skipped. A
    failed conversion skips the upload for that entry; a failed upload is
    logged and the local output is kept.

    Args:
        config: Configuration mapping (see validate_config for the keys).
        input_model_path: Local directory of the model to convert.
    """
    exllama_dir = os.path.expanduser(config['exllama_path'])
    model_label = config['base_model_name']
    output_root = os.path.expanduser(config['output_base_path'])
    account = config['hf_username']
    fallback_hb = config.get('default_hb', 8)

    for entry in config['quantizations']:
        if 'bpw' not in entry:
            logging.warning("Skipping quantization config without 'bpw'.")
            continue

        bpw = entry['bpw']
        hb = entry.get('hb', fallback_hb)
        cal_len = entry.get('calibration_length', 2048)
        # measurement_length falls back to the calibration length
        meas_len = entry.get('measurement_length', cal_len)

        if not (isinstance(cal_len, int) and isinstance(meas_len, int)):
            logging.error(f"Invalid calibration_length or measurement_length values. Expected integers.")
            continue
        if not (cal_len > 0 and meas_len > 0):
            logging.error(f"Invalid calibration_length or measurement_length values. Must be positive integers.")
            continue

        quant_name = f"{model_label}-exl2-{bpw}bpw"
        work_dir = os.path.join(output_root, model_label, f"{quant_name}-work")
        output_dir = os.path.join(output_root, model_label, quant_name)

        try:
            for directory in (work_dir, output_dir):
                os.makedirs(directory, exist_ok=True)
            logging.info(f"Directories created or already exist: {work_dir}, {output_dir}")
        except OSError as exc:
            logging.error(f"Failed to create directories for {quant_name}: {str(exc)}")
            continue

        # Conversion step: convert.py from the exllama checkout
        convert_command = [
            "python", os.path.join(exllama_dir, "convert.py"),
            "-i", input_model_path,
            "-o", work_dir,
            "-cf", output_dir,
            "-b", str(bpw),
            "-hb", str(hb),
            "-l", str(cal_len),
            "-ml", str(meas_len)
        ]
        converted = run_command(convert_command)
        if not converted:
            logging.error(f"Quantization failed for {quant_name}. Skipping upload.")
            continue
        logging.info(f"Quantization completed for {quant_name}")

        # Upload step: failures here are logged, never fatal
        repo_name = f"{account}/{quant_name}"
        try:
            create_repo(repo_name, repo_type="model", exist_ok=True)
            logging.info(f"Repository '{repo_name}' is ready.")
            HfApi().upload_folder(
                folder_path=output_dir,
                repo_id=repo_name,
                repo_type="model"
            )
            logging.info(f"Successfully uploaded {quant_name} to Hugging Face")
        except Exception as exc:
            logging.error(f"Failed to upload {quant_name} to Hugging Face: {str(exc)}")
            logging.info(f"Quantized model is still available locally at {output_dir}")

        logging.info(f"Completed processing for {quant_name}")
if __name__ == "__main__":
    # Entry point: load config.yaml from the current working directory,
    # validate it, authenticate, locate the model, then quantize and upload.
    # Each failed step logs the reason and exits with status 1.
    config_path = "config.yaml"

    try:
        with open(config_path, "r") as config_file:
            config = yaml.safe_load(config_file)
    except FileNotFoundError:
        logging.error(f"{config_path} not found. Please create a config file.")
        sys.exit(1)
    except yaml.YAMLError as exc:
        logging.error(f"Error parsing {config_path}: {str(exc)}")
        sys.exit(1)
    else:
        logging.info(f"Configuration loaded from {config_path}")

    config_ok = validate_config(config)
    if not config_ok:
        logging.error("Configuration validation failed. Exiting.")
        sys.exit(1)

    logged_in = authenticate_hf()
    if not logged_in:
        logging.error("Hugging Face authentication failed. Exiting.")
        sys.exit(1)

    input_model_path = resolve_input_model(config)
    if not input_model_path:
        logging.error("Failed to resolve input model path. Exiting.")
        sys.exit(1)

    quantize_and_upload(config, input_model_path)
    logging.info("Script execution completed.")
```
---
base_model:
- TheDrummer/Lazarus-2407-100B
---
# Join our Discord! https://discord.gg/Nbv9pQ88Xb
## 2500+ members strong 💪 Now with more channels! A hub for users and makers alike!
---
# Endurance 100B v1 🎡
*A finetune of [Lazarus 2407 100B](https://huggingface.co/TheDrummer/Lazarus-2407-100B), a pruned Mistral Large 2407 123B!*
![image/png](https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/R2dDPDShY2VEhRzbJr-Go.png)
> [Do not go gentle into that good night. Rage, rage against the dying of the light!](https://www.youtube.com/watch?v=ESWzPhZWYeI)
---
## Links
- Original: https://huggingface.co/TheDrummer/Endurance-100B-v1
- GGUF: https://huggingface.co/TheDrummer/Endurance-100B-v1-GGUF
- iMatrix: https://huggingface.co/bartowski/Endurance-100B-v1-GGUF (recommended for smaller quants)
## Arsenal (Supported Chat Templates)
- Metharme (Pygmalion in ST)
- Creative, unhinged, unique
### Favorite RP Format
`*action* Dialogue *thoughts* Dialogue *narration*` in 1st person PoV
### Favorite Card
![image/png](https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/Ebi-X8DEw5LclXCFuL_H7.png)
[Audrey by thecooler](https://characterhub.org/characters/thecooler/audrey-5f995ef8)
---
## Technical Details
*Refer to [Lazarus 2407 100B](https://huggingface.co/TheDrummer/Lazarus-2407-100B) for pruning details.*
Endurance used the same hyperparameters as Behemoth. Training loss indicates that they are exactly the same albeit with lower confidence.
![image/png](https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/s0uELhSkSSwseyBrFzw7q.png)
Notes on Lazarus 100B (base model for Endurance):
- Testers have noted that 100B seemed nearly identical to 123B.
- One tester said that only one minor mistake was made by the model, requiring a rewrite for failing to pick up on the nuance.
- Another tester went through a satisfying 32K playthrough without any issues.
Endurance 100B v1.0 has gone through additional RP & Instruct training.