Update README.md

b47deb3 verified about 1 month ago

13.6 kB

	# Quantization Notes:

	bpw: 5
	hb: 6
	calibration_length: 8192
	measurement_length: 8192

	## Quantization Code:

	Posting this here for convenience in case anyone is interested or finds it useful. I run this code in a conda Python 3.12 environment on WSL 2 Ubuntu. To run it: create the conda environment and install or upgrade exllamav2, log in to Hugging Face with the `huggingface-cli login` terminal command, fill in the config.yaml file, then run the Python script.

	```config.yaml
	base_model_name: "Endurance-100B-v1"
	input_model: "~/models/TheDrummer_Endurance-100B-v1"
	download_output_dir: "~/models"
	output_base_path: "~/models/exl2-converted"
	hf_username: "NobodySpecial"
	default_hb: 6 # Default head bits value
	exllama_path: "~/exllamav2"

	# One list entry per quantization to produce. Keys other than bpw are optional.
	quantizations:
	  - bpw: 5
	    calibration_length: 8192 # Optional: specify calibration length in tokens
	    measurement_length: 8192 # Optional: specify measurement length in tokens
	  - bpw: 6
	    hb: 8 # Optional
	    calibration_length: 8192 # Optional: specify calibration length in tokens
	    measurement_length: 8192 # Optional: specify measurement length in tokens
	```

	```script.py
	import yaml
	import os
	import sys
	import subprocess
	import logging
	import re
	from tqdm import tqdm
	from pathlib import Path
	from huggingface_hub import HfApi, create_repo, login, hf_hub_download

	# Set up logging
	logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

	def run_command(command_list, timeout=300):
	    """Run an external command and stream its stdout into the log.

	    Args:
	        command_list: Program and arguments as a list (executed without a
	            shell).
	        timeout: Accepted for interface compatibility. It is currently not
	            enforced, so the call blocks until the process exits.

	    Returns:
	        True if the process exited with status 0. False if it exited with a
	        non-zero status or could not be started; the failure is logged and
	        no exception is propagated.
	    """
	    import threading  # local import: only needed for the stderr drain below

	    command_text = ' '.join(map(str, command_list))
	    try:
	        with subprocess.Popen(
	            command_list,
	            stdout=subprocess.PIPE,
	            stderr=subprocess.PIPE,
	            text=True,
	            bufsize=1,
	        ) as process:
	            # Drain stderr on a helper thread while stdout is streamed.
	            # Reading stderr only after stdout is exhausted lets a chatty
	            # child fill the stderr pipe and block forever.
	            stderr_chunks = []
	            stderr_reader = threading.Thread(
	                target=lambda: stderr_chunks.append(process.stderr.read()),
	                daemon=True,
	            )
	            stderr_reader.start()

	            for line in process.stdout:
	                logging.info(line.strip())

	            rc = process.wait()
	            stderr_reader.join()

	        if rc != 0:
	            logging.error(f"Error executing command: {command_text}")
	            logging.error(f"Error output: {''.join(stderr_chunks)}")
	            return False

	        logging.info(f"Command executed successfully: {command_text}")
	        return True
	    except Exception as e:
	        # Covers launch failures such as a missing executable.
	        logging.error(f"Unexpected error executing command: {command_text}")
	        logging.error(f"Error: {str(e)}")
	        return False

	def validate_config(config):
	    """Check the loaded configuration before any work starts.

	    Verifies that ``config`` is a mapping containing every required key, that
	    ``exllama_path`` is an existing directory, that ``quantizations`` is a
	    list, and that ``output_base_path`` exists (creating it when missing).

	    Args:
	        config: The object returned by parsing the config file. May be
	            ``None`` or a non-mapping when the file is empty or malformed.

	    Returns:
	        True when the configuration is usable, False otherwise. Every
	        failure is logged.
	    """
	    # An empty YAML file parses to None; reject that (and any other
	    # non-mapping) instead of failing on the membership tests below.
	    if not isinstance(config, dict):
	        logging.error("Configuration is empty or is not a mapping of keys to values.")
	        return False

	    required_keys = [
	        'exllama_path',
	        'base_model_name',
	        'input_model',
	        'output_base_path',
	        'hf_username',
	        'quantizations',
	    ]
	    missing_keys = [key for key in required_keys if key not in config]
	    if missing_keys:
	        logging.error(f"Missing required configuration keys: {', '.join(missing_keys)}")
	        return False

	    # Validate exllama_path
	    if not os.path.isdir(os.path.expanduser(config['exllama_path'])):
	        logging.error(f"exllama_path does not exist or is not a directory: {config['exllama_path']}")
	        return False

	    # Validate quantizations: it is iterated entry by entry later on.
	    if not isinstance(config['quantizations'], list):
	        logging.error("quantizations must be a list of quantization settings.")
	        return False

	    # Validate output_base_path
	    output_base_path = os.path.expanduser(config['output_base_path'])
	    if not os.path.isdir(output_base_path):
	        try:
	            os.makedirs(output_base_path, exist_ok=True)
	            logging.info(f"Created output_base_path directory: {output_base_path}")
	        except OSError as e:
	            logging.error(f"Failed to create output_base_path directory: {str(e)}")
	            return False

	    return True

	def authenticate_hf():
	    """Log in to Hugging Face using the token stored in the local cache file.

	    The token is read from ``~/.cache/huggingface/token`` and passed to
	    ``login``.

	    Returns:
	        True when login succeeded; False when reading the token or logging
	        in raised any exception (the error is logged).
	    """
	    cached_token_file = os.path.expanduser("~/.cache/huggingface/token")
	    try:
	        # Read the cached token, stripping the trailing newline.
	        with open(cached_token_file, "r") as handle:
	            token_value = handle.read().strip()

	        login(token=token_value)
	        logging.info("Authenticated with Hugging Face successfully.")
	    except Exception as exc:
	        logging.error(f"Failed to authenticate with Hugging Face: {str(exc)}")
	        return False
	    return True

	def sanitize_model_and_branch_names(model, branch):
	    """Normalize a model reference into a ``(model, branch)`` pair.

	    Trailing slashes and a leading ``https://huggingface.co/`` are removed
	    from ``model``. If ``model`` contains ``:``, the text between the first
	    and second colon is used as the branch and overrides ``branch``. A
	    missing branch defaults to ``"main"``.

	    Args:
	        model: Repository id or URL, optionally suffixed with ``:branch``.
	        branch: Branch name, or None to use the default.

	    Returns:
	        Tuple ``(model, branch)`` of cleaned strings.

	    Raises:
	        ValueError: If the resulting branch name contains characters other
	            than letters, digits, period, underscore and dash.
	    """
	    hub_prefix = "https://huggingface.co/"

	    cleaned = model.rstrip('/')
	    if cleaned.startswith(hub_prefix):
	        cleaned = cleaned[len(hub_prefix):]

	    # Everything before the first colon is the repo; the next segment, if
	    # present, names the branch.
	    repo_id, *suffix = cleaned.split(":")
	    if suffix:
	        branch = suffix[0]

	    if branch is None:
	        branch = "main"

	    if re.match(r"^[a-zA-Z0-9._-]+$", branch) is None:
	        raise ValueError("Invalid branch name. Only alphanumeric characters, period, underscore and dash are allowed.")

	    return repo_id, branch

	def download_model(model_name, branch_name, output_dir):
	    """Download every file of a Hugging Face repository revision.

	    Files are stored under ``output_dir`` in a folder named after the last
	    two ``/``-separated parts of the model name joined by ``_``, with
	    ``_<branch>`` appended for any branch other than ``main``.

	    Args:
	        model_name: Repository id or URL (normalized via
	            ``sanitize_model_and_branch_names``).
	        branch_name: Branch to download, or None for the default.
	        output_dir: Parent directory for the download; ``~`` is expanded.

	    Returns:
	        ``Path`` of the download folder, or None when the repository could
	        not be listed or at least one file failed to download.
	    """
	    # Sanitize model and branch names
	    model_name, branch_name = sanitize_model_and_branch_names(model_name, branch_name)

	    # Expand user directory if needed
	    output_dir = os.path.expanduser(output_dir)

	    # Initialize Hugging Face API
	    api = HfApi()

	    # List the files first so an inaccessible repository does not leave an
	    # empty folder behind.
	    try:
	        files = api.list_repo_files(model_name, revision=branch_name)
	    except Exception as e:
	        logging.error(f"Error accessing repository: {e}")
	        return None

	    # Create output directory
	    output_folder = Path(output_dir) / '_'.join(model_name.split('/')[-2:])
	    if branch_name != "main":
	        output_folder = output_folder.with_name(f"{output_folder.name}_{branch_name}")
	    output_folder.mkdir(parents=True, exist_ok=True)

	    # Download files, remembering failures so a partial download is not
	    # reported as a success.
	    failed_files = []
	    for file in tqdm(files, desc="Downloading files"):
	        try:
	            hf_hub_download(
	                repo_id=model_name,
	                filename=file,
	                revision=branch_name,
	                local_dir=output_folder,
	                local_dir_use_symlinks=False,
	            )
	        except Exception as e:
	            logging.error(f"Error downloading {file}: {e}")
	            failed_files.append(file)

	    if failed_files:
	        logging.error(
	            f"Model download incomplete: {len(failed_files)} of {len(files)} "
	            f"file(s) failed: {', '.join(failed_files)}"
	        )
	        return None

	    logging.info(f"Model downloaded to {output_folder}")
	    return output_folder

	def resolve_input_model(config):
	    """Return a local directory path for the configured input model.

	    If ``config['input_model']`` (after ``~`` expansion) is an existing
	    directory it is returned as-is. Otherwise the value is treated as a
	    Hugging Face repository and downloaded into
	    ``config['download_output_dir']`` (default ``./models``).

	    Args:
	        config: Parsed configuration mapping.

	    Returns:
	        Path of the model directory as a string.

	    Exits the process with status 1 if the download directory cannot be
	    created or the download fails.
	    """
	    candidate = os.path.expanduser(config['input_model'])

	    # Local directory: nothing to download.
	    if os.path.isdir(candidate):
	        logging.info(f"Using local model directory: {candidate}")
	        return candidate

	    # Anything else is treated as a Hugging Face repository.
	    logging.info(f"Input model is a Hugging Face model: {candidate}")
	    download_root = os.path.expanduser(config.get('download_output_dir', './models'))
	    if not os.path.isdir(download_root):
	        try:
	            os.makedirs(download_root, exist_ok=True)
	            logging.info(f"Created download_output_dir directory: {download_root}")
	        except OSError as err:
	            logging.error(f"Failed to create download_output_dir directory: {str(err)}")
	            sys.exit(1)

	    repo_id, revision = sanitize_model_and_branch_names(candidate, branch=None)
	    downloaded = download_model(repo_id, revision, download_root)
	    if downloaded is None:
	        logging.error("Failed to download the model.")
	        sys.exit(1)
	    return str(downloaded)

	def quantize_and_upload(config, input_model_path):
	    """Quantize the model once per configured entry and upload each result.

	    For every entry in ``config['quantizations']`` this runs
	    ``<exllama_path>/convert.py`` with the entry's ``bpw``, ``hb``,
	    ``calibration_length`` and ``measurement_length``, then uploads the
	    output folder to ``<hf_username>/<base_model_name>-exl2-<bpw>bpw``.
	    A failing entry is logged and skipped; the remaining entries still run.

	    Args:
	        config: Parsed configuration mapping.
	        input_model_path: Local directory of the model to quantize.

	    Returns:
	        None.
	    """
	    exllama_path = os.path.expanduser(config['exllama_path'])
	    base_model_name = config['base_model_name']
	    output_base_path = os.path.expanduser(config['output_base_path'])
	    hf_username = config['hf_username']
	    default_hb = config.get('default_hb', 8)
	    convert_script = os.path.join(exllama_path, "convert.py")

	    # "or []" tolerates a "quantizations:" key left without a value.
	    for quant_config in config['quantizations'] or []:
	        # Entries that are not mappings (null, bare strings) cannot carry
	        # settings, so treat them like entries without 'bpw'.
	        if not isinstance(quant_config, dict) or 'bpw' not in quant_config:
	            logging.warning("Skipping quantization config without 'bpw'.")
	            continue

	        bpw = quant_config['bpw']
	        hb = quant_config.get('hb', default_hb)
	        calibration_length = quant_config.get('calibration_length', 2048)
	        measurement_length = quant_config.get('measurement_length', calibration_length)
	        lengths = (calibration_length, measurement_length)

	        # bool is a subclass of int, so exclude it explicitly.
	        if any(isinstance(value, bool) or not isinstance(value, int) for value in lengths):
	            logging.error("Invalid calibration_length or measurement_length values. Expected integers.")
	            continue

	        if any(value <= 0 for value in lengths):
	            logging.error("Invalid calibration_length or measurement_length values. Must be positive integers.")
	            continue

	        quant_name = f"{base_model_name}-exl2-{bpw}bpw"
	        work_dir = os.path.join(output_base_path, base_model_name, f"{quant_name}-work")
	        output_dir = os.path.join(output_base_path, base_model_name, quant_name)

	        try:
	            os.makedirs(work_dir, exist_ok=True)
	            os.makedirs(output_dir, exist_ok=True)
	            logging.info(f"Directories created or already exist: {work_dir}, {output_dir}")
	        except OSError as e:
	            logging.error(f"Failed to create directories for {quant_name}: {str(e)}")
	            continue

	        # Run quantization with the interpreter running this script, so the
	        # child uses the same environment even when no "python" executable
	        # is on PATH.
	        command_list = [
	            sys.executable, convert_script,
	            "-i", input_model_path,
	            "-o", work_dir,
	            "-cf", output_dir,
	            "-b", str(bpw),
	            "-hb", str(hb),
	            "-l", str(calibration_length),
	            "-ml", str(measurement_length),
	        ]
	        if not run_command(command_list):
	            logging.error(f"Quantization failed for {quant_name}. Skipping upload.")
	            continue

	        logging.info(f"Quantization completed for {quant_name}")

	        # Try to upload to Hugging Face; a failed upload keeps the local copy.
	        repo_name = f"{hf_username}/{quant_name}"
	        try:
	            create_repo(repo_name, repo_type="model", exist_ok=True)
	            logging.info(f"Repository '{repo_name}' is ready.")
	            api = HfApi()
	            api.upload_folder(
	                folder_path=output_dir,
	                repo_id=repo_name,
	                repo_type="model",
	            )
	            logging.info(f"Successfully uploaded {quant_name} to Hugging Face")
	        except Exception as e:
	            logging.error(f"Failed to upload {quant_name} to Hugging Face: {str(e)}")
	            logging.info(f"Quantized model is still available locally at {output_dir}")

	        logging.info(f"Completed processing for {quant_name}")

	if __name__ == "__main__":
	    # Entry point: load config.yaml, validate it, authenticate, resolve the
	    # input model, then quantize and upload. Any failed step exits with 1.
	    config_path = "config.yaml"
	    try:
	        with open(config_path, "r") as config_file:
	            config = yaml.safe_load(config_file)
	    except FileNotFoundError:
	        logging.error(f"{config_path} not found. Please create a config file.")
	        sys.exit(1)
	    except yaml.YAMLError as parse_error:
	        logging.error(f"Error parsing {config_path}: {str(parse_error)}")
	        sys.exit(1)
	    else:
	        logging.info(f"Configuration loaded from {config_path}")

	    # Stop before doing any work if the configuration is unusable.
	    if not validate_config(config):
	        logging.error("Configuration validation failed. Exiting.")
	        sys.exit(1)

	    # Authentication comes before resolving the model.
	    if not authenticate_hf():
	        logging.error("Hugging Face authentication failed. Exiting.")
	        sys.exit(1)

	    input_model_path = resolve_input_model(config)
	    if not input_model_path:
	        logging.error("Failed to resolve input model path. Exiting.")
	        sys.exit(1)

	    quantize_and_upload(config, input_model_path)
	    logging.info("Script execution completed.")
	```

	---
	base_model:
	- TheDrummer/Lazarus-2407-100B
	---
	# Join our Discord! https://discord.gg/Nbv9pQ88Xb
	## 2500+ members strong 💪 Now with more channels! A hub for users and makers alike!
	---
	# Endurance 100B v1 🎡

	A finetune of [Lazarus 2407 100B](https://huggingface.co/TheDrummer/Lazarus-2407-100B), a pruned Mistral Large 2407 123B!

	![image/png](https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/R2dDPDShY2VEhRzbJr-Go.png)

	> [Do not go gentle into that good night. Rage, rage against the dying of the light!](https://www.youtube.com/watch?v=ESWzPhZWYeI)

	---

	## Links
	- Original: https://huggingface.co/TheDrummer/Endurance-100B-v1
	- GGUF: https://huggingface.co/TheDrummer/Endurance-100B-v1-GGUF
	- iMatrix: https://huggingface.co/bartowski/Endurance-100B-v1-GGUF (recommended for smaller quants)

	## Arsenal (Supported Chat Templates)
	- Metharme (Pygmalion in ST)
	- Creative, unhinged, unique

	### Favorite RP Format
	`action Dialogue thoughts Dialogue narration` in 1st person PoV

	### Favorite Card
	![image/png](https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/Ebi-X8DEw5LclXCFuL_H7.png)
	[Audrey by thecooler](https://characterhub.org/characters/thecooler/audrey-5f995ef8)

	---

	## Technical Details

	Refer to [Lazarus 2407 100B](https://huggingface.co/TheDrummer/Lazarus-2407-100B) for pruning details.

	Endurance used the same hyperparameters as Behemoth. Training loss indicates that they are exactly the same albeit with lower confidence.

	![image/png](https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/s0uELhSkSSwseyBrFzw7q.png)

	Notes on Lazarus 100B (base model for Endurance):
	- Testers have noted that 100B seemed nearly identical to 123B.
	- One tester said that only one minor mistake was made by the model, requiring a rewrite for failing to pick up on the nuance.
	- Another tester went through a satisfying 32K playthrough without any issues.

	Endurance 100B v1.0 has gone through additional RP & Instruct training.