# Used for multi-gpu
local_rank = -1
# Batch size per GPU for training
per_device_train_batch_size = 4
# Batch size per GPU for evaluation
per_device_eval_batch_size = 4
# Number of update steps to accumulate gradients for
gradient_accumulation_steps = 1
# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4
# Maximum gradient norm (gradient clipping)
max_grad_norm = 0.3
# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001
# Alpha parameter for LoRA scaling
lora_alpha = 16
# Dropout probability for LoRA layers
lora_dropout = 0.1
# LoRA attention dimension (rank)
lora_r = 64
# Maximum sequence length to use (None falls back to the trainer default)
max_seq_length = None
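# A minimal sketch of how the LoRA hyperparameters above would typically be
# wired into a peft LoraConfig (assumes the peft library is installed; the
# bias and task_type values shown here are conventional choices, not taken
# from this file).
from peft import LoraConfig

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)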
# The model that you want to train from the Hugging Face hub
model_name = "guardrail/llama-2-7b-guanaco-instruct-sharded"
# Fine-tuned model name
new_model = "llama-2-7b-custom-accountant"
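# A minimal sketch of loading the tokenizer for the base model named above
# (assumes transformers is installed; padding choices are common defaults
# for Llama-2 fine-tuning, not settings taken from this file).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"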
# The instruction dataset to use
# dataset_name = "databricks/databricks-dolly-15k"
# Activate 4-bit precision base model loading
use_4bit = True
# Activate nested quantization for 4-bit base models
use_nested_quant = False
# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"
# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"
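# A minimal sketch of how the 4-bit flags above typically map to a
# bitsandbytes quantization config (assumes transformers with bitsandbytes
# installed; only the variables defined above are used).
import torch
from transformers import BitsAndBytesConfig

compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)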
# Number of training epochs
num_train_epochs = 2
# Enable fp16 training (set bf16 to True instead on an A100)
fp16 = False
# Enable bf16 training
bf16 = False
# Pack multiple short examples into the same input sequence to improve efficiency
packing = False
# Enable gradient checkpointing
gradient_checkpointing = True
# Optimizer to use, original is paged_adamw_32bit
optim = "paged_adamw_32bit"
# Learning rate schedule (cosine here; a constant schedule is a common alternative)
lr_scheduler_type = "cosine"
# Number of optimizer update steps (-1 lets num_train_epochs control training length)
max_steps = -1
# Fraction of steps to do a warmup for
warmup_ratio = 0.03
# Group sequences into batches with the same length (saves memory and speeds up training considerably)
group_by_length = True
# Save a checkpoint every X update steps
save_steps = 10
# Log every X update steps
logging_steps = 1
# The output directory where the model predictions and checkpoints will be written
output_dir = "../model_files/"
# Load the entire model on GPU 0
device_map = {"": 0}
# Visualize training
report_to = "tensorboard"
# Tensorboard logs
tb_log_dir = "../logs/"
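# A minimal sketch of how the remaining settings are typically consumed:
# load the quantized base model, build TrainingArguments, and hand everything
# to trl's SFTTrainer (assumes trl and transformers are installed; the
# training dataset is not loaded in this file, so the trainer call is left
# commented out and `dataset` and its "text" column are hypothetical).
from transformers import AutoModelForCausalLM, TrainingArguments
from trl import SFTTrainer

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map,
)
model.config.use_cache = False

training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    gradient_checkpointing=gradient_checkpointing,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to=report_to,
    logging_dir=tb_log_dir,
)

# trainer = SFTTrainer(
#     model=model,
#     train_dataset=dataset,          # hypothetical: load your dataset first
#     peft_config=peft_config,
#     dataset_text_field="text",      # assumes a "text" column in the dataset
#     max_seq_length=max_seq_length,
#     tokenizer=tokenizer,
#     args=training_arguments,
#     packing=packing,
# )
# trainer.train()
# trainer.model.save_pretrained(new_model)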