import gradio as gr


class Main:
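    """Gradio front end for collecting Axolotl fine-tuning parameters."""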

    async def train_model(self, max_steps, base_model, model_type, tokenizer_type, is_llama_derived_model,
                          strict, datasets_path, dataset_format, shards,
                          val_set_size, output_dir, adapter, lora_model_dir, sequence_len, sample_packing,
                          pad_to_sequence_len, lora_r, lora_alpha, lora_dropout,
                          lora_target_modules, lora_target_linear, lora_fan_in_fan_out, gradient_accumulation_steps,
                          micro_batch_size, num_epochs, optimizer, lr_scheduler, learning_rate, train_on_inputs,
                          group_by_length, bf16, fp16, tf32, gradient_checkpointing,
                          resume_from_checkpoint, local_rank, logging_steps, xformers_attention, flash_attention,
                          load_best_model_at_end, warmup_steps, evals_per_epoch, eval_table_size, saves_per_epoch,
                          debug, weight_decay, wandb_project, wandb_entity, wandb_watch,
                          wandb_name, wandb_log_model, last_tab, progress=gr.Progress(track_tqdm=True)):
        # Gather the configuration values received from the UI
        # (max_steps is passed in separately and not included in this list).
        config = [base_model, model_type, tokenizer_type, is_llama_derived_model,
                  strict, datasets_path, dataset_format, shards,
                  val_set_size, output_dir, adapter, lora_model_dir, sequence_len, sample_packing,
                  pad_to_sequence_len, lora_r, lora_alpha, lora_dropout,
                  lora_target_modules, lora_target_linear, lora_fan_in_fan_out, gradient_accumulation_steps,
                  micro_batch_size, num_epochs, optimizer, lr_scheduler, learning_rate, train_on_inputs,
                  group_by_length, bf16, fp16, tf32, gradient_checkpointing,
                  resume_from_checkpoint, local_rank, logging_steps, xformers_attention, flash_attention,
                  load_best_model_at_end, warmup_steps, evals_per_epoch, eval_table_size, saves_per_epoch,
                  debug, weight_decay, wandb_project, wandb_entity, wandb_watch,
                  wandb_name, wandb_log_model, last_tab]
        # Return a single string so the result fits the single "Training Output" textbox.
        return "\n".join(str(value) for value in config)
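
    # Hedged sketch (not wired into the UI): one way the values gathered by
    # `train_model` could be serialized into an Axolotl-style YAML config before
    # launching a real training run. The helper name `config_to_yaml`, its
    # `keys` argument, and the PyYAML dependency are illustrative assumptions,
    # not part of the original script.
    @staticmethod
    def config_to_yaml(keys, values):
        import yaml  # assumed extra dependency (PyYAML)

        # Pair each config field name with its UI value, preserving order.
        return yaml.safe_dump(dict(zip(keys, values)), sort_keys=False)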

    def initiate_userInterface(self):
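        """Build the Gradio Blocks interface and return it."""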
        with gr.Blocks() as self.app:
            gr.Markdown("### Axolotl UI")

            with gr.Tab("FineTuning UI"):
                base_model = gr.Dropdown(choices=["NousResearch/Llama-2-7b-hf", "mistralai/Mistral-7B-Instruct-v0.2"], label="Select Model", value="NousResearch/Llama-2-7b-hf")
                datasets_path = gr.Textbox(label="datasets_path", value="mhenrichsen/alpaca_2k_test")
                dataset_format = gr.Radio(choices=["Alpaca"], label="Dataset Format", value="Alpaca")
                shards = gr.Slider(minimum=0, maximum=20, step=1, label="shards", value=10)
                last_tab = gr.Checkbox(label="last_tab", value=False, visible=False)

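                # Less frequently changed options are grouped below and hidden by default.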
                with gr.Accordion("Advanced Settings", open=False):
                    with gr.Tab("YAML Configuration"):
                        model_type = gr.Radio(label="model_type", choices=["MistralForCausalLM", "LlamaForCausalLM"], value="LlamaForCausalLM")
                        tokenizer_type = gr.Textbox(label="tokenizer_type", value="LlamaTokenizer", visible=False)
                        is_llama_derived_model = gr.Checkbox(label="is_llama_derived_model", value=True, info="Determines the padding strategy based on the parent type of the model")
                        strict = gr.Checkbox(label="strict", value=False, visible=False)
                        val_set_size = gr.Slider(minimum=0, maximum=1, step=0.1, label="val_set_size", value=0.05, info="Fraction of the training data held out for validation")
                        output_dir = gr.Textbox(label="output_dir", value="./finetune-out", info="Output directory for the fine-tuned model")
                        adapter = gr.Radio(choices=["qlora", "lora"], label="adapter", value="qlora", info="Parameter-efficient training strategy")
                        lora_model_dir = gr.Textbox(label="lora_model_dir", info="Directory of a custom adapter, if one is provided", visible=False)
                        sequence_len = gr.Slider(minimum=512, maximum=4096, step=10, label="sequence_len", value=1024, info="Maximum input sequence length used for training")
                        sample_packing = gr.Checkbox(label="sample_packing", value=True, info="Speeds up training by packing multiple samples into each sequence; recommended false for small datasets")
                        pad_to_sequence_len = gr.Checkbox(label="pad_to_sequence_len", value=True, info="Pads inputs to the full sequence length to avoid memory fragmentation and out-of-memory issues; recommended true")

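                        # LoRA adapter hyperparameters.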
                        lora_r = gr.Slider(minimum=8, maximum=64, step=2, label="lora_r", value=32, info="Rank of the LoRA adaptation matrices")
                        lora_alpha = gr.Slider(minimum=8, maximum=64, step=0.1, label="lora_alpha", value=16, info="Scaling factor controlling how strongly the adapter weights affect the base model")
                        lora_dropout = gr.Slider(minimum=0, maximum=1, step=0.01, label="lora_dropout", value=0.05, info="Dropout probability applied to the LoRA layers during training")
                        lora_target_modules = gr.Textbox(label="lora_target_modules", value="q_proj, v_proj, k_proj", info="Modules to adapt with LoRA; any dense layer can be targeted")
                        lora_target_linear = gr.Checkbox(label="lora_target_linear", value=True, info="If enabled, all linear layers are adapted and lora_target_modules is ignored")
                        lora_fan_in_fan_out = gr.Textbox(label="lora_fan_in_fan_out", visible=False)

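                        # Training schedule and optimizer settings.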
                        gradient_accumulation_steps = gr.Slider(minimum=4, maximum=64, step=1, label="gradient_accumulation_steps", value=4, info="Number of micro-batches accumulated before each weight update")
                        micro_batch_size = gr.Slider(minimum=1, maximum=64, step=2, label="micro_batch_size", value=2, info="Number of samples sent to each GPU per step")
                        num_epochs = gr.Slider(minimum=1, maximum=4, step=1, label="num_epochs", value=1)
                        max_steps = gr.Textbox(label="max_steps", value="1", info="Maximum number of training steps; overrides the number of epochs", visible=False)
                        optimizer = gr.Radio(choices=["adamw_hf", "adamw_torch", "adamw_torch_fused", "adamw_torch_xla", "adamw_apex_fused", "adafactor", "adamw_anyprecision", "sgd", "adagrad", "adamw_bnb_8bit", "lion_8bit", "lion_32bit", "paged_adamw_32bit", "paged_adamw_8bit", "paged_lion_32bit", "paged_lion_8bit"], value="paged_adamw_32bit", label="optimizer", info="Choose an optimizer that matches the model's quantization")
                        lr_scheduler = gr.Radio(label="lr_scheduler", choices=["one_cycle", "log_sweep", "cosine"], value="cosine", info="Schedule that adjusts the learning rate over the course of training")
                        learning_rate = gr.Textbox(label="max_learning_rate", value="2e-5")
                        train_on_inputs = gr.Checkbox(label="train_on_inputs", value=False, visible=False)
                        group_by_length = gr.Checkbox(label="group_by_length", value=False, visible=False)
                        bf16 = gr.Checkbox(label="bfloat16", value=False, info="Enable bfloat16 precision for tensors; supported only on Ampere or newer GPUs")
                        fp16 = gr.Checkbox(label="Half Precision", value=True, info="Enable half precision (FP16) for tensor processing")
                        tf32 = gr.Checkbox(label="TensorFloat32", value=False, info="Enable TensorFloat32 precision for tensors; supported only on Ampere or newer GPUs")
                        gradient_checkpointing = gr.Checkbox(label="gradient_checkpointing", value=True, visible=False)
                        resume_from_checkpoint = gr.Textbox(label="resume_from_checkpoint", visible=False)
                        local_rank = gr.Textbox(label="local_rank", visible=False)
                        logging_steps = gr.Slider(minimum=1, maximum=100, step=1, label="logging_steps", value=1, visible=False)
                        xformers_attention = gr.Checkbox(label="xformers_attention", value=False, visible=False)
                        flash_attention = gr.Checkbox(label="flash_attention", value=False, visible=False)
                        load_best_model_at_end = gr.Checkbox(label="load_best_model_at_end", value=False, visible=False)
                        warmup_steps = gr.Slider(minimum=1, maximum=100, step=1, label="warmup_steps", value=10, visible=False)
                        evals_per_epoch = gr.Slider(minimum=1, maximum=100, step=1, label="evals_per_epoch", value=4, info="Number of evaluations per epoch", visible=False)
                        eval_table_size = gr.Textbox(label="eval_table_size", visible=False)
                        saves_per_epoch = gr.Slider(minimum=1, maximum=100, step=1, label="saves_per_epoch", value=1, info="Number of checkpoints saved per epoch")

                        debug = gr.Checkbox(label="debug", value=False, visible=False)

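                        # Regularization and Weights & Biases logging options (all hidden by default).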
                        weight_decay = gr.Number(label="weight_decay", value=0.0, visible=False)
                        wandb_watch = gr.Checkbox(label="wandb_watch", value=False, visible=False)
                        wandb_log_model = gr.Checkbox(label="wandb_log_model", value=False, visible=False)
                        wandb_project = gr.Textbox(label="wandb_project", visible=False)
                        wandb_entity = gr.Textbox(label="wandb_entity", visible=False)
                        wandb_name = gr.Textbox(label="wandb_name", visible=False)

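                # Run train_model on click, passing every UI component above as an input.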
                train_btn = gr.Button("Start Training")
                training_output = gr.Textbox(label="Training Output", interactive=False)
                train_btn.click(
                    self.train_model,
                    inputs=[max_steps, base_model, model_type, tokenizer_type, is_llama_derived_model,
                            strict, datasets_path, dataset_format, shards,
                            val_set_size, output_dir, adapter, lora_model_dir, sequence_len, sample_packing,
                            pad_to_sequence_len, lora_r, lora_alpha, lora_dropout,
                            lora_target_modules, lora_target_linear, lora_fan_in_fan_out, gradient_accumulation_steps,
                            micro_batch_size, num_epochs, optimizer, lr_scheduler, learning_rate, train_on_inputs,
                            group_by_length, bf16, fp16, tf32, gradient_checkpointing,
                            resume_from_checkpoint, local_rank, logging_steps, xformers_attention, flash_attention,
                            load_best_model_at_end, warmup_steps, evals_per_epoch, eval_table_size, saves_per_epoch,
                            debug, weight_decay, wandb_project, wandb_entity, wandb_watch,
                            wandb_name, wandb_log_model, last_tab],
                    outputs=[training_output],
                )

        return self.app


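# Launch the UI with a public share link, listening on all network interfaces.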
if __name__ == "__main__":
    main = Main()
    app = main.initiate_userInterface()
    app.queue().launch(share=True, server_name="0.0.0.0")