import gradio as gr


class Main:
    async def train_model(self, max_steps, base_model, model_type, tokenizer_type, is_llama_derived_model, strict,
                          datasets_path, dataset_format, shards, val_set_size, output_dir, adapter, lora_model_dir,
                          sequence_len, sample_packing, pad_to_sequence_len, lora_r, lora_alpha, lora_dropout,
                          lora_target_modules, lora_target_linear, lora_fan_in_fan_out, gradient_accumulation_steps,
                          micro_batch_size, num_epochs, optimizer, lr_scheduler, learning_rate, train_on_inputs,
                          group_by_length, bf16, fp16, tf32, gradient_checkpointing, resume_from_checkpoint,
                          local_rank, logging_steps, xformers_attention, flash_attention, load_best_model_at_end,
                          warmup_steps, evals_per_epoch, eval_table_size, saves_per_epoch, debug, weight_decay,
                          wandb_project, wandb_entity, wandb_watch, wandb_name, wandb_log_model, last_tab,
                          progress=gr.Progress(track_tqdm=True)):
        # Collect the UI settings; for now this simply echoes them back to the
        # "Training Output" textbox as a placeholder for the actual training run.
        a = [base_model, model_type, tokenizer_type, is_llama_derived_model, strict, datasets_path, dataset_format,
             shards, val_set_size, output_dir, adapter, lora_model_dir, sequence_len, sample_packing,
             pad_to_sequence_len, lora_r, lora_alpha, lora_dropout, lora_target_modules, lora_target_linear,
             lora_fan_in_fan_out, gradient_accumulation_steps, micro_batch_size, num_epochs, optimizer, lr_scheduler,
             learning_rate, train_on_inputs, group_by_length, bf16, fp16, tf32, gradient_checkpointing,
             resume_from_checkpoint, local_rank, logging_steps, xformers_attention, flash_attention,
             load_best_model_at_end, warmup_steps, evals_per_epoch, eval_table_size, saves_per_epoch, debug,
             weight_decay, wandb_project, wandb_entity, wandb_watch, wandb_name, wandb_log_model, last_tab]
        return a

    def initiate_userInterface(self):
        with gr.Blocks() as self.app:
            gr.Markdown("### Axolotl UI")

            # Finetuning Tab
            with gr.Tab("FineTuning UI"):
                base_model = gr.Dropdown(choices=["NousResearch/Llama-2-7b-hf", "mistralai/Mistral-7B-Instruct-v0.2"],
                                         label="Select Model", value="NousResearch/Llama-2-7b-hf")
                datasets_path = gr.Textbox(label="datasets_path", value="mhenrichsen/alpaca_2k_test")
                dataset_format = gr.Radio(choices=["Alpaca"], label="Dataset Format", value="Alpaca")
                shards = gr.Slider(minimum=0, maximum=20, step=1, label="shards", value=10)
                last_tab = gr.Checkbox(label="last_tab", value=False, visible=False)

                with gr.Accordion("Advanced Settings", open=False):
                    with gr.Tab("YAML Configuration"):
                        model_type = gr.Radio(label="model_type", choices=["MistralForCausalLM", "LlamaForCausalLM"],
                                              value="LlamaForCausalLM")
                        tokenizer_type = gr.Textbox(label="tokenizer_type", value="LlamaTokenizer", visible=False)
                        is_llama_derived_model = gr.Checkbox(label="is_llama_derived_model", value=True,
                                                             info="Determines the padding strategy based on the parent type of the model")
                        strict = gr.Checkbox(label="strict", value=False, visible=False)
                        val_set_size = gr.Slider(minimum=0, maximum=1, step=0.1, label="val_set_size", value=0.05,
                                                 info="Fraction of the training data held out for validation")
                        output_dir = gr.Textbox(label="output_dir", value="./finetune-out",
                                                info="Output directory for the fine-tuned model")
                        adapter = gr.Radio(choices=["qlora", "lora"], label="adapter", value="qlora",
                                           info="Parameter-efficient fine-tuning strategy")
                        lora_model_dir = gr.Textbox(label="lora_model_dir",
                                                    info="Optional directory of a custom LoRA adapter", visible=False)
                        sequence_len = gr.Slider(minimum=512, maximum=4096, step=10, label="sequence_len", value=1024,
                                                 info="Maximum input sequence length used for training")
                        sample_packing = gr.Checkbox(label="sample_packing", value=True,
                                                     info="Speeds up data preparation, but false is recommended for small datasets")
                        pad_to_sequence_len = gr.Checkbox(label="pad_to_sequence_len", value=True,
                                                          info="Pads inputs to the full sequence length to avoid memory fragmentation and out-of-memory issues. Recommended true")
                        # eval_sample_packing = gr.Checkbox(label="eval_sample_packing", value=False)
                        lora_r = gr.Slider(minimum=8, maximum=64, step=2, label="lora_r", value=32,
                                           info="Rank of the LoRA adapter matrices; higher values add more trainable parameters")
                        lora_alpha = gr.Slider(minimum=8, maximum=64, step=0.1, label="lora_alpha", value=16,
                                               info="Scaling factor controlling how strongly the adapter weights affect the base model's weights")
                        lora_dropout = gr.Slider(minimum=0, maximum=1, label="lora_dropout", value=0.05, step=0.01,
                                                 info="Fraction of adapter weights randomly dropped during training")
                        lora_target_modules = gr.Textbox(label="lora_target_modules", value="q_proj, v_proj, k_proj",
                                                         info="Dense layers to target with the parameter-efficient adapter")
                        lora_target_linear = gr.Checkbox(label="lora_target_linear", value=True,
                                                         info="If enabled, lora_target_modules is ignored and all linear layers are adapted")
                        lora_fan_in_fan_out = gr.Textbox(label="lora_fan_in_fan_out", visible=False)
                        gradient_accumulation_steps = gr.Slider(minimum=4, maximum=64, step=1,
                                                                label="gradient_accumulation_steps", value=4,
                                                                info="Number of micro-batches accumulated before each weight update")
                        micro_batch_size = gr.Slider(minimum=1, maximum=64, step=2, label="micro_batch_size", value=2,
                                                     info="Number of samples sent to each GPU per step")
                        num_epochs = gr.Slider(minimum=1, maximum=4, step=1, label="num_epochs", value=1)
                        max_steps = gr.Textbox(label="max_steps", value="1",
                                               info="Maximum number of training steps; overrides the number of epochs",
                                               visible=False)
                        optimizer = gr.Radio(choices=["adamw_hf", "adamw_torch", "adamw_torch_fused", "adamw_torch_xla",
                                                      "adamw_apex_fused", "adafactor", "adamw_anyprecision", "sgd",
                                                      "adagrad", "adamw_bnb_8bit", "lion_8bit", "lion_32bit",
                                                      "paged_adamw_32bit", "paged_adamw_8bit", "paged_lion_32bit",
                                                      "paged_lion_8bit"],
                                             value="paged_adamw_32bit", label="optimizer",
                                             info="Use an optimizer that matches the model's quantization")
                        lr_scheduler = gr.Radio(label="lr_scheduler", choices=["one_cycle", "log_sweep", "cosine"],
                                                value="cosine",
                                                info="Adjusts the learning rate dynamically based on the current step")
                        learning_rate = gr.Textbox(label="max_learning_rate", value="2e-5")
                        train_on_inputs = gr.Checkbox(label="train_on_inputs", value=False, visible=False)
                        group_by_length = gr.Checkbox(label="group_by_length", value=False, visible=False)
                        bf16 = gr.Checkbox(label="bfloat16", value=False,
                                           info="Enable bfloat16 precision for tensors; supported only on Ampere or newer GPUs.")
                        fp16 = gr.Checkbox(label="Half Precision", value=True,
                                           info="Enable half precision (FP16) for tensor processing.")
                        tf32 = gr.Checkbox(label="TensorFloat32", value=False,
                                           info="Enable TensorFloat32 precision for tensors; supported only on Ampere or newer GPUs.")
                        gradient_checkpointing = gr.Checkbox(label="gradient_checkpointing", value=True, visible=False)
                        resume_from_checkpoint = gr.Textbox(label="resume_from_checkpoint", visible=False)
                        local_rank = gr.Textbox(label="local_rank", visible=False)
                        logging_steps = gr.Slider(minimum=1, maximum=100, step=1, label="logging_steps", value=1,
                                                  visible=False)
                        xformers_attention = gr.Checkbox(label="xformers_attention", value=False, visible=False)
                        flash_attention = gr.Checkbox(label="flash_attention", value=False, visible=False)
                        load_best_model_at_end = gr.Checkbox(label="load_best_model_at_end", value=False, visible=False)
                        warmup_steps = gr.Slider(minimum=1, maximum=100, step=1, label="warmup_steps", value=10,
                                                 visible=False)
                        evals_per_epoch = gr.Slider(minimum=1, maximum=100, step=1, label="evals_per_epoch", value=4,
                                                    info="Number of evaluations per epoch", visible=False)
                        eval_table_size = gr.Textbox(label="eval_table_size", visible=False)
                        saves_per_epoch = gr.Slider(minimum=1, maximum=100, step=1, label="saves_per_epoch", value=1,
                                                    info="Number of checkpoints saved per epoch")
                        debug = gr.Checkbox(label="debug", value=False, visible=False)
                        weight_decay = gr.Number(label="weight_decay", value=0.0, visible=False)
                        wandb_watch = gr.Checkbox(label="wandb_watch", value=False, visible=False)
                        wandb_log_model = gr.Checkbox(label="wandb_log_model", value=False, visible=False)
                        wandb_project = gr.Textbox(label="wandb_project", visible=False)
                        wandb_entity = gr.Textbox(label="wandb_entity", visible=False)
                        wandb_name = gr.Textbox(label="wandb_name", visible=False)

                train_btn = gr.Button("Start Training")
                training_output = gr.Textbox(label="Training Output", interactive=False)
                train_btn.click(
                    self.train_model,
                    inputs=[max_steps, base_model, model_type, tokenizer_type, is_llama_derived_model, strict,
                            datasets_path, dataset_format, shards, val_set_size, output_dir, adapter, lora_model_dir,
                            sequence_len, sample_packing, pad_to_sequence_len, lora_r, lora_alpha, lora_dropout,
                            lora_target_modules, lora_target_linear, lora_fan_in_fan_out, gradient_accumulation_steps,
                            micro_batch_size, num_epochs, optimizer, lr_scheduler, learning_rate, train_on_inputs,
                            group_by_length, bf16, fp16, tf32, gradient_checkpointing, resume_from_checkpoint,
                            local_rank, logging_steps, xformers_attention, flash_attention, load_best_model_at_end,
                            warmup_steps, evals_per_epoch, eval_table_size, saves_per_epoch, debug, weight_decay,
                            wandb_project, wandb_entity, wandb_watch, wandb_name, wandb_log_model, last_tab],
                    outputs=[training_output],
                )
        return self.app
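
# The train_model stub above only echoes the collected settings. Below is a minimal,
# illustrative sketch of how those values could be mapped onto an Axolotl-style YAML
# config; build_axolotl_config is a hypothetical helper that is not wired into the UI,
# it covers only a subset of the fields, and it assumes PyYAML is installed. The
# generated file could then be consumed by Axolotl's own CLI
# (e.g. `accelerate launch -m axolotl.cli.train finetune.yml`).
def build_axolotl_config(values: dict, config_path: str = "finetune.yml") -> str:
    """Write the UI selections to a YAML file and return its path (sketch only)."""
    import yaml  # extra dependency assumed for this sketch

    config = {
        "base_model": values["base_model"],
        "model_type": values["model_type"],
        "tokenizer_type": values["tokenizer_type"],
        "is_llama_derived_model": values["is_llama_derived_model"],
        # The dataset path/format widgets map onto Axolotl's `datasets` list.
        "datasets": [{"path": values["datasets_path"], "type": values["dataset_format"].lower()}],
        "val_set_size": values["val_set_size"],
        "output_dir": values["output_dir"],
        "adapter": values["adapter"],
        "sequence_len": values["sequence_len"],
        "lora_r": values["lora_r"],
        "lora_alpha": values["lora_alpha"],
        "lora_dropout": values["lora_dropout"],
        "gradient_accumulation_steps": values["gradient_accumulation_steps"],
        "micro_batch_size": values["micro_batch_size"],
        "num_epochs": values["num_epochs"],
        "optimizer": values["optimizer"],
        "lr_scheduler": values["lr_scheduler"],
        "learning_rate": float(values["learning_rate"]),
    }
    with open(config_path, "w") as f:
        yaml.safe_dump(config, f, sort_keys=False)
    return config_path
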

if __name__ == "__main__":
    main = Main()
    app = main.initiate_userInterface()
    app.queue().launch(share=True, server_name="0.0.0.0")