{
  "bf16": true,
  "dataset_eval_split": "validation",
  "dataset_name": "vwxyzjn/summarize_from_feedback_tldr_3_filtered_oai_preprocessing_1706381144",
  "ddp_find_unused_parameters": false,
  "eval_steps": 0.2,
  "evaluation_strategy": "steps",
  "git": "7906781",
  "gradient_accumulation_steps": 4,
  "gradient_checkpointing": false,
  "hub_model_id": "mnoukhov/pythia410m-sft-tldr",
  "learning_rate": 3e-6,
  "logging_steps": 100,
  "lr_scheduler_type": "cosine",
  "max_seq_length": 580,
  "model_name": "EleutherAI/pythia-410m-deduped",
  "name": "newsft_pythia410m_tldr.yml",
  "num_train_epochs": 1,
  "per_device_eval_batch_size": 8,
  "per_device_train_batch_size": 32,
  "push_to_hub": true,
  "report_to": "wandb",
  "task_type": "tldr"
}