{
  "repo_id": "habanoz/haber-gpt-3-40M-8k-c4tr-v1.003",
  "trainer_config": {
    "seed": 145,
    "seq_length": 1024,
    "gradient_accumulation_steps": 6,
    "batch_size": 40,
    "data_dir": "c4tr.003",
    "max_iters": 20000,
    "warmup_iters": 2000,
    "grad_norm_clip": 1.0,
    "out_dir": "haber-gpt-3-40M-8k-c4tr-v1.003",
    "dtype": "float16",
    "compile": true,
    "gc": false,
    "learning_rate": 0.0018,
    "decay_lr": true,
    "lr_decay_iters": 20000,
    "min_lr": 0.00018,
    "weight_decay": 0.1,
    "beta1": 0.9,
    "beta2": 0.95,
    "log_interval": 50,
    "eval_interval": 1000,
    "eval_iters": 100,
    "promised_flops": 65000000000000.0,
    "wandb_log": true,
    "wandb_project": "Haber-GPT-3-40M",
    "wandb_run_name": "haber-gpt-3-40M-8k-c4tr-v1.003",
    "wandb_run_id": "1733943696"
  }
}