# Suggested data paths when using GPT-NeoX locally.
#
# NOTE: this file uses `#` comments, so it must be read with a YAML-style
# parser (a flow mapping in JSON-like syntax), not a strict JSON parser.
{
  "data_path": "data/enwik8/enwik8_text_document",

  # Or, for weighted datasets, use the per-split path/weight lists below
  # instead (each weights list has one entry per path in the matching list):
  # "train-data-paths": ["data/enwik8/enwik8_text_document", "data/enwik8/enwik8_text_document"],
  # "test-data-paths": ["data/enwik8/enwik8_text_document", "data/enwik8/enwik8_text_document"],
  # "valid-data-paths": ["data/enwik8/enwik8_text_document", "data/enwik8/enwik8_text_document"],
  # "train-data-weights": [1., 2.],
  # "test-data-weights": [2., 1.],
  # "valid-data-weights": [0.5, 0.4],

  # If weight_by_num_documents is true, dataset weights are built from a
  # multinomial distribution over groups of data according to the number of
  # documents in each group.
  # WARNING: setting this to true will override any user-provided weights.
  # "weight_by_num_documents": false,
  # "weighted_sampler_alpha": 0.3,

  # Vocabulary and merge files.
  "vocab_file": "data/gpt2-vocab.json",
  "merge_file": "data/gpt2-merges.txt",

  # Checkpoint save/load locations (the same directory is used for both here).
  "save": "checkpoints",
  "load": "checkpoints",
  "checkpoint_validation_with_forward_pass": false,

  # Logging outputs.
  "tensorboard_dir": "tensorboard",
  "log_dir": "logs",
  "use_wandb": true,
  "wandb_host": "https://api.wandb.ai",
  "wandb_project": "neox"
}