Upload checkpoints/pretrain_config.json with huggingface_hub
Browse files
checkpoints/pretrain_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "camembertv2-base-bf16", "seed": 25, "debug": false, "do_train": true, "do_eval": false, "phase2": true, "record_gradients": false, "profile": false, "distribution_strategy": "off", "use_horovod": true, "num_gpus": 16, "tpu_address": "", "amp": false, "xla": true, "fp16_compression": false, "bf16": true, "optimizer": "adam", "gradient_accumulation_steps": 8, "lr_schedule": "linear", "skip_adaptive": true, "electra_objective": false, "model_type": "roberta", "gen_weight": 1.0, "disc_weight": 50.0, "mask_prob": 0.4, "learning_rate": 0.0003, "lr_decay_power": 0.5, "weight_decay_rate": 0.01, "num_warmup_steps": 1000, "opt_beta_1": 0.878, "opt_beta_2": 0.974, "end_lr": 1e-05, "scale_loss": false, "log_freq": 10, "skip_checkpoint": false, "save_checkpoints_steps": 500, "eval_every_n_steps": -1, "num_train_steps": 17000, "num_eval_steps": 100, "keep_checkpoint_max": 500, "restore_checkpoint": "latest", "load_weights": false, "model_size": "base", "model_hparam_overrides": {}, "vocab_size": 32768, "do_lower_case": false, "uniform_generator": false, "shared_embeddings": true, "disentangled_gradients": false, "generator_layers": 1.0, "generator_hidden_size": 0.25, "disallow_correct": false, "temperature": 1.0, "max_seq_length": 1024, "train_batch_size": 64, "eval_batch_size": 512, "results_dir": "/scratch/camembertv2/runs/", "json_summary": null, "hidden_act": "gelu_bf16", "hidden_dropout_prob": 0.1, "attention_probs_dropout_prob": 0.1, "max_position_embeddings": 1025, "type_vocab_size": 1, "position_biased_input": true, "data_prep_working_dir": "/scratch/camembertv2/data/", "repeat_dataset": false, "vocab_file": "vocab/camembert-wordpiece-culturax-wiki-hal-2digits-apostrophe", "pretrain_tfrecords": "tfrecords/tfrecord_lower_case_0_seq_len_1024_random_seed_12345/culturax/train/*,tfrecords/tfrecord_lower_case_0_seq_len_1024_random_seed_12345/wiki/train/*,tfrecords/tfrecord_lower_case_0_seq_len_1024_random_seed_12345/wiki/train/*,tfrecords/tfrecord_lower_case_0_seq_len_1024_random_seed_12345/wiki/train/*,tfrecords/tfrecord_lower_case_0_seq_len_1024_random_seed_12345/wiki/train/*,tfrecords/tfrecord_lower_case_0_seq_len_1024_random_seed_12345/wiki/train/*,tfrecords/tfrecord_lower_case_0_seq_len_1024_random_seed_12345/wiki/train/*,tfrecords/tfrecord_lower_case_0_seq_len_1024_random_seed_12345/wiki/train/*,tfrecords/tfrecord_lower_case_0_seq_len_1024_random_seed_12345/wiki/train/*,tfrecords/tfrecord_lower_case_0_seq_len_1024_random_seed_12345/wiki/train/*,tfrecords/tfrecord_lower_case_0_seq_len_1024_random_seed_12345/wiki/train/*,tfrecords/tfrecord_lower_case_0_seq_len_1024_random_seed_12345/halfr/train/*", "ignore_ids_dict": {"[PAD]": 0, "[CLS]": 1, "[SEP]": 2, "[UNK]": 3, "[MASK]": 4}, "pad_token_id": 0, "bos_token_id": 1, "eos_token_id": 2, "model_dir": "/scratch/camembertv2/runs/models/camembertv2-base-bf16", "checkpoints_dir": "/scratch/camembertv2/runs/models/camembertv2-base-bf16/checkpoints", "weights_dir": "/scratch/camembertv2/runs/models/camembertv2-base-bf16/weights", "results_txt": "/scratch/camembertv2/runs/models/camembertv2-base-bf16/unsup_results.txt", "results_pkl": "/scratch/camembertv2/runs/models/camembertv2-base-bf16/unsup_results.pkl", "log_dir": "/scratch/camembertv2/runs/models/camembertv2-base-bf16/logs", "max_predictions_per_seq": 414, "hidden_size": 768, "embedding_size": 768, "num_hidden_layers": 12, "intermediate_size": 3072, "num_attention_heads": 12}
|