{ "accum_grad": 1, "att_dropout_rate": 0.0, "att_unit": 512, "backend": "pytorch", "beta1": 0.9, "beta2": 0.999, "config2": null, "config3": null, "debugmode": 1, "dropout_rate": 0.0, "dump_hdf5_path": null, "early_stop_criterion": "validation/main/loss", "emb_dropout_rate": 0.0, "embed_unit": 128, "epoch": 50, "gradclip": 1.0, "head": 8, "layer": 16, "lr": 0.0008, "lr_cosine_total": 100000, "lr_cosine_warmup": 1000, "maxlen": 60, "model_module": "transformer", "n_vocab": 5049, "ngpu": 8, "opt": "adam", "patience": 0, "pos_enc": "none", "report_interval_iters": 100, "schedulers": [ [ "lr", "cosine" ] ], "seed": 1, "sortagrad": 0, "test_label": null, "tie_weights": false, "train_dtype": "float32", "unit": 2048, "verbose": 1, "weight_decay": 0.0 }