{"model_type": "custom_seq2seq", "architecture": "custom_seq2seq", "vocab_size": 32000, "batch_size": 16, "d_model": 512, "num_heads": 8, "num_encoder_layers": 6, "num_decoder_layers": 6, "max_len": 64, "lr": 0.0001, "num_epochs": 10} |
{"model_type": "custom_seq2seq", "architecture": "custom_seq2seq", "vocab_size": 32000, "batch_size": 16, "d_model": 512, "num_heads": 8, "num_encoder_layers": 6, "num_decoder_layers": 6, "max_len": 64, "lr": 0.0001, "num_epochs": 10} |