{
  "text_encoder": {
    "model_type": "bert",
    "dim": 768,
    "context_dim": 768,
    "vocab_size": 30522,
    "padding_idx": 0,
    "num_layers": 4,
    "num_heads": 12,
    "embedding_dim": 256,
    "multimodal_layers_ids": [2, 3],
    "head_one_neuron": false,
    "pooling": "cls",
    "max_position_embeddings": 77,
    "dropout_prob": 0.1
  },
  "image_encoder": {
    "dim": 768,
    "patch_size": 16,
    "image_size": 224,
    "num_layers": 12,
    "num_heads": 12,
    "embedding_dim": 256,
    "pooling": "cls"
  }
}