{
  "text_encoder": {
    "tokenizer_class": "bert",
    "model_type": "bert",
    "dim": 768,
    "context_dim": 384,
    "vocab_size": 30522,
    "padding_idx": 0,
    "num_layers": 4,
    "num_heads": 12,
    "embedding_dim": 256,
    "multimodal_layers_ids": [
      2,
      3
    ],
    "head_one_neuron": false,
    "pooling": "cls",
    "max_position_embeddings": 64,
    "dropout_prob": 0.1
  },
  "image_encoder": {
    "dim": 384,
    "patch_size": 16,
    "image_size": 224,
    "num_layers": 12,
    "num_heads": 6,
    "embedding_dim": 256,
    "normalization_means": [0.48145466, 0.4578275, 0.40821073],
    "normalization_deviations": [0.26862954, 0.26130258, 0.27577711],
    "pooling": "cls"
  }
}