{
  "text_encoder": {
    "tokenizer_class": "unigram",
    "model_type": "bert",
    "dim": 384,
    "context_dim": 768,
    "vocab_size": 250037,
    "padding_idx": 1,
    "num_layers": 12,
    "num_heads": 12,
    "embedding_dim": 256,
    "multimodal_layers_ids": [
      8,
      9,
      10,
      11
    ],
    "head_one_neuron": false,
    "pooling": "mean",
    "max_position_embeddings": 50,
    "dropout_prob": 0.1
  },
  "image_encoder": {
    "normalization_means": [
      0.48145466,
      0.4578275,
      0.40821073
    ],
    "normalization_deviations": [
      0.26862954,
      0.26130258,
      0.27577711
    ],
    "dim": 768,
    "patch_size": 16,
    "image_size": 224,
    "num_layers": 12,
    "num_heads": 12,
    "embedding_dim": 256,
    "pooling": "cls"
  }
}