File size: 352 Bytes
d20e9f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Configuration for image captioning on the NoCaps dataset (inferred from the
# dataset path and the checkpoint name below).
# NOTE(review): the code that consumes this file is not visible here; the
# per-key descriptions are inferred from key names and values — confirm
# against the loading script.

# Root path of the NoCaps images. This is a site-specific absolute path;
# presumably it has to be changed to match the local dataset location.
image_root: '/export/share/datasets/vision/nocaps/'
# Relative path for the annotation files (presumably resolved against the
# working directory — TODO confirm).
ann_root: 'annotation'

# Set pretrained to a local file path or a URL of a checkpoint file.
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth'

# Presumably selects the vision transformer variant — TODO confirm the
# accepted values.
vit: 'base'
batch_size: 32

# Presumably the input image resolution in pixels — TODO confirm.
image_size: 384

# Caption generation settings (presumably: bounds on the generated caption
# length, the beam-search width, and the text prefix each caption starts
# from — TODO confirm units and semantics).
max_length: 20
min_length: 5
num_beams: 3
# Keep the quotes: the value ends with a space, which an unquoted (plain)
# scalar would lose.
prompt: 'a picture of '