---
# Demo configuration: one input image, its candidate class names, and the
# settings for the CLIP, CAR, and SAM stages plus the test/run options.
#
# NOTE(review): the grouping of keys under `clip`, `car`, `sam`, and `test`
# is assumed from the order in which the keys appear — TODO confirm against
# the code that reads this config.

# Input image and the text queries (class names) associated with it.
image_path: "demo/pokemon1.jpg"
image_caption: ['Charmander', 'Bulbasaur', 'Squirtle']

# CLIP model names and the pretrained-weights tag used for each.
clip:
  semantic_clip_model_name: 'ViT-L/14'
  semantic_pretrained_data: 'openai'
  clip_model_name: 'ViT-B/16'
  pretrained_data: 'openai'

# Thresholds, visual prompt types, text templates, and background class names.
car:
  iom_thres: 0.6
  mask_threshold: 0.5
  confidence_threshold: 0  # 0.2
  clipes_threshold: 0.6
  visual_prompt_type: ['gray', 'blur']
  # Each template contains one `{}` placeholder.
  # presumably filled with a class name by the consumer — verify.
  semantic_templates:
    - 'a clean origami {}.'
    - 'a photo of a {}.'
    - 'This is a photo of a {}'
    - 'There is a {} in the scene'
    - 'There is the {} in the scene'
    - 'a photo of a {} in the scene'
    - 'a photo of a small {}.'
    - 'a photo of a medium {}.'
    - 'a photo of a large {}.'
    - 'This is a photo of a small {}.'
    - 'This is a photo of a medium {}.'
    - 'This is a photo of a large {}.'
    - 'There is a small {} in the scene.'
    - 'There is a medium {} in the scene.'
    - 'There is a large {} in the scene.'
  bg_cls:
    - 'ground'
    - 'land'
    - 'grass'
    - 'tree'
    - 'building'
    - 'wall'
    - 'sky'
    - 'lake'
    - 'water'
    - 'river'
    - 'sea'
    - 'railway'
    - 'railroad'
    - 'helmet'
    - 'cloud'
    - 'house'
    - 'mountain'
    - 'ocean'
    - 'road'
    - 'rock'
    - 'street'
    - 'valley'
    - 'bridge'

# SAM checkpoint location, model type, and mask-generation thresholds.
sam:
  # NOTE(review): machine-specific absolute path; adjust for your environment.
  model_dir: "/homes/53/kevinsun/google-research/clip_as_rnn"
  sam_checkpoint: "./sam_hq_vit_h.pth"
  model_type: "vit_h"
  min_pred_threshold: 0.01
  points_per_side: 64
  pred_iou_thresh: 0.88
  stability_score_thresh: 0.95
  box_nms_thresh: 0.7

# Test/run options: algorithm, dataset name and split, mode, and output path.
test:
  confidence_threshold: 0.7
  algo: "maskcut"
  ds_name: "voc"
  seg_mode: "semantic"
  split: 'val'
  output_path: "./outputs/"
  use_pseudo: false
  # use_iterative: False
  num_iteration: 1