File size: 5,734 Bytes
3bbb319 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
#!/usr/bin/env python
"""
detector.py is an out-of-the-box windowed detector
callable from the command line.
By default it configures and runs the Caffe reference ImageNet model.
Note that this model was trained for image classification and not detection,
and finetuning for detection can be expected to improve results.
The selective_search_ijcv_with_python code required for the selective search
proposal mode is available at
https://github.com/sergeyk/selective_search_ijcv_with_python
TODO:
- batch up image filenames as well: don't want to load all of them into memory
- come up with a batching scheme that preserves order / keeps a unique ID
"""
import numpy as np
import pandas as pd
import os
import argparse
import time
import caffe
# Accepted values for the --crop_mode option; main() hands this list to
# argparse as the option's `choices`.
CROP_MODES = ['list', 'selective_search']
# Window-coordinate column names. main() uses them to pick the window columns
# out of a csv input and to label the window coordinates in the output table.
COORD_COLS = ['ymin', 'xmin', 'ymax', 'xmax']
def _parse_args(argv):
    """Build the command-line parser and parse ``argv``.

    ``argv`` is a full argument vector whose first element is the program
    name (as in ``sys.argv``). When it is empty or None, argparse falls back
    to ``sys.argv`` on its own.
    """
    pycaffe_dir = os.path.dirname(__file__)

    parser = argparse.ArgumentParser()
    # Required arguments: input and output.
    parser.add_argument(
        "input_file",
        help=("Input txt/csv filename. If .txt, must be list of filenames. "
              "If .csv, must be comma-separated file with header "
              "'filename, xmin, ymin, xmax, ymax'")
    )
    parser.add_argument(
        "output_file",
        help="Output h5/csv filename. Format depends on extension."
    )
    # Optional arguments.
    parser.add_argument(
        "--model_def",
        default=os.path.join(
            pycaffe_dir,
            "../models/bvlc_reference_caffenet/deploy.prototxt"),
        help="Model definition file."
    )
    parser.add_argument(
        "--pretrained_model",
        default=os.path.join(
            pycaffe_dir,
            "../models/bvlc_reference_caffenet/"
            "bvlc_reference_caffenet.caffemodel"),
        help="Trained model weights file."
    )
    parser.add_argument(
        "--crop_mode",
        default="selective_search",
        choices=CROP_MODES,
        help="How to generate windows for detection."
    )
    parser.add_argument(
        "--gpu",
        action='store_true',
        help="Switch for gpu computation."
    )
    parser.add_argument(
        "--mean_file",
        default=os.path.join(pycaffe_dir,
                             'caffe/imagenet/ilsvrc_2012_mean.npy'),
        help=("Data set image mean of H x W x K dimensions (numpy array). "
              "Set to '' for no mean subtraction.")
    )
    parser.add_argument(
        "--input_scale",
        type=float,
        help="Multiply input features by this scale to finish preprocessing."
    )
    parser.add_argument(
        "--raw_scale",
        type=float,
        default=255.0,
        help="Multiply raw input by this scale before preprocessing."
    )
    parser.add_argument(
        "--channel_swap",
        default='2,1,0',
        help=("Order to permute input channels. The default converts "
              "RGB -> BGR since BGR is the Caffe default by way of OpenCV.")
    )
    parser.add_argument(
        "--context_pad",
        type=int,
        default=16,
        help="Amount of surrounding context to collect in input window."
    )
    # Drop the program name; an empty/None argv defers to sys.argv.
    return parser.parse_args(argv[1:] if argv else None)


def _load_inputs(input_file):
    """Read the detection inputs named by ``input_file``.

    Returns a list of filenames for a ``txt`` file (blank lines skipped), or
    a DataFrame indexed by ``filename`` for a ``csv`` file.

    Raises:
        ValueError: if the name ends in neither ``txt`` nor ``csv``.
    """
    lowered = input_file.lower()
    if lowered.endswith('txt'):
        with open(input_file) as f:
            # Skip blank lines so they are not treated as image filenames.
            return [line.strip() for line in f if line.strip()]
    if lowered.endswith('csv'):
        inputs = pd.read_csv(input_file, sep=',', dtype={'filename': str})
        inputs.set_index('filename', inplace=True)
        return inputs
    raise ValueError("Unknown input file type: not in txt or csv.")


def _save_detections(df, output_file):
    """Write the detections table to ``output_file``.

    A name ending in ``csv`` gets the window coordinates plus one ``classN``
    column per feature dimension; any other name is written as HDF5 under
    the key ``'df'``.
    """
    if output_file.lower().endswith('csv'):
        # NOTE(review): the original read a 'feat' column; upstream
        # caffe.Detector appears to emit 'prediction' instead -- confirm.
        feat_col = 'feat' if 'feat' in df.columns else 'prediction'
        feats = np.vstack(df[feat_col].tolist())
        # Enumerate the class probabilities; the column count comes from the
        # data itself rather than from a hard-coded constant.
        class_cols = ['class{}'.format(i) for i in range(feats.shape[1])]
        df[class_cols] = pd.DataFrame(
            data=feats, index=df.index, columns=class_cols)
        df.to_csv(output_file, columns=COORD_COLS + class_cols)
    else:
        df.to_hdf(output_file, key='df', mode='w')


def main(argv):
    """Run windowed detection as configured on the command line.

    Args:
        argv: full argument vector including the program name (normally
            ``sys.argv``). If empty or None, ``sys.argv`` is used.

    Raises:
        ValueError: for an unsupported input file extension, or when
            ``--crop_mode list`` is combined with a non-csv input.
    """
    args = _parse_args(argv)

    mean, channel_swap = None, None
    if args.mean_file:
        mean = np.load(args.mean_file)
        # Average a 3-D mean over its two trailing axes unless they are
        # already (1, 1). Arrays of any other rank are passed through as
        # loaded instead of crashing on a missing axis.
        if mean.ndim == 3 and mean.shape[1:] != (1, 1):
            mean = mean.mean(1).mean(1)
    if args.channel_swap:
        channel_swap = [int(s) for s in args.channel_swap.split(',')]

    if args.gpu:
        caffe.set_mode_gpu()
        print("GPU mode")
    else:
        caffe.set_mode_cpu()
        print("CPU mode")

    # Make detector.
    detector = caffe.Detector(args.model_def, args.pretrained_model,
                              mean=mean,
                              input_scale=args.input_scale,
                              raw_scale=args.raw_scale,
                              channel_swap=channel_swap,
                              context_pad=args.context_pad)

    # Load input. The timer deliberately spans loading and detection.
    t = time.time()
    print("Loading input...")
    inputs = _load_inputs(args.input_file)
    is_table = isinstance(inputs, pd.DataFrame)

    # Detect.
    if args.crop_mode == 'list':
        if not is_table:
            raise ValueError(
                "crop_mode 'list' needs a csv input with window coordinates.")
        # Unpack sequence of (image filename, windows).
        images_windows = [
            (ix, inputs.loc[inputs.index == ix, COORD_COLS].values)
            for ix in inputs.index.unique()
        ]
        detections = detector.detect_windows(images_windows)
    else:
        # Iterating a DataFrame yields column labels, so for csv input pass
        # the unique filenames from its index instead of the table itself.
        filenames = list(inputs.index.unique()) if is_table else inputs
        detections = detector.detect_selective_search(filenames)
    print("Processed {} windows in {:.3f} s.".format(len(detections),
                                                     time.time() - t))

    # Collect into dataframe with labeled fields.
    df = pd.DataFrame(detections)
    df.set_index('filename', inplace=True)
    df[COORD_COLS] = pd.DataFrame(
        data=np.vstack(df['window'].tolist()),
        index=df.index, columns=COORD_COLS)
    del df['window']

    # Save results.
    t = time.time()
    _save_detections(df, args.output_file)
    print("Saved to {} in {:.3f} s.".format(args.output_file,
                                            time.time() - t))
# Script entry point: hand the raw command line to main() only when this
# file is executed directly rather than imported.
if __name__ == "__main__":
    import sys

    main(argv=sys.argv)
|