function [scores, maxlabel] = classification_demo(im, use_gpu) | |
% [scores, maxlabel] = classification_demo(im, use_gpu) | |
% | |
% Image classification demo using BVLC CaffeNet. | |
% | |
% IMPORTANT: before you run this demo, you should download BVLC CaffeNet | |
% from Model Zoo (http://caffe.berkeleyvision.org/model_zoo.html) | |
% | |
% **************************************************************************** | |
% For detailed documentation and usage on Caffe's Matlab interface, please | |
% refer to the Caffe Interface Tutorial at | |
% http://caffe.berkeleyvision.org/tutorial/interfaces.html#matlab | |
% **************************************************************************** | |
% | |
% input | |
% im color image as uint8 HxWx3 | |
% use_gpu 1 to use the GPU, 0 to use the CPU | |
% | |
% output | |
% scores 1000-dimensional ILSVRC score vector | |
% maxlabel the label of the highest score | |
% | |
% You may need to do the following before you start matlab: | |
% $ export LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64:/usr/local/cuda-5.5/lib64 | |
% $ export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6 | |
% Or the equivalent based on where things are installed on your system | |
% and what versions are installed. | |
% | |
% Usage: | |
% im = imread('../../examples/images/cat.jpg'); | |
% scores = classification_demo(im, 1); | |
% [score, class] = max(scores); | |
% Five things to be aware of: | |
% caffe uses row-major order | |
% matlab uses column-major order | |
% caffe uses BGR color channel order | |
% matlab uses RGB color channel order | |
% images need to have the data mean subtracted | |
% Data coming in from matlab needs to be in the order | |
% [width, height, channels, images] | |
% where width is the fastest dimension. | |
% Here is the rough matlab code for putting image data into the correct | |
% format in W x H x C with BGR channels: | |
% % permute channels from RGB to BGR | |
% im_data = im(:, :, [3, 2, 1]); | |
% % flip width and height to make width the fastest dimension | |
% im_data = permute(im_data, [2, 1, 3]); | |
% % convert from uint8 to single | |
% im_data = single(im_data); | |
% % reshape to a fixed size (e.g., 227x227). | |
% im_data = imresize(im_data, [IMAGE_DIM IMAGE_DIM], 'bilinear'); | |
% % subtract mean_data (already in W x H x C with BGR channels) | |
% im_data = im_data - mean_data; | |
% If you have multiple images, cat them with cat(4, ...) | |
% Add caffe/matlab to your Matlab search PATH in order to use matcaffe | |
if exist('../+caffe', 'dir') | |
addpath('..'); | |
else | |
error('Please run this demo from caffe/matlab/demo'); | |
end | |
% Set caffe mode | |
if exist('use_gpu', 'var') && use_gpu | |
caffe.set_mode_gpu(); | |
gpu_id = 0; % we will use the first gpu in this demo | |
caffe.set_device(gpu_id); | |
else | |
caffe.set_mode_cpu(); | |
end | |
% Initialize the network using BVLC CaffeNet for image classification | |
% Weights (parameter) file needs to be downloaded from Model Zoo. | |
model_dir = '../../models/bvlc_reference_caffenet/'; | |
net_model = [model_dir 'deploy.prototxt']; | |
net_weights = [model_dir 'bvlc_reference_caffenet.caffemodel']; | |
phase = 'test'; % run with phase test (so that dropout isn't applied) | |
if ~exist(net_weights, 'file') | |
error('Please download CaffeNet from Model Zoo before you run this demo'); | |
end | |
% Initialize a network | |
net = caffe.Net(net_model, net_weights, phase); | |
if nargin < 1 | |
% For demo purposes we will use the cat image | |
fprintf('using caffe/examples/images/cat.jpg as input image\n'); | |
im = imread('../../examples/images/cat.jpg'); | |
end | |
% prepare oversampled input | |
% input_data is Height x Width x Channel x Num | |
tic; | |
input_data = {prepare_image(im)}; | |
toc; | |
% do forward pass to get scores | |
% scores are now Channels x Num, where Channels == 1000 | |
tic; | |
% The net forward function. It takes in a cell array of N-D arrays | |
% (where N == 4 here) containing data of input blob(s) and outputs a cell | |
% array containing data from output blob(s) | |
scores = net.forward(input_data); | |
toc; | |
scores = scores{1}; | |
scores = mean(scores, 2); % take average scores over 10 crops | |
[~, maxlabel] = max(scores); | |
% call caffe.reset_all() to reset caffe | |
caffe.reset_all(); | |
% ------------------------------------------------------------------------ | |
function crops_data = prepare_image(im) | |
% ------------------------------------------------------------------------ | |
% caffe/matlab/+caffe/imagenet/ilsvrc_2012_mean.mat contains mean_data that | |
% is already in W x H x C with BGR channels | |
d = load('../+caffe/imagenet/ilsvrc_2012_mean.mat'); | |
mean_data = d.mean_data; | |
IMAGE_DIM = 256; | |
CROPPED_DIM = 227; | |
% Convert an image returned by Matlab's imread to im_data in caffe's data | |
% format: W x H x C with BGR channels | |
im_data = im(:, :, [3, 2, 1]); % permute channels from RGB to BGR | |
im_data = permute(im_data, [2, 1, 3]); % flip width and height | |
im_data = single(im_data); % convert from uint8 to single | |
im_data = imresize(im_data, [IMAGE_DIM IMAGE_DIM], 'bilinear'); % resize im_data | |
im_data = im_data - mean_data; % subtract mean_data (already in W x H x C, BGR) | |
% oversample (4 corners, center, and their x-axis flips) | |
crops_data = zeros(CROPPED_DIM, CROPPED_DIM, 3, 10, 'single'); | |
indices = [0 IMAGE_DIM-CROPPED_DIM] + 1; | |
n = 1; | |
for i = indices | |
for j = indices | |
crops_data(:, :, :, n) = im_data(i:i+CROPPED_DIM-1, j:j+CROPPED_DIM-1, :); | |
crops_data(:, :, :, n+5) = crops_data(end:-1:1, :, :, n); | |
n = n + 1; | |
end | |
end | |
center = floor(indices(2) / 2) + 1; | |
crops_data(:,:,:,5) = ... | |
im_data(center:center+CROPPED_DIM-1,center:center+CROPPED_DIM-1,:); | |
crops_data(:,:,:,10) = crops_data(end:-1:1, :, :, 5); | |