function phow_caltech101()
% PHOW_CALTECH101 Image classification in the Caltech-101 dataset
% This program demonstrates how to use VLFeat to construct an image
% classifier on the Caltech-101 data. The classifier uses PHOW
% features (dense SIFT), spatial histograms of visual words, and a
% Chi2 SVM. To speedup computation it uses VLFeat fast dense SIFT,
% kd-trees, and homogeneous kernel map. The program also
% demonstrates VLFeat PEGASOS SVM solver, although for this small
% dataset other solvers such as LIBLINEAR can be more efficient.
%
% By default 15 training images are used, which should result in
% about 64% performance (a good performance considering that only a
% single feature type is being used).
%
% Call PHOW_CALTECH101 to train and test a classifier on a small
% subset of the Caltech-101 data. Note that the program
% automatically downloads a copy of the Caltech-101 data from the
% Internet if it cannot find a local copy.
%
% Edit the PHOW_CALTECH101 file to change the program configuration.
%
% To run on the entire dataset change CONF.TINYPROBLEM to FALSE.
%
% The Caltech-101 data is saved into CONF.CALDIR, which defaults to
% 'data/caltech-101'. Change this path to the desired location, for
% instance to point to an existing copy of the Caltech-101 data.
%
% The program can also be used to train a model on custom data by
% pointing CONF.CALDIR to it. Just create a subdirectory for each
% class and put the training images there. Make sure to adjust
% CONF.NUMTRAIN accordingly.
%
% Intermediate files are stored in the directory CONF.DATADIR. All
% such files begin with the prefix CONF.PREFIX, which can be changed
% to test different parameter settings without overriding previous
% results.
%
% The program saves the trained model in
% <CONF.DATADIR>/<CONF.PREFIX>-model.mat. This model can be used to
% test novel images independently of the Caltech data.
%
% load('data/baseline-model.mat') ; # change to the model path
% label = model.classify(model, im) ;
%
% Author: Andrea Vedaldi
% Copyright (C) 2011-2013 Andrea Vedaldi
% All rights reserved.
%
% This file is part of the VLFeat library and is made available under
% the terms of the BSD license (see the COPYING file).
conf.calDir = 'data/caltech-101' ;
conf.dataDir = 'data/' ;
conf.autoDownloadData = true ;
conf.numTrain = 15 ;
conf.numTest = 15 ;
conf.numClasses = 102 ;
conf.numWords = 600 ;
conf.numSpatialX = [2 4] ;
conf.numSpatialY = [2 4] ;
conf.quantizer = 'kdtree' ;
conf.svm.C = 10 ;
conf.svm.solver = 'sdca' ;
%conf.svm.solver = 'sgd' ;
%conf.svm.solver = 'liblinear' ;
conf.svm.biasMultiplier = 1 ;
conf.phowOpts = {'Step', 3} ;
conf.clobber = false ;
conf.tinyProblem = true ;
conf.prefix = 'baseline' ;
conf.randSeed = 1 ;
if conf.tinyProblem
conf.prefix = 'tiny' ;
conf.numClasses = 5 ;
conf.numSpatialX = 2 ;
conf.numSpatialY = 2 ;
conf.numWords = 300 ;
conf.phowOpts = {'Verbose', 2, 'Sizes', 7, 'Step', 5} ;
end
conf.vocabPath = fullfile(conf.dataDir, [conf.prefix '-vocab.mat']) ;
conf.histPath = fullfile(conf.dataDir, [conf.prefix '-hists.mat']) ;
conf.modelPath = fullfile(conf.dataDir, [conf.prefix '-model.mat']) ;
conf.resultPath = fullfile(conf.dataDir, [conf.prefix '-result']) ;
randn('state',conf.randSeed) ;
rand('state',conf.randSeed) ;
vl_twister('state',conf.randSeed) ;
% --------------------------------------------------------------------
% Download Caltech-101 data
% --------------------------------------------------------------------
if ~exist(conf.calDir, 'dir') || ...
(~exist(fullfile(conf.calDir, 'airplanes'),'dir') && ...
~exist(fullfile(conf.calDir, '101_ObjectCategories', 'airplanes')))
if ~conf.autoDownloadData
error(...
['Caltech-101 data not found. ' ...
'Set conf.autoDownloadData=true to download the required data.']) ;
end
vl_xmkdir(conf.calDir) ;
calUrl = ['http://www.vision.caltech.edu/Image_Datasets/' ...
'Caltech101/101_ObjectCategories.tar.gz'] ;
fprintf('Downloading Caltech-101 data to ''%s''. This will take a while.', conf.calDir) ;
untar(calUrl, conf.calDir) ;
end
if ~exist(fullfile(conf.calDir, 'airplanes'),'dir')
conf.calDir = fullfile(conf.calDir, '101_ObjectCategories') ;
end
% --------------------------------------------------------------------
% Setup data
% --------------------------------------------------------------------
classes = dir(conf.calDir) ;
classes = classes([classes.isdir]) ;
classes = {classes(3:conf.numClasses+2).name} ;
images = {} ;
imageClass = {} ;
for ci = 1:length(classes)
ims = dir(fullfile(conf.calDir, classes{ci}, '*.jpg'))' ;
ims = vl_colsubset(ims, conf.numTrain + conf.numTest) ;
ims = cellfun(@(x)fullfile(classes{ci},x),{ims.name},'UniformOutput',false) ;
images = {images{:}, ims{:}} ;
imageClass{end+1} = ci * ones(1,length(ims)) ;
end
selTrain = find(mod(0:length(images)-1, conf.numTrain+conf.numTest) < conf.numTrain) ;
selTest = setdiff(1:length(images), selTrain) ;
imageClass = cat(2, imageClass{:}) ;
model.classes = classes ;
model.phowOpts = conf.phowOpts ;
model.numSpatialX = conf.numSpatialX ;
model.numSpatialY = conf.numSpatialY ;
model.quantizer = conf.quantizer ;
model.vocab = [] ;
model.w = [] ;
model.b = [] ;
model.classify = @classify ;
% --------------------------------------------------------------------
% Train vocabulary
% --------------------------------------------------------------------
if ~exist(conf.vocabPath) || conf.clobber
% Get some PHOW descriptors to train the dictionary
selTrainFeats = vl_colsubset(selTrain, 30) ;
descrs = {} ;
%for ii = 1:length(selTrainFeats)
parfor ii = 1:length(selTrainFeats)
im = imread(fullfile(conf.calDir, images{selTrainFeats(ii)})) ;
im = standarizeImage(im) ;
[drop, descrs{ii}] = vl_phow(im, model.phowOpts{:}) ;
end
descrs = vl_colsubset(cat(2, descrs{:}), 10e4) ;
descrs = single(descrs) ;
% Quantize the descriptors to get the visual words
vocab = vl_kmeans(descrs, conf.numWords, 'verbose', 'algorithm', 'elkan', 'MaxNumIterations', 50) ;
save(conf.vocabPath, 'vocab') ;
else
load(conf.vocabPath) ;
end
model.vocab = vocab ;
if strcmp(model.quantizer, 'kdtree')
model.kdtree = vl_kdtreebuild(vocab) ;
end
% --------------------------------------------------------------------
% Compute spatial histograms
% --------------------------------------------------------------------
if ~exist(conf.histPath) || conf.clobber
hists = {} ;
parfor ii = 1:length(images)
% for ii = 1:length(images)
fprintf('Processing %s (%.2f %%)\n', images{ii}, 100 * ii / length(images)) ;
im = imread(fullfile(conf.calDir, images{ii})) ;
hists{ii} = getImageDescriptor(model, im);
end
hists = cat(2, hists{:}) ;
save(conf.histPath, 'hists') ;
else
load(conf.histPath) ;
end
% --------------------------------------------------------------------
% Compute feature map
% --------------------------------------------------------------------
psix = vl_homkermap(hists, 1, 'kchi2', 'gamma', .5) ;
% --------------------------------------------------------------------
% Train SVM
% --------------------------------------------------------------------
if ~exist(conf.modelPath) || conf.clobber
switch conf.svm.solver
case {'sgd', 'sdca'}
lambda = 1 / (conf.svm.C * length(selTrain)) ;
w = [] ;
parfor ci = 1:length(classes)
perm = randperm(length(selTrain)) ;
fprintf('Training model for class %s\n', classes{ci}) ;
y = 2 * (imageClass(selTrain) == ci) - 1 ;
[w(:,ci) b(ci) info] = vl_svmtrain(psix(:, selTrain(perm)), y(perm), lambda, ...
'Solver', conf.svm.solver, ...
'MaxNumIterations', 50/lambda, ...
'BiasMultiplier', conf.svm.biasMultiplier, ...
'Epsilon', 1e-3);
end
case 'liblinear'
svm = train(imageClass(selTrain)', ...
sparse(double(psix(:,selTrain))), ...
sprintf(' -s 3 -B %f -c %f', ...
conf.svm.biasMultiplier, conf.svm.C), ...
'col') ;
w = svm.w(:,1:end-1)' ;
b = svm.w(:,end)' ;
end
model.b = conf.svm.biasMultiplier * b ;
model.w = w ;
save(conf.modelPath, 'model') ;
else
load(conf.modelPath) ;
end
% --------------------------------------------------------------------
% Test SVM and evaluate
% --------------------------------------------------------------------
% Estimate the class of the test images
scores = model.w' * psix + model.b' * ones(1,size(psix,2)) ;
[drop, imageEstClass] = max(scores, [], 1) ;
% Compute the confusion matrix
idx = sub2ind([length(classes), length(classes)], ...
imageClass(selTest), imageEstClass(selTest)) ;
confus = zeros(length(classes)) ;
confus = vl_binsum(confus, ones(size(idx)), idx) ;
% Plots
figure(1) ; clf;
subplot(1,2,1) ;
imagesc(scores(:,[selTrain selTest])) ; title('Scores') ;
set(gca, 'ytick', 1:length(classes), 'yticklabel', classes) ;
subplot(1,2,2) ;
imagesc(confus) ;
title(sprintf('Confusion matrix (%.2f %% accuracy)', ...
100 * mean(diag(confus)/conf.numTest) )) ;
print('-depsc2', [conf.resultPath '.ps']) ;
save([conf.resultPath '.mat'], 'confus', 'conf') ;
% -------------------------------------------------------------------------
function im = standarizeImage(im)
% -------------------------------------------------------------------------
im = im2single(im) ;
if size(im,1) > 480, im = imresize(im, [480 NaN]) ; end
% -------------------------------------------------------------------------
function hist = getImageDescriptor(model, im)
% -------------------------------------------------------------------------
im = standarizeImage(im) ;
width = size(im,2) ;
height = size(im,1) ;
numWords = size(model.vocab, 2) ;
% get PHOW features
[frames, descrs] = vl_phow(im, model.phowOpts{:}) ;
% quantize local descriptors into visual words
switch model.quantizer
case 'vq'
[drop, binsa] = min(vl_alldist(model.vocab, single(descrs)), [], 1) ;
case 'kdtree'
binsa = double(vl_kdtreequery(model.kdtree, model.vocab, ...
single(descrs), ...
'MaxComparisons', 50)) ;
end
for i = 1:length(model.numSpatialX)
binsx = vl_binsearch(linspace(1,width,model.numSpatialX(i)+1), frames(1,:)) ;
binsy = vl_binsearch(linspace(1,height,model.numSpatialY(i)+1), frames(2,:)) ;
% combined quantization
bins = sub2ind([model.numSpatialY(i), model.numSpatialX(i), numWords], ...
binsy,binsx,binsa) ;
hist = zeros(model.numSpatialY(i) * model.numSpatialX(i) * numWords, 1) ;
hist = vl_binsum(hist, ones(size(bins)), bins) ;
hists{i} = single(hist / sum(hist)) ;
end
hist = cat(1,hists{:}) ;
hist = hist / sum(hist) ;
% -------------------------------------------------------------------------
function [className, score] = classify(model, im)
% -------------------------------------------------------------------------
hist = getImageDescriptor(model, im) ;
psix = vl_homkermap(hist, 1, 'kchi2', 'period', .7) ;
scores = model.w' * psix + model.b' ;
[score, best] = max(scores) ;
className = model.classes{best} ;