function [w,b,sv,obj] = linear_primal_svm(lambda,wInit,bInit,D,noneg,maxIteration,opt)
% Solves the following SVM optimization problem in the primal (with quadratic
% penalization of the training errors). By default it is solved by Newton's method.
%
%   min_{w,b,out}  lambda/2 * w'*w + 1/2 * sum_i out_i^2
%   s.t.           Y_i * (w'*X_i + b) >= D_i - out_i
%                  w(noneg) >= 0
%
% A global variable X containing the training inputs should be defined.
% X is an n x d matrix (n = number of points) and can be either dense or sparse.
% A global variable Y is the target vector of size n x 1. A standard SVM uses
% +1/-1 labels, but arbitrary values are allowed.
% A global variable n is the number of points to use for training.
% LAMBDA is the regularization parameter ( = 1/C).
% wInit and bInit are optional initial values for w and b.
% D is an optional target margin vector; it is all ones for a standard SVM.
% maxIteration is the maximum number of iterations allowed.
%
% W is the hyperplane w (vector of length d).
% B is the bias.
% The outputs on the training points are given by X*W+B.
% SV contains the indices of the support vectors.
% OBJ is the objective function value.
% OPT is a structure containing the options (in brackets default values):
%   cg: Do not use Newton, but nonlinear conjugate gradients [0]
%   lin_cg: Compute the Newton step with linear CG
%           [0 unless solving sparse linear SVM]
%   iter_max_Newton: Maximum number of Newton steps [20]
%   prec: Stopping criterion [1e-6]
%   cg_prec and cg_it: stopping criteria for the linear CG [1e-4 and 20]
%
% Originally written by Olivier Chapelle @ http://olivier.chapelle.cc/primal/
% Modified by Jianxiong Xiao to add several advanced features @ http://mit.edu/jxiao/

if ~exist('maxIteration','var') || maxIteration==Inf
    % Assign the options to their default values
    maxIteration = 10000000;
end

if ~exist('opt','var')
    % Assign the options to their default values
    opt = [];
end
if ~isfield(opt,'cg'),              opt.cg = 0;               end;
if ~isfield(opt,'lin_cg'),          opt.lin_cg = 0;           end;
if ~isfield(opt,'iter_max_Newton'), opt.iter_max_Newton = 20; end; % used to be 20
if ~isfield(opt,'prec'),            opt.prec = 1e-6;          end;
if ~isfield(opt,'cg_prec'),         opt.cg_prec = 1e-4;       end;
if ~isfield(opt,'cg_it'),           opt.cg_it = 20;           end;

global X;
global Y;

if ~exist('noneg','var')
    noneg = [];
end

if ~exist('D','var') || isempty(D)
    D = ones(numel(Y),1);
end

if isempty(X), error('Global variable X undefined'); end;

if ~exist('bInit','var')
    bInit = 0;
end

if ~exist('wInit','var')
    d = size(X,2);
    wInit = zeros(d,1);
end

if issparse(X)
    opt.lin_cg = 1;
end;

if ~opt.cg
    [sol,obj,sv] = primal_svm_linear   (lambda,maxIteration,wInit,bInit,D,noneg,opt);
else
    [sol,obj,sv] = primal_svm_linear_cg(lambda,maxIteration,wInit,bInit,D,noneg,opt);
end;

% The last component of the solution is the bias b.
b = sol(end);
w = sol(1:end-1);

fprintf('\n');

% -------------------------------
% Train a linear SVM using Newton
% -------------------------------
function [w,obj,sv] = primal_svm_linear(lambda,maxIteration,wInit,bInit,D,noneg,opt)
global X;
global Y;
global n;

d = size(X,2);

w = [wInit; bInit]; % The last component of w is b.
w(noneg) = max(w(noneg),0);
%out = ones(n,1);
out = D(1:n) - Y(1:n).*(X(1:n,:)*w(1:end-1)+w(end)); % Vector containing D - Y.*(X*w+b)

for iter=1:maxIteration
    if iter > opt.iter_max_Newton
        warning('PrimalSVM:MaxNumNewton', ...
                'Maximum number of Newton steps reached. Try larger lambda');
        break;
    end;
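
    % At the current support set sv, the penalized objective is locally quadratic
    % in [w; b], so the Newton direction computed below solves
    %     H * step = -grad,   H = lambda*diag([ones(d,1); 0]) + [Xsv 1]'*[Xsv 1]
    % (the bias is not regularized). This H is exact for +/-1 targets; for
    % arbitrary Y it drops the Y.^2 factor and acts as an approximate Hessian.
    % The step length along this direction is then chosen by an exact line search.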

    [obj, grad, sv] = obj_fun_linear(w,lambda,out);

    % Compute the Newton direction either exactly or by linear CG
    if opt.lin_cg
        % Advantage of linear CG when using sparse input: the Hessian is never computed explicitly.
        [step, foo, relres] = minres(@(x) hess_vect_mult(x,sv,lambda), -grad, opt.cg_prec, opt.cg_it);
    else
        Xsv = X(sv,:);
        hess = lambda*diag([ones(d,1); 0]) + [[Xsv'*Xsv sum(Xsv,1)']; [sum(Xsv,1) length(sv)]]; % Hessian
        step = - hess \ grad; % Newton direction
    end;

    % Do an exact line search
    [t,out,sv] = line_search_linear(w,step,out,lambda);

    w = w + t*step;
    w(noneg) = max(w(noneg),0);

    fprintf('Iter = %d, Obj = %f, Nb of sv = %d, Newton decr = %.3f, Line search = %.3f',iter,obj,length(sv),-step'*grad/2,t);
    if opt.lin_cg
        fprintf(', Lin CG acc = %.4f \n',relres);
    else
        fprintf(' \n');
    end;

    if -step'*grad < opt.prec * obj
        % Stop when the Newton decrement is small enough
        break;
    end;
end;

% -----------------------------------------------------
% Train a linear SVM using nonlinear conjugate gradient
% -----------------------------------------------------
function [w,obj,sv] = primal_svm_linear_cg(lambda,maxIteration,wInit,bInit,D,noneg,opt)
global X;
global Y;
global n;

d = size(X,2);

w = [wInit; bInit]; % The last component of w is b.
w(noneg) = max(w(noneg),0);
%out = ones(n,1);
out = D(1:n) - Y(1:n).*(X(1:n,:)*w(1:end-1)+w(end)); % Vector containing D - Y.*(X*w+b)
%go = [X(1:n,:)'*Y(1:n); sum(Y(1:n))]; % -gradient at w=0; would need to change for a nonzero initialization
[~, grad] = obj_fun_linear(w,lambda,out);
go = -grad; % -gradient
s = go;     % The first search direction is given by the gradient

for iter=1:maxIteration
    if iter > opt.cg_it * min(n,d)
        warning('PrimalSVM:MaxNumCG', ...
                'Maximum number of CG iterations reached. Try larger lambda');
        break;
    end;

    % Do an exact line search
    [t,out,sv] = line_search_linear(w,s,out,lambda);
    w = w + t*s;
    w(noneg) = max(w(noneg),0);

    % Compute the new gradient
    [obj, gn, sv] = obj_fun_linear(w,lambda,out);
    gn = -gn;

    fprintf('Iter = %d, Obj = %f, Norm of grad = %.3f \n',iter,obj,norm(gn));

    % Stop when the relative decrease in the objective function is small
    if t*s'*go < opt.prec*obj, break; end;

    % Fletcher-Reeves update. Change the 0 to 1 for Polak-Ribiere.
    be = (gn'*gn - 0*gn'*go) / (go'*go);
    s = be*s + gn;
    go = gn;
end;


function [obj, grad, sv] = obj_fun_linear(w,lambda,out)
% Compute the objective function, its gradient and the set of support vectors.
% out is supposed to contain D - Y.*(X*w+b).
global X;
global Y;
global n;
out = max(0,out);

wb0 = w;
wb0(end) = 0; % Do not penalize b <= Very important for object detection

obj  = sum(out.^2)/2 + lambda*(wb0')*wb0/2; % L2 penalization of the errors
grad = lambda*wb0 - [((out.*Y(1:n))'*X(1:n,:))'; sum(out.*Y(1:n))]; % Gradient
sv   = find(out>0);


function [t,out,sv] = line_search_linear(w,d,out,lambda)
% From the current solution w, do a line search in the direction d by
% 1D Newton minimization.
global X;
global Y;
global n;
t = 0;
% Precompute some dot products
Xd = X(1:n,:)*d(1:end-1) + d(end);
wd = lambda * w(1:end-1)'*d(1:end-1);
dd = lambda * d(1:end-1)'*d(1:end-1);
while 1
    out2 = out - t*(Y(1:n).*Xd); % The new outputs after a step of length t
    sv = find(out2>0);
    g = wd + t*dd - (out2(sv).*Y(sv))'*Xd(sv); % The gradient (along the line)
    h = dd + Xd(sv)'*Xd(sv);                   % The second derivative (along the line)
    t = t - g/h; % Take the 1D Newton step. Note that if d was an exact Newton
                 % direction, t is 1 after the first iteration.
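    % For the local quadratic model q(s) = g*s + h*s^2/2, this Newton step reduces
    % the 1D objective by about g^2/(2*h), so the loop stops once this predicted
    % decrease becomes negligible.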
    if g^2/h < 1e-10, break; end;
    % fprintf('%f %f\n',t,g^2/h)
end;
out = out2;


function y = hess_vect_mult(w,sv,lambda)
% Compute the Hessian times a given vector w, i.e.
%   hess = lambda*diag([ones(d,1); 0]) + [X(sv,:) 1]'*[X(sv,:) 1]
% applied to w = [w; b], without ever forming hess explicitly.
global X;
global n;
y = lambda*w;
y(end) = 0;
z = X(1:n,:)*w(1:end-1) + w(end); % Computing X(sv,:)*w(1:end-1) directly takes more time in Matlab :-(
zz = zeros(length(z),1);
zz(sv) = z(sv);
y = y + [(zz'*X(1:n,:))'; sum(zz)];
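
% ----------------------------------------------------------------------
% Example usage (illustrative sketch only; Xtrain, Ytrain and the chosen
% lambda are placeholder names/values, not part of the original code):
%
%   global X Y n;
%   X = Xtrain;             % n x d matrix of training inputs (dense or sparse)
%   Y = Ytrain;             % n x 1 vector of targets (+1/-1 for a standard SVM)
%   n = size(X,1);          % number of training points to use
%   lambda = 1;             % regularization parameter ( = 1/C)
%   [w,b,sv,obj] = linear_primal_svm(lambda);
%   scores = X*w + b;       % outputs on the training points
% ----------------------------------------------------------------------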