function [frames, descrs] = vl_phow(im, varargin)
% VL_PHOW  Extract PHOW features
%   [FRAMES, DESCRS] = VL_PHOW(IM) extracts PHOW features from the
%   image IM. This function is a wrapper around VL_DSIFT() and
%   VL_IMSMOOTH().
%
%   The PHOW descriptors where introduced in [1]. By default,
%   VL_PHOW() computes the gray-scale variant of the descriptor.  The
%   COLOR option can be used to compute the color variant instead.
%
%   Verbose:: [false]
%     Set to true to turn on verbose output.
%
%   Sizes:: [[4 6 8 10]]
%     Scales at which the dense SIFT features are extracted. Each
%     value is used as bin size for the VL_DSIFT() function.
%
%   Fast:: [true]
%     Set to false to turn off the fast SIFT features computation by
%     VL_DSIFT().
%
%   Step:: [2]
%     Step (in pixels) of the grid at which the dense SIFT features
%     are extracted.
%
%   Color:: [GRAY]
%     Choose between GRAY (PHOW-gray), RGB, HSV, and OPPONENT
%     (PHOW-color).
%
%   ContrastThreshold:: [0.005]
%     Contrast threshold below which SIFT features are mapped to
%     zero. The input image is scaled to have intensity range in [0,1]
%     (rather than [0,255]) and this value is compared to the
%     descriptor norm as returned by VL_DSIFT().
%
%   WindowSize:: [1.5]
%     Size of the Gaussian window in units of spatial bins.
%
%   Magnif:: [6]
%     The image is smoothed by a Gaussian kernel of standard deviation
%     SIZE / MAGNIF.
%
%   FloatDescriptors:: [false]
%     If set to TRUE, the descriptors are returned in floating point
%     format.
%
%   See also: VL_DSIFT(), VL_HELP().

% Copyright (C) 2007-12 Andrea Vedaldi and Brian Fulkerson.
% All rights reserved.
%
% This file is part of the VLFeat library and is made available under
% the terms of the BSD license (see the COPYING file).

% -------------------------------------------------------------------
%                                                 Parse the arguments
% -------------------------------------------------------------------

  opts.verbose = false ;
  opts.fast = true ;
  opts.sizes = [4 6 8 10] ;
  opts.step = 2 ;
  opts.color = 'gray' ;
  opts.floatdescriptors = false ;
  opts.magnif = 6 ;
  opts.windowsize = 1.5 ;
  opts.contrastthreshold = 0.005 ;
  opts = vl_argparse(opts,varargin) ;

  dsiftOpts = {'norm', 'windowsize', opts.windowsize} ;
  if opts.verbose, dsiftOpts{end+1} = 'verbose' ; end
  if opts.fast, dsiftOpts{end+1} = 'fast' ; end
  if opts.floatdescriptors, dsiftOpts{end+1} = 'floatdescriptors' ; end
  dsiftOpts(end+(1:2)) = {'step', opts.step} ;

% -------------------------------------------------------------------
%                                                Extract the features
% -------------------------------------------------------------------


  % standarize the image
  imageSize = [size(im,2) ; size(im,1)] ;
  if strcmp(lower(opts.color), 'gray')
    numChannels = 1 ;
    if size(im,3) > 1, im = rgb2gray(im) ; end
  else
    numChannels = 3 ;
    if size(im,3) == 1, im = cat(3, im, im, im) ; end
    switch lower(opts.color)
      case 'rgb'
      case 'opponent'
        % Note that the mean differs from the standard definition of opponent
        % space and is the regular intesity (for compatibility with
        % the contrast thresholding).
        %
        % Note also that the mean is added pack to the other two
        % components with a small multipliers for monochromatic
        % regions.
        mu = 0.3*im(:,:,1) + 0.59*im(:,:,2) + 0.11*im(:,:,3) ;
        alpha = 0.01 ;
        im = cat(3, mu, ...
                 (im(:,:,1) - im(:,:,2))/sqrt(2) + alpha*mu, ...
                 (im(:,:,1) + im(:,:,2) - 2*im(:,:,3))/sqrt(6) + alpha*mu) ;
      case 'hsv'
        im = rgb2hsv(im) ;
      otherwise
        opts.color = 'hsv' ;
        warning('Color space not recongized, defaulting to HSV color space.') ;
    end
  end

  if opts.verbose
    fprintf('%s: color space: %s\n', mfilename, opts.color) ;
    fprintf('%s: image size: %d x %d\n', mfilename, imageSize(1), imageSize(2)) ;
    fprintf('%s: sizes: [%s]\n', mfilename, sprintf(' %d', opts.sizes)) ;
  end

  for si = 1:length(opts.sizes)

    % Recall from VL_DSIFT() that the first descriptor for scale SIZE has
    % center located at XC = XMIN + 3/2 SIZE (the Y coordinate is
    % similar). It is convenient to align the descriptors at different
    % scales so that they have the same geometric centers. For the
    % maximum size we pick XMIN = 1 and we get centers starting from
    % XC = 1 + 3/2 MAX(OPTS.SIZES). For any other scale we pick XMIN so
    % that XMIN + 3/2 SIZE = 1 + 3/2 MAX(OPTS.SIZES).
    %
    % In pracrice, the offset must be integer ('bounds'), so the
    % alignment works properly only if all OPTS.SZES are even or odd.

    off = floor(1 + 3/2 * (max(opts.sizes) - opts.sizes(si))) ;

    % smooth the image to the appropriate scale based on the size
    % of the SIFT bins
    sigma = opts.sizes(si) / opts.magnif ;
    ims = vl_imsmooth(im, sigma) ;

    % extract dense SIFT features from all channels
    for k = 1:numChannels
      [f{k}, d{k}] = vl_dsift(...
        ims(:,:,k), ...
        dsiftOpts{:},  ...
        'size', opts.sizes(si), ...
        'bounds', [off off +inf +inf]) ;
    end

    % remove low contrast descriptors
    % note that for color descriptors the V component is
    % thresholded
    switch opts.color
      case {'gray', 'opponent'}
        contrast = f{1}(3,:) ;
      case 'rgb'
        contrast = mean([f{1}(3,:) ; f{2}(3,:) ; f{3}(3,:)],1) ;
      otherwise % hsv
        contrast = f{3}(3,:) ;
    end
    for k = 1:numChannels
      d{k}(:, contrast < opts.contrastthreshold) = 0 ;
    end

    % save only x,y, and the scale
    frames{si} = [f{1}(1:3, :) ; opts.sizes(si) * ones(1,size(f{1},2))] ;
    descrs{si} = cat(1, d{:}) ;
  end
  descrs = cell2mat(descrs) ;
  frames = cell2mat(frames) ;
end