Commit 4e373064 authored by Davis King's avatar Davis King

Filled out spec for the FHOG feature extraction routines.

parent 52cefbe2
......@@ -135,7 +135,7 @@ namespace dlib
void impl_extract_fhog_features(
const image_type& img,
out_type& hog,
int bin_size
int cell_size
)
{
/*
......@@ -190,8 +190,8 @@ namespace dlib
// First we allocate memory for caching orientation histograms & their norms.
const int cells_nr = (int)((double)img.nr()/(double)bin_size + 0.5);
const int cells_nc = (int)((double)img.nc()/(double)bin_size + 0.5);
const int cells_nr = (int)((double)img.nr()/(double)cell_size + 0.5);
const int cells_nc = (int)((double)img.nc()/(double)cell_size + 0.5);
array2d<matrix<float,18,1> > hist(cells_nr, cells_nc);
for (long r = 0; r < hist.nr(); ++r)
......@@ -210,8 +210,8 @@ namespace dlib
const int hog_nc = std::max(cells_nc-2, 0);
init_hog(hog, hog_nr, hog_nc);
const int visible_nr = cells_nr*bin_size;
const int visible_nc = cells_nc*bin_size;
const int visible_nr = cells_nr*cell_size;
const int visible_nc = cells_nc*cell_size;
// First populate the gradient histograms
for (int y = 1; y < visible_nr-1; y++)
......@@ -245,8 +245,8 @@ namespace dlib
}
// add to 4 histograms around pixel using bilinear interpolation
double xp = ((double)x+0.5)/(double)bin_size - 0.5;
double yp = ((double)y+0.5)/(double)bin_size - 0.5;
double xp = ((double)x+0.5)/(double)cell_size - 0.5;
double yp = ((double)y+0.5)/(double)cell_size - 0.5;
int ixp = (int)std::floor(xp);
int iyp = (int)std::floor(yp);
double vx0 = xp-ixp;
......@@ -370,10 +370,10 @@ namespace dlib
void extract_fhog_features(
const image_type& img,
dlib::array<array2d<T,mm1>,mm2>& hog,
int bin_size = 8
int cell_size = 8
)
{
return impl_fhog::impl_extract_fhog_features(img, hog, bin_size);
return impl_fhog::impl_extract_fhog_features(img, hog, cell_size);
}
template <
......@@ -384,10 +384,10 @@ namespace dlib
void extract_fhog_features(
const image_type& img,
array2d<matrix<T,31,1>,mm>& hog,
int bin_size = 8
int cell_size = 8
)
{
return impl_fhog::impl_extract_fhog_features(img, hog, bin_size);
return impl_fhog::impl_extract_fhog_features(img, hog, cell_size);
}
// ----------------------------------------------------------------------------------------
......
......@@ -13,41 +13,110 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template <
typename image_type,
typename image_type,
typename T,
typename mm1,
typename mm2
typename mm
>
void extract_fhog_features(
const image_type& img,
dlib::array<array2d<T,mm1>,mm2>& hog,
int bin_size = 8
array2d<matrix<T,31,1>,mm>& hog,
int cell_size = 8
);
/*!
requires
- cell_size > 0
- in_image_type == is an implementation of array2d/array2d_kernel_abstract.h
- img contains some kind of pixel type.
(i.e. pixel_traits<typename in_image_type::type> is defined)
- T should be float or double
ensures
- This function implements the HOG feature extraction method described in
the paper:
Object Detection with Discriminatively Trained Part Based Models by
P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan
IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, Sep. 2010
This means that it takes an input image img and outputs Felzenszwalb's
version of the HOG features, which are stored into #hog.
- The input image is broken into cells that are cell_size by cell_size pixels
and within each cell we compute a 31 dimensional FHOG vector. This vector
describes the gradient structure within the cell.
- #hog.nr() is approximately equal to img.nr()/cell_size.
- #hog.nc() is approximately equal to img.nc()/cell_size.
- for all valid r and c:
- #hog[r][c] == the FHOG vector describing the cell centered at the pixel
location fhog_to_image(point(c,r),cell_size) in img.
!*/
// ----------------------------------------------------------------------------------------
template <
typename image_type,
typename image_type,
typename T,
typename mm
typename mm1,
typename mm2
>
void extract_fhog_features(
const image_type& img,
array2d<matrix<T,31,1>,mm>& hog,
int bin_size = 8
dlib::array<array2d<T,mm1>,mm2>& hog,
int cell_size = 8
);
/*!
requires
- cell_size > 0
- in_image_type == is an implementation of array2d/array2d_kernel_abstract.h
- img contains some kind of pixel type.
(i.e. pixel_traits<typename in_image_type::type> is defined)
- T should be float or double
ensures
- This function is identical to the above extract_fhog_features() routine
except that it outputs the results in a planar format rather than the
interlaced format used above. That is, each element of the hog vector is
placed into one of 31 images inside #hog. To be precise, if vhog is the
output of the above interlaced version of extract_fhog_features() then we
will have, for all valid r and c:
- #hog[i][r][c] == vhog[r][c](i)
(where 0 <= i < 31)
- #hog.size() == 31
!*/
// ----------------------------------------------------------------------------------------
inline point image_to_fhog (
point p,
int bin_size
int cell_size
);
/*!
requires
- cell_size > 0
ensures
- When using extract_fhog_features(), each FHOG cell is extracted from a
certain region in the input image. image_to_fhog() returns the identity of
the FHOG cell containing the image pixel at location p. Or in other words,
let P == image_to_fhog(p) and hog be a FHOG feature map output by
extract_fhog_features(), then hog[P.y()][P.x()] == the FHOG vector/cell
containing the point p in the input image. Note that some image points
might not have corresponding feature locations. E.g. border points or points
outside the image. In these cases the returned point will be outside the
input image.
!*/
// ----------------------------------------------------------------------------------------
inline point fhog_to_image (
point p,
int bin_size
int cell_size
);
/*!
requires
- cell_size > 0
ensures
- Maps a pixel in a FHOG image (produced by extract_fhog_features()) back to the
corresponding original input pixel. Note that since FHOG images are
spatially downsampled by aggregation into cells the mapping is not totally
invertible. Therefore, the returned location will be the center of the cell
in the original image that contained the FHOG vector at position p. Moreover,
cell_size should be set to the value used by the call to extract_fhog_features().
!*/
// ----------------------------------------------------------------------------------------
......@@ -58,8 +127,21 @@ namespace dlib
>
matrix<unsigned char> draw_fhog(
const dlib::array<array2d<T,mm1>,mm2>& hog,
const long w = 15
const long cell_draw_size = 15
);
/*!
requires
- cell_draw_size > 0
- hog.size() == 31
ensures
- Interprets hog as a FHOG feature map output by extract_fhog_features() and
converts it into an image suitable for display on the screen. In particular,
we draw all the hog cells into a grayscale image in a way that shows the
magnitude and orientation of the gradient energy in each cell. The result is
then returned.
- The size of the cells in the output image will be rendered as cell_draw_size
pixels wide and tall.
!*/
// ----------------------------------------------------------------------------------------
......@@ -69,8 +151,20 @@ namespace dlib
>
matrix<unsigned char> draw_fhog(
const array2d<matrix<T,31,1>,mm>& hog,
const long w = 15
const long cell_draw_size = 15
);
/*!
requires
- cell_draw_size > 0
ensures
- Interprets hog as a FHOG feature map output by extract_fhog_features() and
converts it into an image suitable for display on the screen. In particular,
we draw all the hog cells into a grayscale image in a way that shows the
magnitude and orientation of the gradient energy in each cell. The result is
then returned.
- The size of the cells in the output image will be rendered as cell_draw_size
pixels wide and tall.
!*/
// ----------------------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment