Commit cbc79bcf authored by Davis King's avatar Davis King

Made extract_fhog_features() faster by using simd instructions. Also added an

option to zero pad the borders of the output to it's easier to filter.
parent 2895d425
...@@ -11,6 +11,8 @@ ...@@ -11,6 +11,8 @@
#include "assign_image.h" #include "assign_image.h"
#include "draw.h" #include "draw.h"
#include "interpolation.h" #include "interpolation.h"
#include "../simd/simd4i.h"
#include "../simd/simd4f.h"
namespace dlib namespace dlib
{ {
...@@ -57,6 +59,89 @@ namespace dlib ...@@ -57,6 +59,89 @@ namespace dlib
} }
} }
template <typename image_type>
inline typename dlib::enable_if_c<pixel_traits<typename image_type::type>::rgb>::type get_gradient (
const int r,
const int c,
const image_type& img,
simd4f& grad_x,
simd4f& grad_y,
simd4f& len
)
{
simd4i rleft((int)img[r][c-1].red,
(int)img[r][c].red,
(int)img[r][c+1].red,
(int)img[r][c+2].red);
simd4i rright((int)img[r][c+1].red,
(int)img[r][c+2].red,
(int)img[r][c+3].red,
(int)img[r][c+4].red);
simd4i rtop((int)img[r-1][c].red,
(int)img[r-1][c+1].red,
(int)img[r-1][c+2].red,
(int)img[r-1][c+3].red);
simd4i rbottom((int)img[r+1][c].red,
(int)img[r+1][c+1].red,
(int)img[r+1][c+2].red,
(int)img[r+1][c+3].red);
simd4i gleft((int)img[r][c-1].green,
(int)img[r][c].green,
(int)img[r][c+1].green,
(int)img[r][c+2].green);
simd4i gright((int)img[r][c+1].green,
(int)img[r][c+2].green,
(int)img[r][c+3].green,
(int)img[r][c+4].green);
simd4i gtop((int)img[r-1][c].green,
(int)img[r-1][c+1].green,
(int)img[r-1][c+2].green,
(int)img[r-1][c+3].green);
simd4i gbottom((int)img[r+1][c].green,
(int)img[r+1][c+1].green,
(int)img[r+1][c+2].green,
(int)img[r+1][c+3].green);
simd4i bleft((int)img[r][c-1].blue,
(int)img[r][c].blue,
(int)img[r][c+1].blue,
(int)img[r][c+2].blue);
simd4i bright((int)img[r][c+1].blue,
(int)img[r][c+2].blue,
(int)img[r][c+3].blue,
(int)img[r][c+4].blue);
simd4i btop((int)img[r-1][c].blue,
(int)img[r-1][c+1].blue,
(int)img[r-1][c+2].blue,
(int)img[r-1][c+3].blue);
simd4i bbottom((int)img[r+1][c].blue,
(int)img[r+1][c+1].blue,
(int)img[r+1][c+2].blue,
(int)img[r+1][c+3].blue);
simd4i grad_x_red = rright-rleft;
simd4i grad_y_red = rbottom-rtop;
simd4i grad_x_green = gright-gleft;
simd4i grad_y_green = gbottom-gtop;
simd4i grad_x_blue = bright-bleft;
simd4i grad_y_blue = bbottom-btop;
simd4i rlen = grad_x_red*grad_x_red + grad_y_red*grad_y_red;
simd4i glen = grad_x_green*grad_x_green + grad_y_green*grad_y_green;
simd4i blen = grad_x_blue*grad_x_blue + grad_y_blue*grad_y_blue;
simd4i cmp = rlen>glen;
simd4i tgrad_x = select(cmp,grad_x_red,grad_x_green);
simd4i tgrad_y = select(cmp,grad_y_red,grad_y_green);
simd4i tlen = select(cmp,rlen,glen);
cmp = tlen>blen;
grad_x = select(cmp,tgrad_x,grad_x_blue);
grad_y = select(cmp,tgrad_y,grad_y_blue);
len = select(cmp,tlen,blen);
}
// ------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------
template <typename image_type> template <typename image_type>
...@@ -73,10 +158,44 @@ namespace dlib ...@@ -73,10 +158,44 @@ namespace dlib
len = length_squared(grad); len = length_squared(grad);
} }
template <typename image_type>
inline typename dlib::disable_if_c<pixel_traits<typename image_type::type>::rgb>::type get_gradient (
int r,
int c,
const image_type& img,
simd4f& grad_x,
simd4f& grad_y,
simd4f& len
)
{
simd4i left((int)get_pixel_intensity(img[r][c-1]),
(int)get_pixel_intensity(img[r][c]),
(int)get_pixel_intensity(img[r][c+1]),
(int)get_pixel_intensity(img[r][c+2]));
simd4i right((int)get_pixel_intensity(img[r][c+1]),
(int)get_pixel_intensity(img[r][c+2]),
(int)get_pixel_intensity(img[r][c+3]),
(int)get_pixel_intensity(img[r][c+4]));
simd4i top((int)get_pixel_intensity(img[r-1][c]),
(int)get_pixel_intensity(img[r-1][c+1]),
(int)get_pixel_intensity(img[r-1][c+2]),
(int)get_pixel_intensity(img[r-1][c+3]));
simd4i bottom((int)get_pixel_intensity(img[r+1][c]),
(int)get_pixel_intensity(img[r+1][c+1]),
(int)get_pixel_intensity(img[r+1][c+2]),
(int)get_pixel_intensity(img[r+1][c+3]));
grad_x = right-left;
grad_y = bottom-top;
len = (grad_x*grad_x + grad_y*grad_y);
}
// ------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------
template <typename T, typename mm1, typename mm2> template <typename T, typename mm1, typename mm2>
void set_hog ( inline void set_hog (
dlib::array<array2d<T,mm1>,mm2>& hog, dlib::array<array2d<T,mm1>,mm2>& hog,
int o, int o,
int x, int x,
...@@ -91,21 +210,29 @@ namespace dlib ...@@ -91,21 +210,29 @@ namespace dlib
void init_hog ( void init_hog (
dlib::array<array2d<T,mm1>,mm2>& hog, dlib::array<array2d<T,mm1>,mm2>& hog,
int hog_nr, int hog_nr,
int hog_nc int hog_nc,
int filter_rows_padding,
int filter_cols_padding
) )
{ {
const int num_hog_bands = 27+4; const int num_hog_bands = 27+4;
hog.resize(num_hog_bands); hog.resize(num_hog_bands);
for (int i = 0; i < num_hog_bands; ++i) for (int i = 0; i < num_hog_bands; ++i)
{ {
hog[i].set_size(hog_nr, hog_nc); hog[i].set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1);
rectangle rect = get_rect(hog[i]);
rect.top() += (filter_rows_padding-1)/2;
rect.left() += (filter_cols_padding-1)/2;
rect.right() -= filter_cols_padding/2;
rect.bottom() -= filter_rows_padding/2;
zero_border_pixels(hog[i],rect);
} }
} }
// ------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------
template <typename T, typename mm> template <typename T, typename mm>
void set_hog ( inline void set_hog (
array2d<matrix<T,31,1>,mm>& hog, array2d<matrix<T,31,1>,mm>& hog,
int o, int o,
int x, int x,
...@@ -120,10 +247,25 @@ namespace dlib ...@@ -120,10 +247,25 @@ namespace dlib
void init_hog ( void init_hog (
array2d<matrix<T,31,1>,mm>& hog, array2d<matrix<T,31,1>,mm>& hog,
int hog_nr, int hog_nr,
int hog_nc int hog_nc,
int filter_rows_padding,
int filter_cols_padding
) )
{ {
hog.set_size(hog_nr, hog_nc); hog.set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1);
// now zero out the border region
rectangle rect = get_rect(hog);
rect.top() += (filter_rows_padding-1)/2;
rect.left() += (filter_cols_padding-1)/2;
rect.right() -= filter_cols_padding/2;
rect.bottom() -= filter_rows_padding/2;
border_enumerator be(get_rect(hog),rect);
while (be.move_next())
{
const point p = be.element();
set_all_elements(hog[p.y()][p.x()], 0);
}
} }
// ------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------
...@@ -135,9 +277,22 @@ namespace dlib ...@@ -135,9 +277,22 @@ namespace dlib
void impl_extract_fhog_features( void impl_extract_fhog_features(
const image_type& img, const image_type& img,
out_type& hog, out_type& hog,
int cell_size int cell_size,
int filter_rows_padding,
int filter_cols_padding
) )
{ {
// make sure requires clause is not broken
DLIB_ASSERT( cell_size > 0 &&
filter_rows_padding > 0 &&
filter_cols_padding > 0 ,
"\t void extract_fhog_features()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t cell_size: " << cell_size
<< "\n\t filter_rows_padding: " << filter_rows_padding
<< "\n\t filter_cols_padding: " << filter_cols_padding
);
/* /*
This function implements the HOG feature extraction method described in This function implements the HOG feature extraction method described in
the paper: the paper:
...@@ -199,7 +354,11 @@ namespace dlib ...@@ -199,7 +354,11 @@ namespace dlib
return; return;
} }
array2d<matrix<float,18,1> > hist(cells_nr, cells_nc); // We give hist extra padding around the edges (1 cell all the way around the
// edge) so we can avoid needing to do boundary checks when indexing into it
// later on. So some statements assign to the boundary but those values are
// never used.
array2d<matrix<float,18,1> > hist(cells_nr+2, cells_nc+2);
for (long r = 0; r < hist.nr(); ++r) for (long r = 0; r < hist.nr(); ++r)
{ {
for (long c = 0; c < hist.nc(); ++c) for (long c = 0; c < hist.nc(); ++c)
...@@ -219,23 +378,98 @@ namespace dlib ...@@ -219,23 +378,98 @@ namespace dlib
hog.clear(); hog.clear();
return; return;
} }
init_hog(hog, hog_nr, hog_nc); const int padding_rows_offset = (filter_rows_padding-1)/2;
const int padding_cols_offset = (filter_cols_padding-1)/2;
init_hog(hog, hog_nr, hog_nc, filter_rows_padding, filter_cols_padding);
const int visible_nr = cells_nr*cell_size; const int visible_nr = std::min((long)cells_nr*cell_size,img.nr())-1;
const int visible_nc = cells_nc*cell_size; const int visible_nc = std::min((long)cells_nc*cell_size,img.nc())-1;
// First populate the gradient histograms // First populate the gradient histograms
for (int y = 1; y < visible_nr-1; y++) for (int y = 1; y < visible_nr; y++)
{
const double yp = ((double)y+0.5)/(double)cell_size - 0.5;
const int iyp = (int)std::floor(yp);
const double vy0 = yp-iyp;
const double vy1 = 1.0-vy0;
int x;
for (x = 1; x < visible_nc-3; x+=4)
{
simd4f xx(x,x+1,x+2,x+3);
// v will be the length of the gradient vectors.
simd4f grad_x, grad_y, v;
get_gradient(y,x,img,grad_x,grad_y,v);
// We will use bilinear interpolation to add into the histogram bins.
// So first we precompute the values needed to determine how much each
// pixel votes into each bin.
simd4f xp = (xx+0.5)/(float)cell_size - 0.5;
simd4f ixp = floor(xp);
simd4f vx0 = xp-ixp;
simd4f vx1 = 1.0f-vx0;
v = sqrt(v);
// Now snap the gradient to one of 18 orientations
simd4f best_dot = 0;
simd4f best_o = 0;
for (int o = 0; o < 9; o++)
{ {
const int r = std::min<int>(y, img.nr()-2); simd4f dot = grad_x*directions[o](0) + grad_y*directions[o](1);
for (int x = 1; x < visible_nc-1; x++) simd4f_bool cmp = dot>best_dot;
best_dot = select(cmp,dot,best_dot);
dot *= -1;
best_o = select(cmp,o,best_o);
cmp = dot>best_dot;
best_dot = select(cmp,dot,best_dot);
best_o = select(cmp,o+9,best_o);
}
// Add the gradient magnitude, v, to 4 histograms around pixel using
// bilinear interpolation.
vx1 *= v;
vx0 *= v;
// The amounts for each bin
simd4f v11 = vy1*vx1;
simd4f v01 = vy0*vx1;
simd4f v10 = vy1*vx0;
simd4f v00 = vy0*vx0;
float _best_o[4]; best_o.store(_best_o);
float _ixp[4]; ixp.store(_ixp);
float _v11[4]; v11.store(_v11);
float _v01[4]; v01.store(_v01);
float _v10[4]; v10.store(_v10);
float _v00[4]; v00.store(_v00);
hist[iyp+1] [_ixp[0]+1](_best_o[0]) += _v11[0];
hist[iyp+1+1][_ixp[0]+1](_best_o[0]) += _v01[0];
hist[iyp+1] [_ixp[0]+2](_best_o[0]) += _v10[0];
hist[iyp+1+1][_ixp[0]+2](_best_o[0]) += _v00[0];
hist[iyp+1] [_ixp[1]+1](_best_o[1]) += _v11[1];
hist[iyp+1+1][_ixp[1]+1](_best_o[1]) += _v01[1];
hist[iyp+1] [_ixp[1]+2](_best_o[1]) += _v10[1];
hist[iyp+1+1][_ixp[1]+2](_best_o[1]) += _v00[1];
hist[iyp+1] [_ixp[2]+1](_best_o[2]) += _v11[2];
hist[iyp+1+1][_ixp[2]+1](_best_o[2]) += _v01[2];
hist[iyp+1] [_ixp[2]+2](_best_o[2]) += _v10[2];
hist[iyp+1+1][_ixp[2]+2](_best_o[2]) += _v00[2];
hist[iyp+1] [_ixp[3]+1](_best_o[3]) += _v11[3];
hist[iyp+1+1][_ixp[3]+1](_best_o[3]) += _v01[3];
hist[iyp+1] [_ixp[3]+2](_best_o[3]) += _v10[3];
hist[iyp+1+1][_ixp[3]+2](_best_o[3]) += _v00[3];
}
// Now process the right columns that don't fit into simd registers.
for (; x < visible_nc; x++)
{ {
const int c = std::min<int>(x, img.nc()-2);
matrix<double,2,1> grad; matrix<double,2,1> grad;
double v; double v;
get_gradient(r,c,img,grad,v); get_gradient(y,x,img,grad,v);
v = std::sqrt(v);
// snap to one of 18 orientations // snap to one of 18 orientations
double best_dot = 0; double best_dot = 0;
...@@ -255,27 +489,17 @@ namespace dlib ...@@ -255,27 +489,17 @@ namespace dlib
} }
} }
v = std::sqrt(v);
// add to 4 histograms around pixel using bilinear interpolation // add to 4 histograms around pixel using bilinear interpolation
double xp = ((double)x+0.5)/(double)cell_size - 0.5; const double xp = ((double)x+0.5)/(double)cell_size - 0.5;
double yp = ((double)y+0.5)/(double)cell_size - 0.5; const int ixp = (int)std::floor(xp);
int ixp = (int)std::floor(xp); const double vx0 = xp-ixp;
int iyp = (int)std::floor(yp); const double vx1 = 1.0-vx0;
double vx0 = xp-ixp;
double vy0 = yp-iyp;
double vx1 = 1.0-vx0;
double vy1 = 1.0-vy0;
if (ixp >= 0 && iyp >= 0)
hist[iyp][ixp](best_o) += vy1*vx1*v;
if (iyp+1 < cells_nr && ixp >= 0) hist[iyp+1][ixp+1](best_o) += vy1*vx1*v;
hist[iyp+1][ixp](best_o) += vy0*vx1*v; hist[iyp+1+1][ixp+1](best_o) += vy0*vx1*v;
hist[iyp+1][ixp+1+1](best_o) += vy1*vx0*v;
if (iyp >= 0 && ixp+1 < cells_nc) hist[iyp+1+1][ixp+1+1](best_o) += vy0*vx0*v;
hist[iyp][ixp+1](best_o) += vy1*vx0*v;
if (ixp+1 < cells_nc && iyp+1 < cells_nr)
hist[iyp+1][ixp+1](best_o) += vy0*vx0*v;
} }
} }
...@@ -286,7 +510,7 @@ namespace dlib ...@@ -286,7 +510,7 @@ namespace dlib
{ {
for (int o = 0; o < 9; o++) for (int o = 0; o < 9; o++)
{ {
norm[r][c] += (hist[r][c](o) + hist[r][c](o+9)) * (hist[r][c](o) + hist[r][c](o+9)); norm[r][c] += (hist[r+1][c+1](o) + hist[r+1][c+1](o+9)) * (hist[r+1][c+1](o) + hist[r+1][c+1](o+9));
} }
} }
} }
...@@ -295,6 +519,7 @@ namespace dlib ...@@ -295,6 +519,7 @@ namespace dlib
// compute features // compute features
for (int y = 0; y < hog_nr; y++) for (int y = 0; y < hog_nr; y++)
{ {
const int yy = y+padding_rows_offset;
for (int x = 0; x < hog_nc; x++) for (int x = 0; x < hog_nc; x++)
{ {
const double nn1 = 0.2*std::sqrt(norm[y+1][x+1] + norm[y+1][x+2] + norm[y+2][x+1] + norm[y+2][x+2] + eps); const double nn1 = 0.2*std::sqrt(norm[y+1][x+1] + norm[y+1][x+2] + norm[y+2][x+1] + norm[y+2][x+2] + eps);
...@@ -311,14 +536,16 @@ namespace dlib ...@@ -311,14 +536,16 @@ namespace dlib
double t3 = 0; double t3 = 0;
double t4 = 0; double t4 = 0;
const int xx = x+padding_cols_offset;
// contrast-sensitive features // contrast-sensitive features
for (int o = 0; o < 18; o++) for (int o = 0; o < 18; o++)
{ {
double h1 = std::min<double>(hist[y+1][x+1](o) , nn1)*n1; double h1 = std::min<double>(hist[y+1+1][x+1+1](o) , nn1)*n1;
double h2 = std::min<double>(hist[y+1][x+1](o) , nn2)*n2; double h2 = std::min<double>(hist[y+1+1][x+1+1](o) , nn2)*n2;
double h3 = std::min<double>(hist[y+1][x+1](o) , nn3)*n3; double h3 = std::min<double>(hist[y+1+1][x+1+1](o) , nn3)*n3;
double h4 = std::min<double>(hist[y+1][x+1](o) , nn4)*n4; double h4 = std::min<double>(hist[y+1+1][x+1+1](o) , nn4)*n4;
set_hog(hog,o,x,y, (h1 + h2 + h3 + h4)); set_hog(hog,o,xx,yy, (h1 + h2 + h3 + h4));
t1 += h1; t1 += h1;
t2 += h2; t2 += h2;
t3 += h3; t3 += h3;
...@@ -328,19 +555,19 @@ namespace dlib ...@@ -328,19 +555,19 @@ namespace dlib
// contrast-insensitive features // contrast-insensitive features
for (int o = 0; o < 9; o++) for (int o = 0; o < 9; o++)
{ {
double sum = hist[y+1][x+1](o) + hist[y+1][x+1](o+9); double sum = hist[y+1+1][x+1+1](o) + hist[y+1+1][x+1+1](o+9);
double h1 = std::min(sum , nn1)*n1; double h1 = std::min(sum , nn1)*n1;
double h2 = std::min(sum , nn2)*n2; double h2 = std::min(sum , nn2)*n2;
double h3 = std::min(sum , nn3)*n3; double h3 = std::min(sum , nn3)*n3;
double h4 = std::min(sum , nn4)*n4; double h4 = std::min(sum , nn4)*n4;
set_hog(hog,o+18,x,y, (h1 + h2 + h3 + h4)); set_hog(hog,o+18,xx,yy, (h1 + h2 + h3 + h4));
} }
// texture features // texture features
set_hog(hog,27,x,y, 2*0.2357 * t1); set_hog(hog,27,xx,yy, 2*0.2357 * t1);
set_hog(hog,28,x,y, 2*0.2357 * t2); set_hog(hog,28,xx,yy, 2*0.2357 * t2);
set_hog(hog,29,x,y, 2*0.2357 * t3); set_hog(hog,29,xx,yy, 2*0.2357 * t3);
set_hog(hog,30,x,y, 2*0.2357 * t4); set_hog(hog,30,xx,yy, 2*0.2357 * t4);
} }
} }
} }
...@@ -383,10 +610,12 @@ namespace dlib ...@@ -383,10 +610,12 @@ namespace dlib
void extract_fhog_features( void extract_fhog_features(
const image_type& img, const image_type& img,
dlib::array<array2d<T,mm1>,mm2>& hog, dlib::array<array2d<T,mm1>,mm2>& hog,
int cell_size = 8 int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
) )
{ {
return impl_fhog::impl_extract_fhog_features(img, hog, cell_size); return impl_fhog::impl_extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding);
} }
template < template <
...@@ -397,10 +626,12 @@ namespace dlib ...@@ -397,10 +626,12 @@ namespace dlib
void extract_fhog_features( void extract_fhog_features(
const image_type& img, const image_type& img,
array2d<matrix<T,31,1>,mm>& hog, array2d<matrix<T,31,1>,mm>& hog,
int cell_size = 8 int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
) )
{ {
return impl_fhog::impl_extract_fhog_features(img, hog, cell_size); return impl_fhog::impl_extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding);
} }
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
...@@ -408,52 +639,105 @@ namespace dlib ...@@ -408,52 +639,105 @@ namespace dlib
inline point image_to_fhog ( inline point image_to_fhog (
point p, point p,
int cell_size = 8 int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
) )
{ {
// There is a one pixel border around the imag. // make sure requires clause is not broken
DLIB_ASSERT( cell_size > 0 &&
filter_rows_padding > 0 &&
filter_cols_padding > 0 ,
"\t point image_to_fhog()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t cell_size: " << cell_size
<< "\n\t filter_rows_padding: " << filter_rows_padding
<< "\n\t filter_cols_padding: " << filter_cols_padding
);
// There is a one pixel border around the image.
p -= point(1,1); p -= point(1,1);
// There is also a 1 "cell" border around the HOG image formation. // There is also a 1 "cell" border around the HOG image formation.
return p/cell_size - point(1,1); return p/cell_size - point(1,1) + point((filter_cols_padding-1)/2,(filter_rows_padding-1)/2);
} }
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
inline rectangle image_to_fhog ( inline rectangle image_to_fhog (
const rectangle& rect, const rectangle& rect,
int cell_size = 8 int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
) )
{ {
return rectangle(image_to_fhog(rect.tl_corner(),cell_size), // make sure requires clause is not broken
image_to_fhog(rect.br_corner(),cell_size)); DLIB_ASSERT( cell_size > 0 &&
filter_rows_padding > 0 &&
filter_cols_padding > 0 ,
"\t rectangle image_to_fhog()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t cell_size: " << cell_size
<< "\n\t filter_rows_padding: " << filter_rows_padding
<< "\n\t filter_cols_padding: " << filter_cols_padding
);
return rectangle(image_to_fhog(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding),
image_to_fhog(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding));
} }
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
inline point fhog_to_image ( inline point fhog_to_image (
point p, point p,
int cell_size = 8 int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
) )
{ {
// make sure requires clause is not broken
DLIB_ASSERT( cell_size > 0 &&
filter_rows_padding > 0 &&
filter_cols_padding > 0 ,
"\t point fhog_to_image()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t cell_size: " << cell_size
<< "\n\t filter_rows_padding: " << filter_rows_padding
<< "\n\t filter_cols_padding: " << filter_cols_padding
);
// Convert to image space and then set to the center of the cell. // Convert to image space and then set to the center of the cell.
point offset; point offset;
p = (p+point(1,1)-point((filter_cols_padding-1)/2,(filter_rows_padding-1)/2))*cell_size + point(1,1);
if (p.x() >= 0 && p.y() >= 0) offset = point(cell_size/2,cell_size/2); if (p.x() >= 0 && p.y() >= 0) offset = point(cell_size/2,cell_size/2);
if (p.x() < 0 && p.y() >= 0) offset = point(-cell_size/2,cell_size/2); if (p.x() < 0 && p.y() >= 0) offset = point(-cell_size/2,cell_size/2);
if (p.x() >= 0 && p.y() < 0) offset = point(cell_size/2,-cell_size/2); if (p.x() >= 0 && p.y() < 0) offset = point(cell_size/2,-cell_size/2);
if (p.x() < 0 && p.y() < 0) offset = point(-cell_size/2,-cell_size/2); if (p.x() < 0 && p.y() < 0) offset = point(-cell_size/2,-cell_size/2);
return (p+point(1,1))*cell_size + point(1,1) + offset; return p + offset;
} }
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
inline rectangle fhog_to_image ( inline rectangle fhog_to_image (
const rectangle& rect, const rectangle& rect,
int cell_size = 8 int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
) )
{ {
return rectangle(fhog_to_image(rect.tl_corner(),cell_size), // make sure requires clause is not broken
fhog_to_image(rect.br_corner(),cell_size)); DLIB_ASSERT( cell_size > 0 &&
filter_rows_padding > 0 &&
filter_cols_padding > 0 ,
"\t rectangle fhog_to_image()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t cell_size: " << cell_size
<< "\n\t filter_rows_padding: " << filter_rows_padding
<< "\n\t filter_cols_padding: " << filter_cols_padding
);
return rectangle(fhog_to_image(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding),
fhog_to_image(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding));
} }
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
...@@ -466,25 +750,35 @@ namespace dlib ...@@ -466,25 +750,35 @@ namespace dlib
> >
matrix<unsigned char> draw_fhog( matrix<unsigned char> draw_fhog(
const dlib::array<array2d<T,mm1>,mm2>& hog, const dlib::array<array2d<T,mm1>,mm2>& hog,
const long w = 15 const long cell_draw_size = 15
) )
{ {
// make sure requires clause is not broken
DLIB_ASSERT( cell_draw_size > 0 && hog.size()==31,
"\t matrix<unsigned char> draw_fhog()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t cell_draw_size: " << cell_draw_size
<< "\n\t hog.size(): " << hog.size()
);
dlib::array<matrix<float> > mbars; dlib::array<matrix<float> > mbars;
impl_fhog::create_fhog_bar_images(mbars,w); impl_fhog::create_fhog_bar_images(mbars,cell_draw_size);
// now draw the bars onto the HOG cells // now draw the bars onto the HOG cells
matrix<float> himg(hog[0].nr()*w, hog[0].nc()*w); matrix<float> himg(hog[0].nr()*cell_draw_size, hog[0].nc()*cell_draw_size);
himg = 0; himg = 0;
for (unsigned long d = 0; d < mbars.size(); ++d) for (unsigned long d = 0; d < mbars.size(); ++d)
{ {
for (long r = 0; r < himg.nr(); r+=w) for (long r = 0; r < himg.nr(); r+=cell_draw_size)
{ {
for (long c = 0; c < himg.nc(); c+=w) for (long c = 0; c < himg.nc(); c+=cell_draw_size)
{ {
const float val = hog[d][r/w][c/w] + hog[d+mbars.size()][r/w][c/w] + hog[d+mbars.size()*2][r/w][c/w]; const float val = hog[d][r/cell_draw_size][c/cell_draw_size] +
hog[d+mbars.size()][r/cell_draw_size][c/cell_draw_size] +
hog[d+mbars.size()*2][r/cell_draw_size][c/cell_draw_size];
if (val > 0) if (val > 0)
{ {
set_subm(himg, r, c, w, w) += val*mbars[d%mbars.size()]; set_subm(himg, r, c, cell_draw_size, cell_draw_size) += val*mbars[d%mbars.size()];
} }
} }
} }
...@@ -501,9 +795,17 @@ namespace dlib ...@@ -501,9 +795,17 @@ namespace dlib
> >
matrix<unsigned char> draw_fhog ( matrix<unsigned char> draw_fhog (
const std::vector<matrix<T> >& hog, const std::vector<matrix<T> >& hog,
const long w = 15 const long cell_draw_size = 15
) )
{ {
// make sure requires clause is not broken
DLIB_ASSERT( cell_draw_size > 0 && hog.size()==31,
"\t matrix<unsigned char> draw_fhog()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t cell_draw_size: " << cell_draw_size
<< "\n\t hog.size(): " << hog.size()
);
// Just convert the input into the right object and then call the above draw_fhog() // Just convert the input into the right object and then call the above draw_fhog()
// function on it. // function on it.
dlib::array<array2d<T> > temp(hog.size()); dlib::array<array2d<T> > temp(hog.size());
...@@ -518,7 +820,7 @@ namespace dlib ...@@ -518,7 +820,7 @@ namespace dlib
} }
} }
} }
return draw_fhog(temp,w); return draw_fhog(temp,cell_draw_size);
} }
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
...@@ -529,25 +831,34 @@ namespace dlib ...@@ -529,25 +831,34 @@ namespace dlib
> >
matrix<unsigned char> draw_fhog( matrix<unsigned char> draw_fhog(
const array2d<matrix<T,31,1>,mm>& hog, const array2d<matrix<T,31,1>,mm>& hog,
const long w = 15 const long cell_draw_size = 15
) )
{ {
// make sure requires clause is not broken
DLIB_ASSERT( cell_draw_size > 0,
"\t matrix<unsigned char> draw_fhog()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t cell_draw_size: " << cell_draw_size
);
dlib::array<matrix<float> > mbars; dlib::array<matrix<float> > mbars;
impl_fhog::create_fhog_bar_images(mbars,w); impl_fhog::create_fhog_bar_images(mbars,cell_draw_size);
// now draw the bars onto the HOG cells // now draw the bars onto the HOG cells
matrix<float> himg(hog.nr()*w, hog.nc()*w); matrix<float> himg(hog.nr()*cell_draw_size, hog.nc()*cell_draw_size);
himg = 0; himg = 0;
for (unsigned long d = 0; d < mbars.size(); ++d) for (unsigned long d = 0; d < mbars.size(); ++d)
{ {
for (long r = 0; r < himg.nr(); r+=w) for (long r = 0; r < himg.nr(); r+=cell_draw_size)
{ {
for (long c = 0; c < himg.nc(); c+=w) for (long c = 0; c < himg.nc(); c+=cell_draw_size)
{ {
const float val = hog[r/w][c/w](d) + hog[r/w][c/w](d+mbars.size()) + hog[r/w][c/w](d+mbars.size()*2); const float val = hog[r/cell_draw_size][c/cell_draw_size](d) +
hog[r/cell_draw_size][c/cell_draw_size](d+mbars.size()) +
hog[r/cell_draw_size][c/cell_draw_size](d+mbars.size()*2);
if (val > 0) if (val > 0)
{ {
set_subm(himg, r, c, w, w) += val*mbars[d%mbars.size()]; set_subm(himg, r, c, cell_draw_size, cell_draw_size) += val*mbars[d%mbars.size()];
} }
} }
} }
......
...@@ -20,11 +20,15 @@ namespace dlib ...@@ -20,11 +20,15 @@ namespace dlib
void extract_fhog_features( void extract_fhog_features(
const image_type& img, const image_type& img,
array2d<matrix<T,31,1>,mm>& hog, array2d<matrix<T,31,1>,mm>& hog,
int cell_size = 8 int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
); );
/*! /*!
requires requires
- cell_size > 0 - cell_size > 0
- filter_rows_padding > 0
- filter_cols_padding > 0
- in_image_type == is an implementation of array2d/array2d_kernel_abstract.h - in_image_type == is an implementation of array2d/array2d_kernel_abstract.h
- img contains some kind of pixel type. - img contains some kind of pixel type.
(i.e. pixel_traits<typename in_image_type::type> is defined) (i.e. pixel_traits<typename in_image_type::type> is defined)
...@@ -40,11 +44,29 @@ namespace dlib ...@@ -40,11 +44,29 @@ namespace dlib
- The input image is broken into cells that are cell_size by cell_size pixels - The input image is broken into cells that are cell_size by cell_size pixels
and within each cell we compute a 31 dimensional FHOG vector. This vector and within each cell we compute a 31 dimensional FHOG vector. This vector
describes the gradient structure within the cell. describes the gradient structure within the cell.
- #hog.nr() is approximately equal to img.nr()/cell_size. - A common task is to convolve each channel of the hog image with a linear
- #hog.nc() is approximately equal to img.nc()/cell_size. filter. This is made more convenient if the contents of #hog includes extra
rows and columns of zero padding along the borders. This extra padding
allows for more efficient convolution code since the code does not need to
perform expensive boundary checking. Therefore, you can set
filter_rows_padding and filter_cols_padding to indicate the size of the
filter you wish to use and this function will ensure #hog has the appropriate
extra zero padding along the borders. In particular, it will include the
following extra padding:
- (filter_rows_padding-1)/2 extra rows of zeros on the top of #hog.
- (filter_cols_padding-1)/2 extra columns of zeros on the left of #hog.
- filter_rows_padding/2 extra rows of zeros on the bottom of #hog.
- filter_cols_padding/2 extra columns of zeros on the right of #hog.
Therefore, the extra padding is done such that functions like
spatially_filter_image() apply their filters to the entire content containing
area of a hog image (note that you should use the following planar version of
extract_fhog_features() instead of the interlaced version if you want to use
spatially_filter_image() on a hog image).
- #hog.nr() is approximately equal to img.nr()/cell_size + filter_rows_padding-1.
- #hog.nc() is approximately equal to img.nc()/cell_size + filter_cols_padding-1.
- for all valid r and c: - for all valid r and c:
- #hog[r][c] == the FHOG vector describing the cell centered at the pixel - #hog[r][c] == the FHOG vector describing the cell centered at the pixel location
location fhog_to_image(point(c,r),cell_size) in img. fhog_to_image(point(c,r),cell_size,filter_rows_padding,filter_cols_padding) in img.
!*/ !*/
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
...@@ -58,11 +80,15 @@ namespace dlib ...@@ -58,11 +80,15 @@ namespace dlib
void extract_fhog_features( void extract_fhog_features(
const image_type& img, const image_type& img,
dlib::array<array2d<T,mm1>,mm2>& hog, dlib::array<array2d<T,mm1>,mm2>& hog,
int cell_size = 8 int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
); );
/*! /*!
requires requires
- cell_size > 0 - cell_size > 0
- filter_rows_padding > 0
- filter_cols_padding > 0
- in_image_type == is an implementation of array2d/array2d_kernel_abstract.h - in_image_type == is an implementation of array2d/array2d_kernel_abstract.h
- img contains some kind of pixel type. - img contains some kind of pixel type.
(i.e. pixel_traits<typename in_image_type::type> is defined) (i.e. pixel_traits<typename in_image_type::type> is defined)
...@@ -83,11 +109,15 @@ namespace dlib ...@@ -83,11 +109,15 @@ namespace dlib
inline point image_to_fhog ( inline point image_to_fhog (
point p, point p,
int cell_size = 8 int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
); );
/*! /*!
requires requires
- cell_size > 0 - cell_size > 0
- filter_rows_padding > 0
- filter_cols_padding > 0
ensures ensures
- When using extract_fhog_features(), each FHOG cell is extracted from a - When using extract_fhog_features(), each FHOG cell is extracted from a
certain region in the input image. image_to_fhog() returns the identity of certain region in the input image. image_to_fhog() returns the identity of
...@@ -98,56 +128,75 @@ namespace dlib ...@@ -98,56 +128,75 @@ namespace dlib
might not have corresponding feature locations. E.g. border points or points might not have corresponding feature locations. E.g. border points or points
outside the image. In these cases the returned point will be outside the outside the image. In these cases the returned point will be outside the
input image. input image.
- Note that you should use the same values of cell_size, filter_rows_padding,
and filter_cols_padding that you used with extract_fhog_features().
!*/ !*/
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
inline rectangle image_to_fhog ( inline rectangle image_to_fhog (
const rectangle& rect, const rectangle& rect,
int cell_size = 8 int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
); );
/*! /*!
requires requires
- cell_size > 0 - cell_size > 0
- filter_rows_padding > 0
- filter_cols_padding > 0
ensures ensures
- maps a rectangle from image space to fhog space. In particular this function returns: - maps a rectangle from image space to fhog space. In particular this function returns:
rectangle(image_to_fhog(rect.tl_corner(),cell_size), image_to_fhog(rect.br_corner(),cell_size)) rectangle(image_to_fhog(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding),
image_to_fhog(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding))
!*/ !*/
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
inline point fhog_to_image ( inline point fhog_to_image (
point p, point p,
int cell_size = 8 int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
); );
/*! /*!
requires requires
- cell_size > 0 - cell_size > 0
- filter_rows_padding > 0
- filter_cols_padding > 0
ensures ensures
- Maps a pixel in a FHOG image (produced by extract_fhog_features()) back to the - Maps a pixel in a FHOG image (produced by extract_fhog_features()) back to the
corresponding original input pixel. Note that since FHOG images are corresponding original input pixel. Note that since FHOG images are
spatially downsampled by aggregation into cells the mapping is not totally spatially downsampled by aggregation into cells the mapping is not totally
invertible. Therefore, the returned location will be the center of the cell invertible. Therefore, the returned location will be the center of the cell
in the original image that contained the FHOG vector at position p. Moreover, in the original image that contained the FHOG vector at position p. Moreover,
cell_size should be set to the value used by the call to extract_fhog_features(). cell_size, filter_rows_padding, and filter_cols_padding should be set to the
values used by the call to extract_fhog_features().
- Mapping from fhog space to image space is an invertible transformation. That - Mapping from fhog space to image space is an invertible transformation. That
is, for any point P we have P == image_to_fhog(fhog_to_image(P,cell_size),cell_size). is, for any point P we have P == image_to_fhog(fhog_to_image(P,cell_size,filter_rows_padding,filter_cols_padding),
cell_size,filter_rows_padding,filter_cols_padding).
!*/ !*/
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
inline rectangle fhog_to_image ( inline rectangle fhog_to_image (
const rectangle& rect, const rectangle& rect,
int cell_size = 8 int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
); );
/*! /*!
requires requires
- cell_size > 0 - cell_size > 0
- filter_rows_padding > 0
- filter_cols_padding > 0
ensures ensures
- maps a rectangle from fhog space to image space. In particular this function returns: - maps a rectangle from fhog space to image space. In particular this function returns:
rectangle(fhog_to_image(rect.tl_corner(),cell_size), fhog_to_image(rect.br_corner(),cell_size)) rectangle(fhog_to_image(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding),
fhog_to_image(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding))
- Mapping from fhog space to image space is an invertible transformation. That - Mapping from fhog space to image space is an invertible transformation. That
is, for any rectangle R we have R == image_to_fhog(fhog_to_image(R,cell_size),cell_size). is, for any rectangle R we have R == image_to_fhog(fhog_to_image(R,cell_size,filter_rows_padding,filter_cols_padding),
cell_size,filter_rows_padding,filter_cols_padding).
!*/ !*/
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
...@@ -187,6 +236,7 @@ namespace dlib ...@@ -187,6 +236,7 @@ namespace dlib
/*! /*!
requires requires
- cell_draw_size > 0 - cell_draw_size > 0
- hog.size() == 31
ensures ensures
- This function just converts the given hog object into an array<array2d<T>> - This function just converts the given hog object into an array<array2d<T>>
and passes it to the above draw_fhog() routine and returns the results. and passes it to the above draw_fhog() routine and returns the results.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment