Commit ae08cdc5 authored by Davis King's avatar Davis King

Made spatially_filter_image() use SIMD instructions when filtering float data.

parent 306c9c5b
......@@ -18,97 +18,266 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template <
typename in_image_type,
typename out_image_type,
typename EXP,
typename T
>
typename enable_if_c<pixel_traits<typename out_image_type::type>::grayscale,rectangle>::type
spatially_filter_image (
const in_image_type& in_img,
out_image_type& out_img,
const matrix_exp<EXP>& filter,
T scale,
bool use_abs = false,
bool add_to = false
)
namespace impl
{
COMPILE_TIME_ASSERT( pixel_traits<typename in_image_type::type>::has_alpha == false );
COMPILE_TIME_ASSERT( pixel_traits<typename out_image_type::type>::has_alpha == false );
template <
typename in_image_type,
typename out_image_type,
typename EXP,
typename T
>
rectangle grayscale_spatially_filter_image (
const in_image_type& in_img,
out_image_type& out_img,
const matrix_exp<EXP>& _filter,
T scale,
bool use_abs,
bool add_to
)
{
const_temp_matrix<EXP> filter(_filter);
COMPILE_TIME_ASSERT( pixel_traits<typename in_image_type::type>::has_alpha == false );
COMPILE_TIME_ASSERT( pixel_traits<typename out_image_type::type>::has_alpha == false );
DLIB_ASSERT(scale != 0 && filter.size() != 0,
"\tvoid spatially_filter_image()"
<< "\n\t You can't give a scale of zero or an empty filter."
<< "\n\t scale: "<< scale
<< "\n\t filter.nr(): "<< filter.nr()
<< "\n\t filter.nc(): "<< filter.nc()
DLIB_ASSERT(scale != 0 && filter.size() != 0,
"\trectangle spatially_filter_image()"
<< "\n\t You can't give a scale of zero or an empty filter."
<< "\n\t scale: "<< scale
<< "\n\t filter.nr(): "<< filter.nr()
<< "\n\t filter.nc(): "<< filter.nc()
);
DLIB_ASSERT(is_same_object(in_img, out_img) == false,
"\tvoid spatially_filter_image()"
<< "\n\tYou must give two different image objects"
DLIB_ASSERT(is_same_object(in_img, out_img) == false,
"\trectangle spatially_filter_image()"
<< "\n\tYou must give two different image objects"
);
// if there isn't any input image then don't do anything
if (in_img.size() == 0)
{
out_img.clear();
return rectangle();
}
// if there isn't any input image then don't do anything
if (in_img.size() == 0)
{
out_img.clear();
return rectangle();
}
out_img.set_size(in_img.nr(),in_img.nc());
out_img.set_size(in_img.nr(),in_img.nc());
// figure out the range that we should apply the filter to
const long first_row = (filter.nr()-1)/2;
const long first_col = (filter.nc()-1)/2;
const long last_row = in_img.nr() - (filter.nr()/2);
const long last_col = in_img.nc() - (filter.nc()/2);
// figure out the range that we should apply the filter to
const long first_row = (filter.nr()-1)/2;
const long first_col = (filter.nc()-1)/2;
const long last_row = in_img.nr() - (filter.nr()/2);
const long last_col = in_img.nc() - (filter.nc()/2);
const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1);
if (!add_to)
zero_border_pixels(out_img, non_border);
const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1);
if (!add_to)
zero_border_pixels(out_img, non_border);
// apply the filter to the image
for (long r = first_row; r < last_row; ++r)
{
for (long c = first_col; c < last_col; ++c)
// apply the filter to the image
for (long r = first_row; r < last_row; ++r)
{
typedef typename EXP::type ptype;
ptype p;
ptype temp = 0;
for (long m = 0; m < filter.nr(); ++m)
for (long c = first_col; c < last_col; ++c)
{
for (long n = 0; n < filter.nc(); ++n)
typedef typename EXP::type ptype;
ptype p;
ptype temp = 0;
for (long m = 0; m < filter.nr(); ++m)
{
// pull out the current pixel and put it into p
p = get_pixel_intensity(in_img[r-first_row+m][c-first_col+n]);
temp += p*filter(m,n);
for (long n = 0; n < filter.nc(); ++n)
{
// pull out the current pixel and put it into p
p = get_pixel_intensity(in_img[r-first_row+m][c-first_col+n]);
temp += p*filter(m,n);
}
}
}
temp /= scale;
temp /= scale;
if (use_abs && temp < 0)
{
temp = -temp;
if (use_abs && temp < 0)
{
temp = -temp;
}
// save this pixel to the output image
if (add_to == false)
{
assign_pixel(out_img[r][c], temp);
}
else
{
assign_pixel(out_img[r][c], temp + out_img[r][c]);
}
}
}
// save this pixel to the output image
if (add_to == false)
return non_border;
}
// ------------------------------------------------------------------------------------
template <
typename in_image_type,
typename out_image_type,
typename EXP
>
rectangle float_spatially_filter_image (
const in_image_type& in_img,
out_image_type& out_img,
const matrix_exp<EXP>& _filter,
bool add_to
)
{
const_temp_matrix<EXP> filter(_filter);
DLIB_ASSERT(filter.size() != 0,
"\trectangle spatially_filter_image()"
<< "\n\t You can't give an empty filter."
<< "\n\t filter.nr(): "<< filter.nr()
<< "\n\t filter.nc(): "<< filter.nc()
);
DLIB_ASSERT(is_same_object(in_img, out_img) == false,
"\trectangle spatially_filter_image()"
<< "\n\tYou must give two different image objects"
);
// if there isn't any input image then don't do anything
if (in_img.size() == 0)
{
out_img.clear();
return rectangle();
}
out_img.set_size(in_img.nr(),in_img.nc());
// figure out the range that we should apply the filter to
const long first_row = (filter.nr()-1)/2;
const long first_col = (filter.nc()-1)/2;
const long last_row = in_img.nr() - (filter.nr()/2);
const long last_col = in_img.nc() - (filter.nc()/2);
const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1);
if (!add_to)
zero_border_pixels(out_img, non_border);
// apply the filter to the image
for (long r = first_row; r < last_row; ++r)
{
long c = first_col;
for (; c < last_col-3; c+=4)
{
assign_pixel(out_img[r][c], temp);
simd4f p,p2,p3;
simd4f temp = 0, temp2=0, temp3=0;
for (long m = 0; m < filter.nr(); ++m)
{
long n = 0;
for (; n < filter.nc()-2; n+=3)
{
// pull out the current pixel and put it into p
p.load(&in_img[r-first_row+m][c-first_col+n]);
p2.load(&in_img[r-first_row+m][c-first_col+n+1]);
p3.load(&in_img[r-first_row+m][c-first_col+n+2]);
temp += p*filter(m,n);
temp2 += p2*filter(m,n+1);
temp3 += p3*filter(m,n+2);
}
for (; n < filter.nc(); ++n)
{
// pull out the current pixel and put it into p
p.load(&in_img[r-first_row+m][c-first_col+n]);
temp += p*filter(m,n);
}
}
temp += temp2+temp3;
// save this pixel to the output image
if (add_to == false)
{
temp.store(&out_img[r][c]);
}
else
{
p.load(&out_img[r][c]);
temp += p;
temp.store(&out_img[r][c]);
}
}
else
for (; c < last_col; ++c)
{
assign_pixel(out_img[r][c], temp + out_img[r][c]);
float p;
float temp = 0;
for (long m = 0; m < filter.nr(); ++m)
{
for (long n = 0; n < filter.nc(); ++n)
{
// pull out the current pixel and put it into p
p = in_img[r-first_row+m][c-first_col+n];
temp += p*filter(m,n);
}
}
// save this pixel to the output image
if (add_to == false)
{
out_img[r][c] = temp;
}
else
{
out_img[r][c] += temp;
}
}
}
return non_border;
}
}
return non_border;
// ----------------------------------------------------------------------------------------
template <
typename in_image_type,
typename out_image_type,
typename EXP
>
struct is_float_filtering2
{
const static bool value = is_same_type<typename in_image_type::type,float>::value &&
is_same_type<typename out_image_type::type,float>::value &&
is_same_type<typename EXP::type,float>::value;
};
// ----------------------------------------------------------------------------------------
template <
typename in_image_type,
typename out_image_type,
typename EXP,
typename T
>
typename enable_if_c<pixel_traits<typename out_image_type::type>::grayscale &&
is_float_filtering2<in_image_type,out_image_type,EXP>::value,rectangle>::type
spatially_filter_image (
const in_image_type& in_img,
out_image_type& out_img,
const matrix_exp<EXP>& filter,
T scale,
bool use_abs = false,
bool add_to = false
)
{
if (use_abs == false)
{
if (scale == 1)
return impl::float_spatially_filter_image(in_img, out_img, filter, add_to);
else
return impl::float_spatially_filter_image(in_img, out_img, filter/scale, add_to);
}
else
{
return impl::grayscale_spatially_filter_image(in_img, out_img, filter, scale, true, add_to);
}
}
// ----------------------------------------------------------------------------------------
......@@ -119,26 +288,49 @@ namespace dlib
typename EXP,
typename T
>
typename disable_if_c<pixel_traits<typename out_image_type::type>::grayscale,rectangle>::type
typename enable_if_c<pixel_traits<typename out_image_type::type>::grayscale &&
!is_float_filtering2<in_image_type,out_image_type,EXP>::value,rectangle>::type
spatially_filter_image (
const in_image_type& in_img,
out_image_type& out_img,
const matrix_exp<EXP>& filter,
T scale,
bool use_abs = false,
bool add_to = false
)
{
return impl::grayscale_spatially_filter_image(in_img,out_img,filter,scale,use_abs,add_to);
}
// ----------------------------------------------------------------------------------------
template <
typename in_image_type,
typename out_image_type,
typename EXP,
typename T
>
typename disable_if_c<pixel_traits<typename out_image_type::type>::grayscale,rectangle>::type
spatially_filter_image (
const in_image_type& in_img,
out_image_type& out_img,
const matrix_exp<EXP>& _filter,
T scale
)
{
const_temp_matrix<EXP> filter(_filter);
COMPILE_TIME_ASSERT( pixel_traits<typename in_image_type::type>::has_alpha == false );
COMPILE_TIME_ASSERT( pixel_traits<typename out_image_type::type>::has_alpha == false );
DLIB_ASSERT(scale != 0 && filter.size() != 0,
"\tvoid spatially_filter_image()"
"\trectangle spatially_filter_image()"
<< "\n\t You can't give a scale of zero or an empty filter."
<< "\n\t scale: "<< scale
<< "\n\t filter.nr(): "<< filter.nr()
<< "\n\t filter.nc(): "<< filter.nc()
);
DLIB_ASSERT(is_same_object(in_img, out_img) == false,
"\tvoid spatially_filter_image()"
"\trectangle spatially_filter_image()"
<< "\n\tYou must give two different image objects"
);
......@@ -193,6 +385,8 @@ namespace dlib
return non_border;
}
// ----------------------------------------------------------------------------------------
template <
typename in_image_type,
typename out_image_type,
......@@ -207,6 +401,8 @@ namespace dlib
return spatially_filter_image(in_img,out_img,filter,1);
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
namespace impl
......@@ -236,7 +432,7 @@ namespace dlib
DLIB_ASSERT(scale != 0 && row_filter.size() != 0 && col_filter.size() != 0 &&
is_vector(row_filter) &&
is_vector(col_filter),
"\tvoid spatially_filter_image_separable()"
"\trectangle spatially_filter_image_separable()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t scale: "<< scale
<< "\n\t row_filter.size(): "<< row_filter.size()
......@@ -245,7 +441,7 @@ namespace dlib
<< "\n\t is_vector(col_filter): "<< is_vector(col_filter)
);
DLIB_ASSERT(is_same_object(in_img, out_img) == false,
"\tvoid spatially_filter_image_separable()"
"\trectangle spatially_filter_image_separable()"
<< "\n\tYou must give two different image objects"
);
......@@ -349,7 +545,7 @@ namespace dlib
DLIB_ASSERT(row_filter.size() != 0 && col_filter.size() != 0 &&
is_vector(row_filter) &&
is_vector(col_filter),
"\tvoid spatially_filter_image_separable()"
"\trectangle spatially_filter_image_separable()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t row_filter.size(): "<< row_filter.size()
<< "\n\t col_filter.size(): "<< col_filter.size()
......@@ -357,7 +553,7 @@ namespace dlib
<< "\n\t is_vector(col_filter): "<< is_vector(col_filter)
);
DLIB_ASSERT(is_same_object(in_img, out_img) == false,
"\tvoid spatially_filter_image_separable()"
"\trectangle spatially_filter_image_separable()"
<< "\n\tYou must give two different image objects"
);
......@@ -529,15 +725,13 @@ namespace dlib
spatially_filter_image_separable (
const in_image_type& in_img,
out_image_type& out_img,
const matrix_exp<EXP1>& _row_filter,
const matrix_exp<EXP2>& _col_filter,
const matrix_exp<EXP1>& row_filter,
const matrix_exp<EXP2>& col_filter,
T scale,
bool use_abs = false,
bool add_to = false
)
{
const_temp_matrix<EXP1> row_filter(_row_filter);
const_temp_matrix<EXP2> col_filter(_col_filter);
return impl::grayscale_spatially_filter_image_separable(in_img,out_img, row_filter, col_filter, scale, use_abs, add_to);
}
......@@ -567,7 +761,7 @@ namespace dlib
DLIB_ASSERT(scale != 0 && row_filter.size() != 0 && col_filter.size() != 0 &&
is_vector(row_filter) &&
is_vector(col_filter),
"\tvoid spatially_filter_image_separable()"
"\trectangle spatially_filter_image_separable()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t scale: "<< scale
<< "\n\t row_filter.size(): "<< row_filter.size()
......@@ -576,7 +770,7 @@ namespace dlib
<< "\n\t is_vector(col_filter): "<< is_vector(col_filter)
);
DLIB_ASSERT(is_same_object(in_img, out_img) == false,
"\tvoid spatially_filter_image_separable()"
"\trectangle spatially_filter_image_separable()"
<< "\n\tYou must give two different image objects"
);
......@@ -699,7 +893,7 @@ namespace dlib
col_filter.size()%2 == 1 &&
is_vector(row_filter) &&
is_vector(col_filter),
"\tvoid spatially_filter_image_separable_down()"
"\trectangle spatially_filter_image_separable_down()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t downsample: "<< downsample
<< "\n\t scale: "<< scale
......@@ -709,7 +903,7 @@ namespace dlib
<< "\n\t is_vector(col_filter): "<< is_vector(col_filter)
);
DLIB_ASSERT(is_same_object(in_img, out_img) == false,
"\tvoid spatially_filter_image_separable_down()"
"\trectangle spatially_filter_image_separable_down()"
<< "\n\tYou must give two different image objects"
);
......
......@@ -60,6 +60,9 @@ namespace dlib
- #out_img.nr() == in_img.nr()
- returns a rectangle which indicates what pixels in #out_img are considered
non-border pixels and therefore contain output from the filter.
- if (use_abs == false && all images and filers contain float types) then
- This function will use SIMD instructions and is particularly fast. So if
you can use this form of the function it can give a decent speed boost.
!*/
// ----------------------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment