Commit 26eb8143 authored by Davis King

Made spatially_filter_image_separable() a little bit faster.

parent ae08cdc5
...@@ -589,13 +589,25 @@ namespace dlib ...@@ -589,13 +589,25 @@ namespace dlib
long c = first_col; long c = first_col;
for (; c < last_col-3; c+=4) for (; c < last_col-3; c+=4)
{ {
simd4f p, temp = 0; simd4f p,p2,p3, temp = 0, temp2=0, temp3=0;
for (long n = 0; n < row_filter.size(); ++n) long n = 0;
for (; n < row_filter.size()-2; n+=3)
{
// pull out the current pixel and put it into p
p.load(&in_img[r][c-first_col+n]);
p2.load(&in_img[r][c-first_col+n+1]);
p3.load(&in_img[r][c-first_col+n+2]);
temp += p*row_filter(n);
temp2 += p2*row_filter(n+1);
temp3 += p3*row_filter(n+2);
}
for (; n < row_filter.size(); ++n)
{ {
// pull out the current pixel and put it into p // pull out the current pixel and put it into p
p.load(&in_img[r][c-first_col+n]); p.load(&in_img[r][c-first_col+n]);
temp += p*row_filter(n); temp += p*row_filter(n);
} }
temp += temp2 + temp3;
temp.store(&temp_img[r][c]); temp.store(&temp_img[r][c]);
} }
for (; c < last_col; ++c) for (; c < last_col; ++c)
...@@ -618,12 +630,23 @@ namespace dlib ...@@ -618,12 +630,23 @@ namespace dlib
long c = first_col; long c = first_col;
for (; c < last_col-3; c+=4) for (; c < last_col-3; c+=4)
{ {
simd4f p, temp = 0; simd4f p, p2, p3, temp = 0, temp2 = 0, temp3 = 0;
for (long m = 0; m < col_filter.size(); ++m) long m = 0;
for (; m < col_filter.size()-2; m+=3)
{
p.load(&temp_img[r-first_row+m][c]);
p2.load(&temp_img[r-first_row+m+1][c]);
p3.load(&temp_img[r-first_row+m+2][c]);
temp += p*col_filter(m);
temp2 += p2*col_filter(m+1);
temp3 += p3*col_filter(m+2);
}
for (; m < col_filter.size(); ++m)
{ {
p.load(&temp_img[r-first_row+m][c]); p.load(&temp_img[r-first_row+m][c]);
temp += p*col_filter(m); temp += p*col_filter(m);
} }
temp += temp2+temp3;
// save this pixel to the output image // save this pixel to the output image
if (add_to == false) if (add_to == false)
......
...@@ -791,7 +791,7 @@ namespace ...@@ -791,7 +791,7 @@ namespace
{ {
DLIB_TEST(imout[be.element().y()][be.element().x()] == 0) DLIB_TEST(imout[be.element().y()][be.element().x()] == 0)
} }
DLIB_TEST(max(abs(subm(mat(imout),rect) - subm(out,rect))) < 1e-7); DLIB_TEST_MSG(max(abs(subm(mat(imout),rect) - subm(out,rect))) < 1e-5, max(abs(subm(mat(imout),rect) - subm(out,rect))));
assign_all_pixels(imout, 10); assign_all_pixels(imout, 10);
...@@ -815,7 +815,7 @@ namespace ...@@ -815,7 +815,7 @@ namespace
DLIB_TEST(imout[be.element().y()][be.element().x()] == -10) DLIB_TEST(imout[be.element().y()][be.element().x()] == -10)
} }
out += xcorr_same(mat(img),filt)/2; out += xcorr_same(mat(img),filt)/2;
DLIB_TEST(max(abs(subm(mat(imout),rect) - subm(out,rect))) < 1e-7); DLIB_TEST_MSG(max(abs(subm(mat(imout),rect) - subm(out,rect))) < 1e-5, max(abs(subm(mat(imout),rect) - subm(out,rect))));
...@@ -825,7 +825,7 @@ namespace ...@@ -825,7 +825,7 @@ namespace
assign_all_pixels(imout, 10); assign_all_pixels(imout, 10);
rect = spatially_filter_image_separable(img, imout, row_filt, col_filt); rect = spatially_filter_image_separable(img, imout, row_filt, col_filt);
out = xcorr_same(tmp(xcorr_same(mat(img),trans(row_filt))), col_filt); out = xcorr_same(tmp(xcorr_same(mat(img),trans(row_filt))), col_filt);
DLIB_TEST(max(abs(subm(mat(imout),rect) - subm(out,rect))) < 1e-7); DLIB_TEST_MSG(max(abs(subm(mat(imout),rect) - subm(out,rect))) < 1e-5, max(abs(subm(mat(imout),rect) - subm(out,rect))));
be = border_enumerator(get_rect(imout),rect); be = border_enumerator(get_rect(imout),rect);
while (be.move_next()) while (be.move_next())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment