Commit 26eb8143 authored by Davis King

Made spatially_filter_image_separable() a little bit faster.

parent ae08cdc5
......@@ -589,13 +589,25 @@ namespace dlib
long c = first_col;
for (; c < last_col-3; c+=4)
{
simd4f p, temp = 0;
for (long n = 0; n < row_filter.size(); ++n)
simd4f p,p2,p3, temp = 0, temp2=0, temp3=0;
long n = 0;
for (; n < row_filter.size()-2; n+=3)
{
// pull out the current pixel and put it into p
p.load(&in_img[r][c-first_col+n]);
p2.load(&in_img[r][c-first_col+n+1]);
p3.load(&in_img[r][c-first_col+n+2]);
temp += p*row_filter(n);
temp2 += p2*row_filter(n+1);
temp3 += p3*row_filter(n+2);
}
for (; n < row_filter.size(); ++n)
{
// pull out the current pixel and put it into p
p.load(&in_img[r][c-first_col+n]);
temp += p*row_filter(n);
}
temp += temp2 + temp3;
temp.store(&temp_img[r][c]);
}
for (; c < last_col; ++c)
......@@ -618,12 +630,23 @@ namespace dlib
long c = first_col;
for (; c < last_col-3; c+=4)
{
simd4f p, temp = 0;
for (long m = 0; m < col_filter.size(); ++m)
simd4f p, p2, p3, temp = 0, temp2 = 0, temp3 = 0;
long m = 0;
for (; m < col_filter.size()-2; m+=3)
{
p.load(&temp_img[r-first_row+m][c]);
p2.load(&temp_img[r-first_row+m+1][c]);
p3.load(&temp_img[r-first_row+m+2][c]);
temp += p*col_filter(m);
temp2 += p2*col_filter(m+1);
temp3 += p3*col_filter(m+2);
}
for (; m < col_filter.size(); ++m)
{
p.load(&temp_img[r-first_row+m][c]);
temp += p*col_filter(m);
}
temp += temp2+temp3;
// save this pixel to the output image
if (add_to == false)
......
......@@ -791,7 +791,7 @@ namespace
{
DLIB_TEST(imout[be.element().y()][be.element().x()] == 0)
}
DLIB_TEST(max(abs(subm(mat(imout),rect) - subm(out,rect))) < 1e-7);
DLIB_TEST_MSG(max(abs(subm(mat(imout),rect) - subm(out,rect))) < 1e-5, max(abs(subm(mat(imout),rect) - subm(out,rect))));
assign_all_pixels(imout, 10);
......@@ -815,7 +815,7 @@ namespace
DLIB_TEST(imout[be.element().y()][be.element().x()] == -10)
}
out += xcorr_same(mat(img),filt)/2;
DLIB_TEST(max(abs(subm(mat(imout),rect) - subm(out,rect))) < 1e-7);
DLIB_TEST_MSG(max(abs(subm(mat(imout),rect) - subm(out,rect))) < 1e-5, max(abs(subm(mat(imout),rect) - subm(out,rect))));
......@@ -825,7 +825,7 @@ namespace
assign_all_pixels(imout, 10);
rect = spatially_filter_image_separable(img, imout, row_filt, col_filt);
out = xcorr_same(tmp(xcorr_same(mat(img),trans(row_filt))), col_filt);
DLIB_TEST(max(abs(subm(mat(imout),rect) - subm(out,rect))) < 1e-7);
DLIB_TEST_MSG(max(abs(subm(mat(imout),rect) - subm(out,rect))) < 1e-5, max(abs(subm(mat(imout),rect) - subm(out,rect))));
be = border_enumerator(get_rect(imout),rect);
while (be.move_next())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment