Added float_spatially_filter_image_separable() to the public API.

8e318190 · Davis King · 9d3e40a6 · 8e318190 · 8e318190
Commit 8e318190 authored Nov 23, 2013 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing with 197 additions and 153 deletions

spatial_filtering.h dlib/image_transforms/spatial_filtering.h +157 -153

spatial_filtering_abstract.h dlib/image_transforms/spatial_filtering_abstract.h +40 -0

No files found.
--- a/dlib/image_transforms/spatial_filtering.h
+++ b/dlib/image_transforms/spatial_filtering.h
@@ -522,183 +522,186 @@ namespace dlib
            return non_border;
        }

-    // ------------------------------------------------------------------------------------
+    } // namespace impl

-        // This overload is optimized to use SIMD instructions when filtering float images with
-        // float filters.
-        template <
-            typename in_image_type,
-            typename out_image_type,
-            typename EXP1,
-            typename EXP2
-            >
-        rectangle float_spatially_filter_image_separable (
-            const in_image_type& in_img,
-            out_image_type& out_img,
-            const matrix_exp<EXP1>& _row_filter,
-            const matrix_exp<EXP2>& _col_filter,
-            bool add_to 
-        )
-        {
-            const_temp_matrix<EXP1> row_filter(_row_filter);
-            const_temp_matrix<EXP2> col_filter(_col_filter);
-            DLIB_ASSERT(row_filter.size() != 0 && col_filter.size() != 0 &&
-                is_vector(row_filter) &&
-                is_vector(col_filter),
-                "\trectangle spatially_filter_image_separable()"
-                << "\n\t Invalid inputs were given to this function."
-                << "\n\t row_filter.size(): "<< row_filter.size()
-                << "\n\t col_filter.size(): "<< col_filter.size()
-                << "\n\t is_vector(row_filter): "<< is_vector(row_filter)
-                << "\n\t is_vector(col_filter): "<< is_vector(col_filter)
-            );
-            DLIB_ASSERT(is_same_object(in_img, out_img) == false,
-                "\trectangle spatially_filter_image_separable()"
-                << "\n\tYou must give two different image objects"
-            );
+// ----------------------------------------------------------------------------------------

+    template <
+        typename in_image_type,
+        typename out_image_type,
+        typename EXP1,
+        typename EXP2
+        >
+    struct is_float_filtering
+    {
+        const static bool value = is_same_type<typename in_image_type::type,float>::value &&
+                                  is_same_type<typename out_image_type::type,float>::value &&
+                                  is_same_type<typename EXP1::type,float>::value &&
+                                  is_same_type<typename EXP2::type,float>::value;
+    };

+// ----------------------------------------------------------------------------------------

-            // if there isn't any input image then don't do anything
-            if (in_img.size() == 0)
-            {
-                out_img.clear();
-                return rectangle();
-            }
+    // This function is optimized to use SIMD instructions when filtering float images with
+    // float filters.
+    template <
+        typename in_image_type,
+        typename out_image_type,
+        typename EXP1,
+        typename EXP2
+        >
+    rectangle float_spatially_filter_image_separable (
+        const in_image_type& in_img,
+        out_image_type& out_img,
+        const matrix_exp<EXP1>& _row_filter,
+        const matrix_exp<EXP2>& _col_filter,
+        out_image_type& scratch,
+        bool add_to = false
+    )
+    {
+        // You can only use this function with images and filters containing float
+        // variables.
+        COMPILE_TIME_ASSERT((is_float_filtering<in_image_type,out_image_type,EXP1,EXP2>::value == true));

-            out_img.set_size(in_img.nr(),in_img.nc());

-            // figure out the range that we should apply the filter to
-            const long first_row = (col_filter.size()-1)/2;
-            const long first_col = (row_filter.size()-1)/2;
-            const long last_row = in_img.nr() - (col_filter.size()/2);
-            const long last_col = in_img.nc() - (row_filter.size()/2);
+        const_temp_matrix<EXP1> row_filter(_row_filter);
+        const_temp_matrix<EXP2> col_filter(_col_filter);
+        DLIB_ASSERT(row_filter.size() != 0 && col_filter.size() != 0 &&
+            is_vector(row_filter) &&
+            is_vector(col_filter),
+            "\trectangle float_spatially_filter_image_separable()"
+            << "\n\t Invalid inputs were given to this function."
+            << "\n\t row_filter.size(): "<< row_filter.size()
+            << "\n\t col_filter.size(): "<< col_filter.size()
+            << "\n\t is_vector(row_filter): "<< is_vector(row_filter)
+            << "\n\t is_vector(col_filter): "<< is_vector(col_filter)
+        );
+        DLIB_ASSERT(is_same_object(in_img, out_img) == false,
+            "\trectangle float_spatially_filter_image_separable()"
+            << "\n\tYou must give two different image objects"
+        );

-            const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1);
-            if (!add_to)
-                zero_border_pixels(out_img, non_border); 

-            typedef typename in_image_type::mem_manager_type mem_manager_type;

-            array2d<float,mem_manager_type> temp_img;
-            temp_img.set_size(in_img.nr(), in_img.nc());
+        // if there isn't any input image then don't do anything
+        if (in_img.size() == 0)
+        {
+            out_img.clear();
+            return rectangle();
+        }

-            // apply the row filter
-            for (long r = 0; r < in_img.nr(); ++r)
+        out_img.set_size(in_img.nr(),in_img.nc());
+
+        // figure out the range that we should apply the filter to
+        const long first_row = (col_filter.size()-1)/2;
+        const long first_col = (row_filter.size()-1)/2;
+        const long last_row = in_img.nr() - (col_filter.size()/2);
+        const long last_col = in_img.nc() - (row_filter.size()/2);
+
+        const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1);
+        if (!add_to)
+            zero_border_pixels(out_img, non_border); 
+
+        scratch.set_size(in_img.nr(), in_img.nc());
+
+        // apply the row filter
+        for (long r = 0; r < in_img.nr(); ++r)
+        {
+            long c = first_col;
+            for (; c < last_col-7; c+=8)
            {
-                long c = first_col;
-                for (; c < last_col-7; c+=8)
+                simd8f p,p2,p3, temp = 0, temp2=0, temp3=0;
+                long n = 0;
+                for (; n < row_filter.size()-2; n+=3)
                {
-                    simd8f p,p2,p3, temp = 0, temp2=0, temp3=0;
-                    long n = 0;
-                    for (; n < row_filter.size()-2; n+=3)
-                    {
-                        // pull out the current pixel and put it into p
-                        p.load(&in_img[r][c-first_col+n]);
-                        p2.load(&in_img[r][c-first_col+n+1]);
-                        p3.load(&in_img[r][c-first_col+n+2]);
-                        temp += p*row_filter(n);
-                        temp2 += p2*row_filter(n+1);
-                        temp3 += p3*row_filter(n+2);
-                    }
-                    for (; n < row_filter.size(); ++n)
-                    {
-                        // pull out the current pixel and put it into p
-                        p.load(&in_img[r][c-first_col+n]);
-                        temp += p*row_filter(n);
-                    }
-                    temp += temp2 + temp3;
-                    temp.store(&temp_img[r][c]);
+                    // pull out the current pixel and put it into p
+                    p.load(&in_img[r][c-first_col+n]);
+                    p2.load(&in_img[r][c-first_col+n+1]);
+                    p3.load(&in_img[r][c-first_col+n+2]);
+                    temp += p*row_filter(n);
+                    temp2 += p2*row_filter(n+1);
+                    temp3 += p3*row_filter(n+2);
                }
-                for (; c < last_col; ++c)
+                for (; n < row_filter.size(); ++n)
                {
-                    float p;
-                    float temp = 0;
-                    for (long n = 0; n < row_filter.size(); ++n)
-                    {
-                        // pull out the current pixel and put it into p
-                        p = in_img[r][c-first_col+n];
-                        temp += p*row_filter(n);
-                    }
-                    temp_img[r][c] = temp;
+                    // pull out the current pixel and put it into p
+                    p.load(&in_img[r][c-first_col+n]);
+                    temp += p*row_filter(n);
+                }
+                temp += temp2 + temp3;
+                temp.store(&scratch[r][c]);
+            }
+            for (; c < last_col; ++c)
+            {
+                float p;
+                float temp = 0;
+                for (long n = 0; n < row_filter.size(); ++n)
+                {
+                    // pull out the current pixel and put it into p
+                    p = in_img[r][c-first_col+n];
+                    temp += p*row_filter(n);
                }
+                scratch[r][c] = temp;
            }
+        }

-            // apply the column filter 
-            for (long r = first_row; r < last_row; ++r)
+        // apply the column filter 
+        for (long r = first_row; r < last_row; ++r)
+        {
+            long c = first_col;
+            for (; c < last_col-7; c+=8)
            {
-                long c = first_col;
-                for (; c < last_col-7; c+=8)
+                simd8f p, p2, p3, temp = 0, temp2 = 0, temp3 = 0;
+                long m = 0;
+                for (; m < col_filter.size()-2; m+=3)
                {
-                    simd8f p, p2, p3, temp = 0, temp2 = 0, temp3 = 0;
-                    long m = 0;
-                    for (; m < col_filter.size()-2; m+=3)
-                    {
-                        p.load(&temp_img[r-first_row+m][c]);
-                        p2.load(&temp_img[r-first_row+m+1][c]);
-                        p3.load(&temp_img[r-first_row+m+2][c]);
-                        temp += p*col_filter(m);
-                        temp2 += p2*col_filter(m+1);
-                        temp3 += p3*col_filter(m+2);
-                    }
-                    for (; m < col_filter.size(); ++m)
-                    {
-                        p.load(&temp_img[r-first_row+m][c]);
-                        temp += p*col_filter(m);
-                    }
-                    temp += temp2+temp3;
+                    p.load(&scratch[r-first_row+m][c]);
+                    p2.load(&scratch[r-first_row+m+1][c]);
+                    p3.load(&scratch[r-first_row+m+2][c]);
+                    temp += p*col_filter(m);
+                    temp2 += p2*col_filter(m+1);
+                    temp3 += p3*col_filter(m+2);
+                }
+                for (; m < col_filter.size(); ++m)
+                {
+                    p.load(&scratch[r-first_row+m][c]);
+                    temp += p*col_filter(m);
+                }
+                temp += temp2+temp3;

-                    // save this pixel to the output image
-                    if (add_to == false)
-                    {
-                        temp.store(&out_img[r][c]);
-                    }
-                    else
-                    {
-                        p.load(&out_img[r][c]);
-                        temp += p;
-                        temp.store(&out_img[r][c]);
-                    }
+                // save this pixel to the output image
+                if (add_to == false)
+                {
+                    temp.store(&out_img[r][c]);
                }
-                for (; c < last_col; ++c)
+                else
                {
-                    float temp = 0;
-                    for (long m = 0; m < col_filter.size(); ++m)
-                    {
-                        temp += temp_img[r-first_row+m][c]*col_filter(m);
-                    }
+                    p.load(&out_img[r][c]);
+                    temp += p;
+                    temp.store(&out_img[r][c]);
+                }
+            }
+            for (; c < last_col; ++c)
+            {
+                float temp = 0;
+                for (long m = 0; m < col_filter.size(); ++m)
+                {
+                    temp += scratch[r-first_row+m][c]*col_filter(m);
+                }

-                    // save this pixel to the output image
-                    if (add_to == false)
-                    {
-                        out_img[r][c] = temp;
-                    }
-                    else
-                    {
-                        out_img[r][c] += temp;
-                    }
+                // save this pixel to the output image
+                if (add_to == false)
+                {
+                    out_img[r][c] = temp;
+                }
+                else
+                {
+                    out_img[r][c] += temp;
                }
            }
-            return non_border;
        }
-
-    } // namespace impl
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename in_image_type,
-        typename out_image_type,
-        typename EXP1,
-        typename EXP2
-        >
-    struct is_float_filtering
-    {
-        const static bool value = is_same_type<typename in_image_type::type,float>::value &&
-                                  is_same_type<typename out_image_type::type,float>::value &&
-                                  is_same_type<typename EXP1::type,float>::value &&
-                                  is_same_type<typename EXP2::type,float>::value;
-    };
+        return non_border;
+    }

 // ----------------------------------------------------------------------------------------

@@ -723,10 +726,11 @@ namespace dlib
    {
        if (use_abs == false)
        {
+            out_image_type scratch;
            if (scale == 1)
-                return impl::float_spatially_filter_image_separable(in_img, out_img, row_filter, col_filter, add_to);
+                return float_spatially_filter_image_separable(in_img, out_img, row_filter, col_filter, scratch, add_to);
            else
-                return impl::float_spatially_filter_image_separable(in_img, out_img, row_filter/scale, col_filter, add_to);
+                return float_spatially_filter_image_separable(in_img, out_img, row_filter/scale, col_filter, scratch,  add_to);
        }
        else
        {

--- a/dlib/image_transforms/spatial_filtering_abstract.h
+++ b/dlib/image_transforms/spatial_filtering_abstract.h
@@ -129,6 +129,46 @@ namespace dlib
                  you can use this form of the function it can give a decent speed boost.
    !*/

+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename in_image_type,
+        typename out_image_type,
+        typename EXP1,
+        typename EXP2
+        >
+    rectangle float_spatially_filter_image_separable (
+        const in_image_type& in_img,
+        out_image_type& out_img,
+        const matrix_exp<EXP1>& row_filter,
+        const matrix_exp<EXP2>& col_filter,
+        out_image_type& scratch,
+        bool add_to = false
+    );
+    /*!
+        requires
+            - in_image_type == is an implementation of array2d/array2d_kernel_abstract.h
+            - out_image_type == is an implementation of array2d/array2d_kernel_abstract.h
+            - in_img, out_img, row_filter, and col_filter must all contain float type elements.
+            - is_same_object(in_img, out_img) == false 
+            - row_filter.size() != 0
+            - col_filter.size() != 0
+            - is_vector(row_filter) == true
+            - is_vector(col_filter) == true
+        ensures
+            - This function is identical to the above spatially_filter_image_separable()
+              function except that it can only be invoked on float images with float
+              filters.  In fact, spatially_filter_image_separable() invokes
+              float_spatially_filter_image_separable() in those cases.  So why is
+              float_spatially_filter_image_separable() in the public API?  The reason is
+              that the separable filtering routines internally allocate an image each
+              time they are called.  If you want to avoid this memory allocation then you
+              can call float_spatially_filter_image_separable() and provide the scratch
+              image as input.  This allows you to reuse the same scratch image for many
+              calls to float_spatially_filter_image_separable() and thereby avoid having it
+              allocated and freed for each call.
+    !*/
+
 // ----------------------------------------------------------------------------------------

    template <