Made input_rgb_image_pyramid use the GPU if available, so it's now a lot
faster. The CPU path is also a lot faster.

Made input_rgb_image_pyramid use the GPU if available, so it's now a lot
faster. The CPU path is also a lot faster.
16327b65 · Davis King · abf39253 · 16327b65
Commit 16327b65 authored Sep 06, 2017 by Davis King
Show whitespace changes
Inline Side-by-side

Showing with 60 additions and 25 deletions

input.h dlib/dnn/input.h +60 -25

No files found.
--- a/dlib/dnn/input.h
+++ b/dlib/dnn/input.h
@@ -9,6 +9,7 @@
 #include "../pixel.h"
 #include "../image_processing.h"
 #include <sstream>
+#include "tensor_tools.h"


 namespace dlib
@@ -580,38 +581,72 @@ namespace dlib
                );
            }

+            long NR, NC;
+            pyramid_type pyr;
+            auto& rects = data.annotation().get<std::vector<rectangle>>();
+            impl::compute_tiled_image_pyramid_details(pyr, nr, nc, pyramid_padding, pyramid_outer_padding, rects, NR, NC);

-            std::vector<matrix<rgb_pixel>> imgs(std::distance(ibegin,iend));
-            parallel_for(0, imgs.size(), [&](long i){
-                std::vector<rectangle> rects;
-                if (i == 0)
-                    create_tiled_pyramid<pyramid_type>(ibegin[i], imgs[i], data.annotation().get<std::vector<rectangle>>(), pyramid_padding, pyramid_outer_padding);
-                else
-                    create_tiled_pyramid<pyramid_type>(ibegin[i], imgs[i], rects, pyramid_padding, pyramid_outer_padding);
-            });
-            nr = imgs[0].nr();
-            nc = imgs[0].nc();
-            data.set_size(imgs.size(), 3, nr, nc);
+            // initialize data to the right size to contain the stuff in the iterator range.
+            data.set_size(std::distance(ibegin,iend), 3, NR, NC);

-            const size_t offset = nr*nc;
-            auto ptr = data.host();
-            for (auto&& img : imgs)
+            // We need to zero the image before doing the pyramid, since the pyramid
+            // creation code doesn't write to all parts of the image.  We also take
+            // care to avoid triggering any device-to-host copies.
+            auto ptr = data.host_write_only();
+            for (size_t i = 0; i < data.size(); ++i)
+                ptr[i] = 0;
+
+            if (rects.size() == 0)
+                return;
+
+            // copy the first raw image into the top part of the tiled pyramid.  We need to
+            // do this for each of the input images/samples in the tensor.
+            for (auto i = ibegin; i != iend; ++i)
            {
-                for (long r = 0; r < nr; ++r)
+                auto& img = *i;
+                ptr += rects[0].top()*data.nc();
+                for (long r = 0; r < img.nr(); ++r)
                {
-                    for (long c = 0; c < nc; ++c)
+                    auto p = ptr+rects[0].left();
+                    for (long c = 0; c < img.nc(); ++c)
+                        p[c] = (img(r,c).red-avg_red)/256.0;
+                    ptr += data.nc();
+                }
+                ptr += data.nc()*(data.nr()-rects[0].bottom()-1);
+
+                ptr += rects[0].top()*data.nc();
+                for (long r = 0; r < img.nr(); ++r)
                {
-                        rgb_pixel temp = img(r,c);
-                        auto p = ptr++;
-                        *p = (temp.red-avg_red)/256.0; 
-                        p += offset;
-                        *p = (temp.green-avg_green)/256.0; 
-                        p += offset;
-                        *p = (temp.blue-avg_blue)/256.0; 
-                        p += offset;
+                    auto p = ptr+rects[0].left();
+                    for (long c = 0; c < img.nc(); ++c)
+                        p[c] = (img(r,c).green-avg_green)/256.0;
+                    ptr += data.nc();
                }
+                ptr += data.nc()*(data.nr()-rects[0].bottom()-1);
+
+                ptr += rects[0].top()*data.nc();
+                for (long r = 0; r < img.nr(); ++r)
+                {
+                    auto p = ptr+rects[0].left();
+                    for (long c = 0; c < img.nc(); ++c)
+                        p[c] = (img(r,c).blue-avg_blue)/256.0;
+                    ptr += data.nc();
                }
-                ptr += offset*(data.k()-1);
+                ptr += data.nc()*(data.nr()-rects[0].bottom()-1);
+            }
+
+            // now build the image pyramid into data.  This does the same thing as
+            // create_tiled_pyramid(), except we use the GPU if one is available. 
+            for (size_t i = 1; i < rects.size(); ++i)
+            {
+                alias_tensor src(data.num_samples(),data.k(),rects[i-1].height(),rects[i-1].width());
+                alias_tensor dest(data.num_samples(),data.k(),rects[i].height(),rects[i].width());
+
+                auto asrc  = src(data, data.nc()*rects[i-1].top() + rects[i-1].left());
+                auto adest = dest(data, data.nc()*rects[i].top() + rects[i].left());
+
+                tt::resize_bilinear(adest, data.nc(), data.nr()*data.nc(), 
+                                    asrc, data.nc(), data.nr()*data.nc());
            }
        }