Commit 16327b65 authored by Davis King

Made input_rgb_image_pyramid use the GPU if available, so it's now a lot
faster. The CPU path is also a lot faster.
parent abf39253
......@@ -9,6 +9,7 @@
#include "../pixel.h"
#include "../image_processing.h"
#include <sstream>
#include "tensor_tools.h"
namespace dlib
......@@ -580,38 +581,72 @@ namespace dlib
);
}
long NR, NC;
pyramid_type pyr;
auto& rects = data.annotation().get<std::vector<rectangle>>();
impl::compute_tiled_image_pyramid_details(pyr, nr, nc, pyramid_padding, pyramid_outer_padding, rects, NR, NC);
std::vector<matrix<rgb_pixel>> imgs(std::distance(ibegin,iend));
parallel_for(0, imgs.size(), [&](long i){
std::vector<rectangle> rects;
if (i == 0)
create_tiled_pyramid<pyramid_type>(ibegin[i], imgs[i], data.annotation().get<std::vector<rectangle>>(), pyramid_padding, pyramid_outer_padding);
else
create_tiled_pyramid<pyramid_type>(ibegin[i], imgs[i], rects, pyramid_padding, pyramid_outer_padding);
});
nr = imgs[0].nr();
nc = imgs[0].nc();
data.set_size(imgs.size(), 3, nr, nc);
// initialize data to the right size to contain the stuff in the iterator range.
data.set_size(std::distance(ibegin,iend), 3, NR, NC);
const size_t offset = nr*nc;
auto ptr = data.host();
for (auto&& img : imgs)
// We need to zero the image before doing the pyramid, since the pyramid
// creation code doesn't write to all parts of the image. We also take
// care to avoid triggering any device-to-host copies.
auto ptr = data.host_write_only();
for (size_t i = 0; i < data.size(); ++i)
ptr[i] = 0;
if (rects.size() == 0)
return;
// copy the first raw image into the top part of the tiled pyramid. We need to
// do this for each of the input images/samples in the tensor.
for (auto i = ibegin; i != iend; ++i)
{
for (long r = 0; r < nr; ++r)
auto& img = *i;
ptr += rects[0].top()*data.nc();
for (long r = 0; r < img.nr(); ++r)
{
for (long c = 0; c < nc; ++c)
auto p = ptr+rects[0].left();
for (long c = 0; c < img.nc(); ++c)
p[c] = (img(r,c).red-avg_red)/256.0;
ptr += data.nc();
}
ptr += data.nc()*(data.nr()-rects[0].bottom()-1);
ptr += rects[0].top()*data.nc();
for (long r = 0; r < img.nr(); ++r)
{
rgb_pixel temp = img(r,c);
auto p = ptr++;
*p = (temp.red-avg_red)/256.0;
p += offset;
*p = (temp.green-avg_green)/256.0;
p += offset;
*p = (temp.blue-avg_blue)/256.0;
p += offset;
auto p = ptr+rects[0].left();
for (long c = 0; c < img.nc(); ++c)
p[c] = (img(r,c).green-avg_green)/256.0;
ptr += data.nc();
}
ptr += data.nc()*(data.nr()-rects[0].bottom()-1);
ptr += rects[0].top()*data.nc();
for (long r = 0; r < img.nr(); ++r)
{
auto p = ptr+rects[0].left();
for (long c = 0; c < img.nc(); ++c)
p[c] = (img(r,c).blue-avg_blue)/256.0;
ptr += data.nc();
}
ptr += offset*(data.k()-1);
ptr += data.nc()*(data.nr()-rects[0].bottom()-1);
}
// now build the image pyramid into data. This does the same thing as
// create_tiled_pyramid(), except we use the GPU if one is available.
for (size_t i = 1; i < rects.size(); ++i)
{
alias_tensor src(data.num_samples(),data.k(),rects[i-1].height(),rects[i-1].width());
alias_tensor dest(data.num_samples(),data.k(),rects[i].height(),rects[i].width());
auto asrc = src(data, data.nc()*rects[i-1].top() + rects[i-1].left());
auto adest = dest(data, data.nc()*rects[i].top() + rects[i].left());
tt::resize_bilinear(adest, data.nc(), data.nr()*data.nc(),
asrc, data.nc(), data.nr()*data.nc());
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment