Commit 8a707f17 authored by Davis King

Added input_rgb_image_pyramid

parent 09200e8d
......@@ -7,6 +7,7 @@
#include "../matrix.h"
#include "../array2d.h"
#include "../pixel.h"
#include "../image_processing.h"
#include <sstream>
......@@ -57,6 +58,10 @@ namespace dlib
// Returns the stored avg_green value.
float get_avg_green(
) const
{
    return avg_green;
}
// Returns the stored avg_blue value.
float get_avg_blue(
) const
{
    return avg_blue;
}
// Reports whether the point p lies inside the rectangle that get_rect(data)
// returns for the given tensor.
bool image_contained_point (
    const tensor& data,
    const point& p
) const
{
    const auto tensor_area = get_rect(data);
    return tensor_area.contains(p);
}
// Identity mapping: the tensor argument is ignored and r is returned unchanged.
drectangle tensor_space_to_image_space (
    const tensor& /*data*/,
    drectangle r
) const
{
    return r;
}
// Identity mapping: both the tensor and the scale arguments are ignored and r
// is returned unchanged.
drectangle image_space_to_tensor_space (
    const tensor& /*data*/,
    double /*scale*/,
    drectangle r
) const
{
    return r;
}
template <typename forward_iterator>
void to_tensor (
forward_iterator ibegin,
......@@ -180,6 +185,10 @@ namespace dlib
// Returns the stored avg_green value.
float get_avg_green(
) const
{
    return avg_green;
}
// Returns the stored avg_blue value.
float get_avg_blue(
) const
{
    return avg_blue;
}
// Reports whether the point p lies inside the rectangle that get_rect(data)
// returns for the given tensor.
bool image_contained_point (
    const tensor& data,
    const point& p
) const
{
    const auto tensor_area = get_rect(data);
    return tensor_area.contains(p);
}
// Identity mapping: the tensor argument is ignored and r is returned unchanged.
drectangle tensor_space_to_image_space (
    const tensor& /*data*/,
    drectangle r
) const
{
    return r;
}
// Identity mapping: both the tensor and the scale arguments are ignored and r
// is returned unchanged.
drectangle image_space_to_tensor_space (
    const tensor& /*data*/,
    double /*scale*/,
    drectangle r
) const
{
    return r;
}
template <typename forward_iterator>
void to_tensor (
forward_iterator ibegin,
......@@ -298,6 +307,10 @@ namespace dlib
// Converting constructor: accepts an input layer for array2d<T,mm> with any
// mm parameter.  The body is empty, so nothing is copied from the argument.
template <typename mm>
input(
    const input<array2d<T,mm>>&
)
{
}
// Reports whether the point p lies inside the rectangle that get_rect(data)
// returns for the given tensor.
bool image_contained_point (
    const tensor& data,
    const point& p
) const
{
    const auto tensor_area = get_rect(data);
    return tensor_area.contains(p);
}
// Identity mapping: the tensor argument is ignored and r is returned unchanged.
drectangle tensor_space_to_image_space (
    const tensor& /*data*/,
    drectangle r
) const
{
    return r;
}
// Identity mapping: both the tensor and the scale arguments are ignored and r
// is returned unchanged.
drectangle image_space_to_tensor_space (
    const tensor& /*data*/,
    double /*scale*/,
    drectangle r
) const
{
    return r;
}
template <typename forward_iterator>
void to_tensor (
forward_iterator ibegin,
......@@ -391,6 +404,10 @@ namespace dlib
// Converting constructor: accepts an input layer for matrix<T,NR,NC,mm,L>
// with any NR, NC, mm and L parameters.  The body is empty, so nothing is
// copied from the argument.
template <long NR, long NC, typename mm, typename L>
input(
    const input<matrix<T,NR,NC,mm,L>>&
)
{
}
// Reports whether the point p lies inside the rectangle that get_rect(data)
// returns for the given tensor.
bool image_contained_point (
    const tensor& data,
    const point& p
) const
{
    const auto tensor_area = get_rect(data);
    return tensor_area.contains(p);
}
// Identity mapping: the tensor argument is ignored and r is returned unchanged.
drectangle tensor_space_to_image_space (
    const tensor& /*data*/,
    drectangle r
) const
{
    return r;
}
// Identity mapping: both the tensor and the scale arguments are ignored and r
// is returned unchanged.
drectangle image_space_to_tensor_space (
    const tensor& /*data*/,
    double /*scale*/,
    drectangle r
) const
{
    return r;
}
template <typename forward_iterator>
void to_tensor (
forward_iterator ibegin,
......@@ -468,6 +485,156 @@ namespace dlib
}
};
// ----------------------------------------------------------------------------------------
template <typename PYRAMID_TYPE>
class input_rgb_image_pyramid
{
public:
typedef matrix<rgb_pixel> input_type;
typedef PYRAMID_TYPE pyramid_type;
input_rgb_image_pyramid (
) :
avg_red(122.782),
avg_green(117.001),
avg_blue(104.298)
{
}
input_rgb_image_pyramid (
float avg_red_,
float avg_green_,
float avg_blue_
) : avg_red(avg_red_), avg_green(avg_green_), avg_blue(avg_blue_)
{}
float get_avg_red() const { return avg_red; }
float get_avg_green() const { return avg_green; }
float get_avg_blue() const { return avg_blue; }
bool image_contained_point (
const tensor& data,
const point& p
) const
{
auto&& rects = any_cast<std::vector<rectangle>>(data.annotation());
DLIB_CASSERT(rects.size() > 0);
return rects[0].contains(p);
}
drectangle tensor_space_to_image_space (
const tensor& data,
drectangle r
) const
{
auto&& rects = any_cast<std::vector<rectangle>>(data.annotation());
return tiled_pyramid_to_image<pyramid_type>(rects, r);
}
drectangle image_space_to_tensor_space (
const tensor& data,
double scale,
drectangle r
) const
{
auto&& rects = any_cast<std::vector<rectangle>>(data.annotation());
return image_to_tiled_pyramid<pyramid_type>(rects, scale, r);
}
template <typename forward_iterator>
void to_tensor (
forward_iterator ibegin,
forward_iterator iend,
resizable_tensor& data
) const
{
DLIB_CASSERT(std::distance(ibegin,iend) > 0);
auto nr = ibegin->nr();
auto nc = ibegin->nc();
// make sure all the input matrices have the same dimensions
for (auto i = ibegin; i != iend; ++i)
{
DLIB_CASSERT(i->nr()==nr && i->nc()==nc,
"\t input_rgb_image_pyramid::to_tensor()"
<< "\n\t All matrices given to to_tensor() must have the same dimensions."
<< "\n\t nr: " << nr
<< "\n\t nc: " << nc
<< "\n\t i->nr(): " << i->nr()
<< "\n\t i->nc(): " << i->nc()
);
}
matrix<rgb_pixel> img;
create_tiled_pyramid<pyramid_type>(*ibegin, img, data.annotation().get<std::vector<rectangle>>());
nr = img.nr();
nc = img.nc();
data.set_size(std::distance(ibegin,iend), 3, nr, nc);
const size_t offset = nr*nc;
auto ptr = data.host();
while(true)
{
for (long r = 0; r < nr; ++r)
{
for (long c = 0; c < nc; ++c)
{
rgb_pixel temp = img(r,c);
auto p = ptr++;
*p = (temp.red-avg_red)/256.0;
p += offset;
*p = (temp.green-avg_green)/256.0;
p += offset;
*p = (temp.blue-avg_blue)/256.0;
p += offset;
}
}
ptr += offset*(data.k()-1);
++ibegin;
if (ibegin == iend)
break;
create_tiled_pyramid<pyramid_type>(*ibegin, img, data.annotation().get<std::vector<rectangle>>());
}
}
friend void serialize(const input_rgb_image_pyramid& item, std::ostream& out)
{
serialize("input_rgb_image_pyramid", out);
serialize(item.avg_red, out);
serialize(item.avg_green, out);
serialize(item.avg_blue, out);
}
friend void deserialize(input_rgb_image_pyramid& item, std::istream& in)
{
std::string version;
deserialize(version, in);
if (version != "input_rgb_image_pyramid")
throw serialization_error("Unexpected version found while deserializing dlib::input_rgb_image_pyramid.");
deserialize(item.avg_red, in);
deserialize(item.avg_green, in);
deserialize(item.avg_blue, in);
}
friend std::ostream& operator<<(std::ostream& out, const input_rgb_image_pyramid& item)
{
out << "input_rgb_image_pyramid("<<item.avg_red<<","<<item.avg_green<<","<<item.avg_blue<<")";
return out;
}
friend void to_xml(const input_rgb_image_pyramid& item, std::ostream& out)
{
out << "<input_rgb_image_pyramid r='"<<item.avg_red<<"' g='"<<item.avg_green<<"' b='"<<item.avg_blue<<"'/>";
}
private:
float avg_red;
float avg_green;
float avg_blue;
};
// ----------------------------------------------------------------------------------------
}
......
......@@ -153,6 +153,11 @@ namespace dlib
value written to the output tensor is first divided by 256.0 so that the
resulting outputs are all in the range [0,1].
!*/
// Provided for compatibility with input_rgb_image_pyramid's interface
// Reports whether the point p lies inside the rectangle that get_rect(data)
// returns for the given tensor.
bool image_contained_point (
    const tensor& data,
    const point& p
) const
{
    const auto tensor_area = get_rect(data);
    return tensor_area.contains(p);
}
// Identity mapping: the tensor argument is ignored and r is returned unchanged.
drectangle tensor_space_to_image_space (
    const tensor& /*data*/,
    drectangle r
) const
{
    return r;
}
// Identity mapping: both the tensor and the scale arguments are ignored and r
// is returned unchanged.
drectangle image_space_to_tensor_space (
    const tensor& /*data*/,
    double /*scale*/,
    drectangle r
) const
{
    return r;
}
};
// ----------------------------------------------------------------------------------------
......@@ -234,6 +239,12 @@ namespace dlib
subtracted (according to get_avg_red(), get_avg_green(), or
get_avg_blue()) and then is divided by 256.0.
!*/
// Provided for compatibility with input_rgb_image_pyramid's interface
// Reports whether the point p lies inside the rectangle that get_rect(data)
// returns for the given tensor.
bool image_contained_point (
    const tensor& data,
    const point& p
) const
{
    const auto tensor_area = get_rect(data);
    return tensor_area.contains(p);
}
// Identity mapping: the tensor argument is ignored and r is returned unchanged.
drectangle tensor_space_to_image_space (
    const tensor& /*data*/,
    drectangle r
) const
{
    return r;
}
// Identity mapping: both the tensor and the scale arguments are ignored and r
// is returned unchanged.
drectangle image_space_to_tensor_space (
    const tensor& /*data*/,
    double /*scale*/,
    drectangle r
) const
{
    return r;
}
};
// ----------------------------------------------------------------------------------------
......@@ -253,6 +264,159 @@ namespace dlib
};
// ----------------------------------------------------------------------------------------
template <
typename PYRAMID_TYPE
>
class input_rgb_image_pyramid
{
/*!
REQUIREMENTS ON PYRAMID_TYPE
PYRAMID_TYPE must be an instance of the dlib::pyramid_down template.
WHAT THIS OBJECT REPRESENTS
This input layer works with RGB images of type matrix<rgb_pixel>. It is
identical to input_rgb_image except that it outputs a tensor containing a
tiled image pyramid of each input image rather than a simple copy of each
image. The tiled image pyramid is created using create_tiled_pyramid().
The std::vector<rectangle> filled in by create_tiled_pyramid() is stored in
the output tensor's annotation(). image_contained_point(),
image_space_to_tensor_space(), and tensor_space_to_image_space() read it
back from there, which is why they require a tensor produced by
to_tensor().
!*/
public:
typedef matrix<rgb_pixel> input_type;
typedef PYRAMID_TYPE pyramid_type;
input_rgb_image_pyramid (
);
/*!
ensures
- #get_avg_red() == 122.782
- #get_avg_green() == 117.001
- #get_avg_blue() == 104.298
!*/
input_rgb_image_pyramid (
float avg_red,
float avg_green,
float avg_blue
);
/*!
ensures
- #get_avg_red() == avg_red
- #get_avg_green() == avg_green
- #get_avg_blue() == avg_blue
!*/
float get_avg_red(
) const;
/*!
ensures
- returns the value subtracted from the red color channel.
!*/
float get_avg_green(
) const;
/*!
ensures
- returns the value subtracted from the green color channel.
!*/
float get_avg_blue(
) const;
/*!
ensures
- returns the value subtracted from the blue color channel.
!*/
template <typename forward_iterator>
void to_tensor (
forward_iterator ibegin,
forward_iterator iend,
resizable_tensor& data
) const;
/*!
requires
- [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
- The input range should contain images that all have the same
dimensions.
ensures
- Converts the iterator range into a tensor and stores it into #data. In
particular, we will have:
- #data.num_samples() == std::distance(ibegin,iend)
- #data.k() == 3
- #data.nr() and #data.nc() are the dimensions of the tiled pyramid
image rather than the dimensions of the input images.
- Each sample in #data contains a tiled image pyramid of the
corresponding input image. The tiled pyramid is created by
create_tiled_pyramid().
- #data.annotation() contains the std::vector<rectangle> filled in by
create_tiled_pyramid().
Moreover, each color channel is normalized by having its average value
subtracted (according to get_avg_red(), get_avg_green(), or
get_avg_blue()) and then is divided by 256.0.
!*/
bool image_contained_point (
const tensor& data,
const point& p
) const;
/*!
requires
- data is a tensor that was produced by this->to_tensor()
ensures
- Since data is a tensor that is built from a bunch of identically sized
images, we can ask if those images were big enough to contain the point
p. This function returns the answer to that question.
!*/
drectangle image_space_to_tensor_space (
const tensor& data,
double scale,
drectangle r
) const;
/*!
requires
- data is a tensor that was produced by this->to_tensor()
- 0 < scale <= 1
ensures
- This function maps from to_tensor()'s input image space to its output
tensor space. Therefore, given that data is a tensor produced by
to_tensor(), image_space_to_tensor_space() allows you to ask for the
rectangle in data that corresponds to a rectangle in the original image
space.
Note that since the output tensor contains an image pyramid, there are
multiple points in the output tensor that correspond to any input
location. So you must also specify a scale so we know what level of the
pyramid is needed. So given a rectangle r in an input image, you can
ask, what rectangle in data corresponds to r when things are scale times
smaller? That rectangle is returned by this function.
- A scale of 1 means we don't move anywhere in the pyramid scale space relative
to the input image while smaller values of scale mean we move down the
pyramid.
!*/
drectangle tensor_space_to_image_space (
const tensor& data,
drectangle r
) const;
/*!
requires
- data is a tensor that was produced by this->to_tensor()
ensures
- This function maps from to_tensor()'s output tensor space to its input
image space. Therefore, given that data is a tensor produced by
to_tensor(), tensor_space_to_image_space() allows you to ask for the
rectangle in the input image that corresponds to a rectangle in data.
- It should be noted that this function isn't always an inverse of
image_space_to_tensor_space(). This is because you can ask
image_space_to_tensor_space() for the coordinates of points outside the input
image and they will be mapped to somewhere that doesn't have an inverse.
But for points actually inside the input image this function performs an
approximate inverse mapping. I.e. when image_contained_point(data,center(r))==true
there is an approximate inverse.
!*/
};
// ----------------------------------------------------------------------------------------
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment