Commit 858824e8 authored by Davis King's avatar Davis King

Upgraded the input layer so you can give input<std::array<matrix<T>,K>> types

as input layer specifications.  This will create input tensors with K
channels.
parent 6d5ad339
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include "../pixel.h" #include "../pixel.h"
#include "../image_processing.h" #include "../image_processing.h"
#include <sstream> #include <sstream>
#include <array>
#include "tensor_tools.h" #include "tensor_tools.h"
...@@ -399,6 +400,99 @@ namespace dlib ...@@ -399,6 +400,99 @@ namespace dlib
} }
}; };
// ----------------------------------------------------------------------------------------
template <typename T, long NR, long NC, typename MM, typename L, size_t K>
class input<std::array<matrix<T,NR,NC,MM,L>,K>>
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            Input layer specialization for samples given as a std::array of K
            matrices.  to_tensor() copies each array element into its own
            channel, so a batch of N samples whose matrices are nr x nc is
            written into a tensor sized with set_size(N, K, nr, nc).
            This class declares no data members.
    !*/
public:
    using input_type = std::array<matrix<T,NR,NC,MM,L>,K>;

    input() {}
    input(const input&) {}

    // True when p lies inside the rectangle that get_rect() reports for data.
    bool image_contained_point (
        const tensor& data,
        const point& p
    ) const
    {
        return get_rect(data).contains(p);
    }

    // Both coordinate mappings below return the rectangle they are given
    // unchanged; the tensor (and scale) arguments are ignored.
    drectangle tensor_space_to_image_space (
        const tensor& /*data*/,
        drectangle r
    ) const
    {
        return r;
    }

    drectangle image_space_to_tensor_space (
        const tensor& /*data*/,
        double /*scale*/,
        drectangle r
    ) const
    {
        return r;
    }

    /*!
        requires
            - [ibegin, iend) is a non-empty range of array-like samples, each
              holding K matrices.
            - every matrix in the range has the same number of rows and columns.
        ensures
            - #data is resized with set_size(std::distance(ibegin,iend), K, nr, nc),
              where nr and nc are the dimensions of the first matrix of the
              first sample.
            - Elements are written through data.host() in sample, channel, row,
              column order.
            - When T is unsigned char each value is divided by 256.0 before it
              is stored; any other T is stored as is.
    !*/
    template <typename forward_iterator>
    void to_tensor (
        forward_iterator ibegin,
        forward_iterator iend,
        resizable_tensor& data
    ) const
    {
        DLIB_CASSERT(std::distance(ibegin,iend) > 0);
        DLIB_CASSERT(ibegin->size() != 0, "When using std::array<matrix> inputs you can't give 0 sized arrays.");

        // The first matrix of the first sample defines the shape every other
        // matrix in the batch has to match.
        const auto& reference_matrix = (*ibegin)[0];
        const auto nr = reference_matrix.nr();
        const auto nc = reference_matrix.nc();

        // Check every channel of every sample against that reference shape.
        for (auto sample = ibegin; sample != iend; ++sample)
        {
            const auto& arr = *sample;
            for (size_t k = 0; k < K; ++k)
            {
                DLIB_CASSERT(arr[k].nr()==nr && arr[k].nc()==nc,
                    "\t input::to_tensor()"
                    << "\n\t When using std::array<matrix> as input, all matrices in a batch must have the same dimensions."
                    << "\n\t nr: " << nr
                    << "\n\t nc: " << nc
                    << "\n\t k: " << k
                    << "\n\t arr[k].nr(): " << arr[k].nr()
                    << "\n\t arr[k].nc(): " << arr[k].nc()
                );
            }
        }

        // Size the tensor so it can hold the whole iterator range.
        const auto num_samples = std::distance(ibegin,iend);
        data.set_size(num_samples, K, nr, nc);

        // Copy element by element, advancing the output pointer once per value.
        auto out = data.host();
        for (auto sample = ibegin; sample != iend; ++sample)
        {
            const auto& channels = *sample;
            for (size_t k = 0; k < K; ++k)
            {
                const auto& channel = channels[k];
                for (long r = 0; r < nr; ++r)
                {
                    for (long c = 0; c < nc; ++c)
                    {
                        // The two branches stay separate statements so that
                        // non-byte values are assigned without first being
                        // promoted to double.
                        if (is_same_type<T,unsigned char>::value)
                            *out = channel(r,c)/256.0;
                        else
                            *out = channel(r,c);
                        ++out;
                    }
                }
            }
        }
    }

    // Only a version tag is written: no field of the object is serialized.
    friend void serialize(const input& /*item*/, std::ostream& out)
    {
        serialize("input<array<matrix>>", out);
    }

    // Reads the version tag back and rejects anything else.
    friend void deserialize(input& /*item*/, std::istream& in)
    {
        std::string tag;
        deserialize(tag, in);
        if (tag != "input<array<matrix>>")
            throw serialization_error("Unexpected version found while deserializing dlib::input<array<matrix>>.");
    }

    friend std::ostream& operator<<(std::ostream& out, const input& /*item*/)
    {
        out << "input<array<matrix>>";
        return out;
    }

    friend void to_xml(const input& /*item*/, std::ostream& out)
    {
        out << "<input/>";
    }
};
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template <typename T, typename MM> template <typename T, typename MM>
......
...@@ -116,8 +116,11 @@ namespace dlib ...@@ -116,8 +116,11 @@ namespace dlib
{ {
/*! /*!
REQUIREMENTS ON T REQUIREMENTS ON T
T is a matrix or array2d object and it must contain some kind of pixel One of the following must be true:
type. I.e. pixel_traits<T::type> must be defined. - T is a matrix or array2d object and it must contain some kind of
pixel type. I.e. pixel_traits<T::type> must be defined.
- T is a std::array<matrix<U>,K> where U is any built-in scalar type like
float, double, or unsigned char.
WHAT THIS OBJECT REPRESENTS WHAT THIS OBJECT REPRESENTS
This is a basic input layer that simply copies images into a tensor. This is a basic input layer that simply copies images into a tensor.
...@@ -141,7 +144,8 @@ namespace dlib ...@@ -141,7 +144,8 @@ namespace dlib
ensures ensures
- Converts the iterator range into a tensor and stores it into #data. In - Converts the iterator range into a tensor and stores it into #data. In
particular, if the input images have R rows, C columns, and K channels particular, if the input images have R rows, C columns, and K channels
(where K is given by pixel_traits::num) then we will have: (where K is given by pixel_traits::num or std::array::size() if
std::array inputs are used) then we will have:
- #data.num_samples() == std::distance(ibegin,iend) - #data.num_samples() == std::distance(ibegin,iend)
- #data.nr() == R - #data.nr() == R
- #data.nc() == C - #data.nc() == C
...@@ -149,6 +153,8 @@ namespace dlib ...@@ -149,6 +153,8 @@ namespace dlib
For example, a matrix<float,3,3> would turn into a tensor with 3 rows, 3 For example, a matrix<float,3,3> would turn into a tensor with 3 rows, 3
columns, and k()==1. Or a matrix<rgb_pixel,4,5> would turn into a tensor columns, and k()==1. Or a matrix<rgb_pixel,4,5> would turn into a tensor
with 4 rows, 5 columns, and k()==3 (since rgb_pixels have 3 channels). with 4 rows, 5 columns, and k()==3 (since rgb_pixels have 3 channels).
Or a std::array<matrix<float,3,3>,5> would turn into a tensor with 3 rows,
3 columns, and k()==5 (one channel per array element).
- If the input data contains pixels of type unsigned char, rgb_pixel, or - If the input data contains pixels of type unsigned char, rgb_pixel, or
other pixel types with a basic_pixel_type of unsigned char then each other pixel types with a basic_pixel_type of unsigned char then each
value written to the output tensor is first divided by 256.0 so that the value written to the output tensor is first divided by 256.0 so that the
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment