Commit 858824e8 authored by Davis King's avatar Davis King

Upgraded the input layer so you can give input<std::array<matrix<T>,K>> types
as input layer specifications.  This will create input tensors with K
channels.
parent 6d5ad339
......@@ -9,6 +9,7 @@
#include "../pixel.h"
#include "../image_processing.h"
#include <sstream>
#include <array>
#include "tensor_tools.h"
......@@ -399,6 +400,99 @@ namespace dlib
}
};
// ----------------------------------------------------------------------------------------
template <typename T, long NR, long NC, typename MM, typename L, size_t K>
class input<std::array<matrix<T,NR,NC,MM,L>,K>>
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            Input layer specialization for samples that are a std::array of K
            matrices.  Each array element is written out as one channel of the
            tensor, so a batch of N samples whose matrices have nr rows and nc
            columns is stored in a tensor sized (N, K, nr, nc).  The object
            itself holds no state.
    !*/
public:
    using input_type = std::array<matrix<T,NR,NC,MM,L>,K>;

    input() {}
    input(const input&) {}

    // True when p lies inside the rectangle get_rect() reports for data.
    bool image_contained_point (
        const tensor& data,
        const point& p
    ) const
    {
        return get_rect(data).contains(p);
    }

    // Both coordinate mappings hand the rectangle back unchanged.
    drectangle tensor_space_to_image_space (
        const tensor& /*data*/,
        drectangle r
    ) const
    {
        return r;
    }

    drectangle image_space_to_tensor_space (
        const tensor& /*data*/,
        double /*scale*/,
        drectangle r
    ) const
    {
        return r;
    }

    /*!
        Copies the samples in [ibegin, iend) into data.
        requires
            - the range is not empty
            - K != 0
            - every matrix of every sample has the same number of rows and
              columns as the first matrix of the first sample
        ensures
            - data is resized to (std::distance(ibegin,iend), K, nr, nc) and
              filled sample by sample, channel by channel, in row major order.
            - If T is unsigned char each value is divided by 256.0 before it
              is stored, otherwise values are stored as they are.
    !*/
    template <typename forward_iterator>
    void to_tensor (
        forward_iterator ibegin,
        forward_iterator iend,
        resizable_tensor& data
    ) const
    {
        DLIB_CASSERT(std::distance(ibegin,iend) > 0);
        DLIB_CASSERT(ibegin->size() != 0, "When using std::array<matrix> inputs you can't give 0 sized arrays.");

        // The first matrix of the first sample defines the expected shape.
        const auto nr = (*ibegin)[0].nr();
        const auto nc = (*ibegin)[0].nc();

        // Validation pass: reject the batch if any matrix has a different shape.
        for (auto i = ibegin; i != iend; ++i)
        {
            const auto& arr = *i;
            for (size_t k = 0; k < K; ++k)
            {
                DLIB_CASSERT(arr[k].nr()==nr && arr[k].nc()==nc,
                    "\t input::to_tensor()"
                    << "\n\t When using std::array<matrix> as input, all matrices in a batch must have the same dimensions."
                    << "\n\t nr: " << nr
                    << "\n\t nc: " << nc
                    << "\n\t k: " << k
                    << "\n\t arr[k].nr(): " << arr[k].nr()
                    << "\n\t arr[k].nc(): " << arr[k].nc()
                );
            }
        }

        // Size the output so it can hold every sample in the range.
        const auto num_samples = std::distance(ibegin,iend);
        data.set_size(num_samples, K, nr, nc);

        // Copy pass: walk the host buffer linearly while visiting the samples
        // in (sample, channel, row, column) order.
        auto dest = data.host();
        for (auto sample_it = ibegin; sample_it != iend; ++sample_it)
        {
            const auto& sample = *sample_it;
            for (size_t chan = 0; chan < K; ++chan)
            {
                const auto& img = sample[chan];
                for (long row = 0; row < nr; ++row)
                {
                    for (long col = 0; col < nc; ++col)
                    {
                        if (is_same_type<T,unsigned char>::value)
                        {
                            // 8 bit values are scaled by 1/256 before being stored.
                            *dest++ = img(row,col)/256.0;
                        }
                        else
                        {
                            *dest++ = img(row,col);
                        }
                    }
                }
            }
        }
    }

    // There is no state to save, so only a tag string is written.
    friend void serialize(const input& /*item*/, std::ostream& out)
    {
        serialize("input<array<matrix>>", out);
    }

    // Reads the tag string back and throws serialization_error if it is not
    // the one serialize() writes.
    friend void deserialize(input& /*item*/, std::istream& in)
    {
        std::string version;
        deserialize(version, in);
        const bool tag_matches = (version == "input<array<matrix>>");
        if (!tag_matches)
            throw serialization_error("Unexpected version found while deserializing dlib::input<array<matrix>>.");
    }

    // Prints a short name for this layer.
    friend std::ostream& operator<<(std::ostream& out, const input& /*item*/)
    {
        return out << "input<array<matrix>>";
    }

    // Emits an empty <input/> element.
    friend void to_xml(const input& /*item*/, std::ostream& out)
    {
        out << "<input/>";
    }
};
// ----------------------------------------------------------------------------------------
template <typename T, typename MM>
......
......@@ -116,8 +116,11 @@ namespace dlib
{
/*!
REQUIREMENTS ON T
T is a matrix or array2d object and it must contain some kind of pixel
type. I.e. pixel_traits<T::type> must be defined.
One of the following must be true:
- T is a matrix or array2d object and it must contain some kind of
pixel type. I.e. pixel_traits<T::type> must be defined.
- T is a std::array<matrix<U>,K> where U is any built in scalar type like
float, double, or unsigned char.
WHAT THIS OBJECT REPRESENTS
This is a basic input layer that simply copies images into a tensor.
......@@ -141,7 +144,8 @@ namespace dlib
ensures
- Converts the iterator range into a tensor and stores it into #data. In
particular, if the input images have R rows, C columns, and K channels
(where K is given by pixel_traits::num) then we will have:
(where K is given by pixel_traits::num or std::array::size() if
std::array inputs are used) then we will have:
- #data.num_samples() == std::distance(ibegin,iend)
- #data.nr() == R
- #data.nc() == C
......@@ -149,6 +153,8 @@ namespace dlib
For example, a matrix<float,3,3> would turn into a tensor with 3 rows, 3
columns, and k()==1. Or a matrix<rgb_pixel,4,5> would turn into a tensor
with 4 rows, 5 columns, and k()==3 (since rgb_pixels have 3 channels).
Or a std::array<matrix<float,3,3>,5> would turn into a tensor with 3 rows,
3 columns, and k()==5 (since the array holds 5 matrices).
- If the input data contains pixels of type unsigned char, rgb_pixel, or
other pixel types with a basic_pixel_type of unsigned char then each
value written to the output tensor is first divided by 256.0 so that the
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment