Commit 9b56dc35 authored by Davis King

Finished the input interface and implementations. Also added docs about
layer conversions.
parent bcef0f86
dlib/dnn/input.h
@@ -3,7 +3,9 @@
 #ifndef DLIB_DNn_INPUT_H_
 #define DLIB_DNn_INPUT_H_
+#include "input_abstract.h"
 #include "../matrix.h"
+#include "../array2d.h"
 #include "../pixel.h"
@@ -13,51 +15,20 @@ namespace dlib
 // ----------------------------------------------------------------------------------------
     template <typename T>
     class input
     {
-    public:
-        // sample_expansion_factor must be > 0
-        const static unsigned int sample_expansion_factor = 1;
-        typedef T input_type;
-        template <typename input_iterator>
-        void to_tensor (
-            input_iterator begin,
-            input_iterator end,
-            resizable_tensor& data
-        ) const
-        /*!
-            requires
-                - [begin, end) is an iterator range over input_type objects.
-            ensures
-                - Converts the iterator range into a tensor and stores it into #data.
-                - Normally you would have #data.num_samples() == distance(begin,end) but
-                  you can also expand the output by some integer factor so long as the loss
-                  you use can deal with it correctly.
-                - #data.num_samples() == distance(begin,end)*sample_expansion_factor.
-        !*/
-        {
-            // initialize data to the right size to contain the stuff in the iterator range.
-            for (input_iterator i = begin; i != end; ++i)
-            {
-                matrix<rgb_pixel> temp = *i;
-                // now copy *i into the right part of data.
-            }
-        }
+        const static bool always_false = sizeof(T)!=sizeof(T);
+        static_assert(always_false, "Unsupported type given to input<>. input<> only supports "
+            "dlib::matrix and dlib::array2d objects.");
     };
 // ----------------------------------------------------------------------------------------
-    template <typename T,long NR, typename MM, typename L>
-    class input<matrix<T,NR,1,MM,L>>
+    template <typename T, long NR, long NC, typename MM, typename L>
+    class input<matrix<T,NR,NC,MM,L>>
     {
     public:
-        // TODO, maybe we should only allow T to be float? Seems kinda pointless to allow
-        // double. Don't forget to remove the matrix_cast if we enforce just float.
-        typedef matrix<T,NR,1,MM,L> input_type;
+        typedef matrix<T,NR,NC,MM,L> input_type;
         const static unsigned int sample_expansion_factor = 1;
         template <typename input_iterator>
@@ -66,40 +37,51 @@
             input_iterator end,
             resizable_tensor& data
         ) const
-        /*!
-            requires
-                - [begin, end) is an iterator range over input_type objects.
-            ensures
-                - converts the iterator range into a tensor and stores it into #data.
-                - Normally you would have #data.num_samples() == distance(begin,end) but
-                  you can also expand the output by some integer factor so long as the loss
-                  you use can deal with it correctly.
-                - #data.num_samples() == distance(begin,end)*sample_expansion_factor.
-        !*/
         {
+            DLIB_CASSERT(std::distance(begin,end) > 0,"");
+            const auto nr = begin->nr();
+            const auto nc = begin->nc();
+            // make sure all the input matrices have the same dimensions
+            for (auto i = begin; i != end; ++i)
+            {
+                DLIB_CASSERT(i->nr()==nr && i->nc()==nc,
+                    "\t input::to_tensor()"
+                    << "\n\t All matrices given to to_tensor() must have the same dimensions."
+                    << "\n\t nr: " << nr
+                    << "\n\t nc: " << nc
+                    << "\n\t i->nr(): " << i->nr()
+                    << "\n\t i->nc(): " << i->nc()
+                );
+            }
             // initialize data to the right size to contain the stuff in the iterator range.
-            data.set_size(std::distance(begin,end), 1, 1, begin->size());
-            unsigned long idx = 0;
-            for (input_iterator i = begin; i != end; ++i)
+            data.set_size(std::distance(begin,end), nr, nc, pixel_traits<T>::num);
+            auto ptr = data.host();
+            for (auto i = begin; i != end; ++i)
             {
-                data.set_sample(idx++, matrix_cast<float>(*i));
+                for (long r = 0; r < nr; ++r)
+                {
+                    for (long c = 0; c < nc; ++c)
+                    {
+                        auto temp = pixel_to_vector<float>((*i)(r,c));
+                        for (long j = 0; j < temp.size(); ++j)
+                            *ptr++ = temp(j);
+                    }
+                }
             }
         }
     };
 // ----------------------------------------------------------------------------------------
-    template <typename T>
-    class input2
+    template <typename T, typename MM>
+    class input<array2d<T,MM>>
     {
     public:
-        input2(){}
-        input2(const input<T>&) {}
-        typedef T input_type;
+        typedef array2d<T,MM> input_type;
         const static unsigned int sample_expansion_factor = 1;
         template <typename input_iterator>
@@ -108,24 +90,41 @@
             input_iterator end,
             resizable_tensor& data
         ) const
-        /*!
-            requires
-                - [begin, end) is an iterator range over T objects.
-            ensures
-                - converts the iterator range into a tensor and stores it into #data.
-                - Normally you would have #data.num_samples() == distance(begin,end) but
-                  you can also expand the output by some integer factor so long as the loss
-                  you use can deal with it correctly.
-                - #data.num_samples() == distance(begin,end)*K where K is an integer >= 1.
-        !*/
         {
+            DLIB_CASSERT(std::distance(begin,end) > 0,"");
+            const auto nr = begin->nr();
+            const auto nc = begin->nc();
+            // make sure all the input matrices have the same dimensions
+            for (auto i = begin; i != end; ++i)
+            {
+                DLIB_CASSERT(i->nr()==nr && i->nc()==nc,
+                    "\t input::to_tensor()"
+                    << "\n\t All array2d objects given to to_tensor() must have the same dimensions."
+                    << "\n\t nr: " << nr
+                    << "\n\t nc: " << nc
+                    << "\n\t i->nr(): " << i->nr()
+                    << "\n\t i->nc(): " << i->nc()
+                );
+            }
             // initialize data to the right size to contain the stuff in the iterator range.
-            for (input_iterator i = begin; i != end; ++i)
+            data.set_size(std::distance(begin,end), nr, nc, pixel_traits<T>::num);
+            auto ptr = data.host();
+            for (auto i = begin; i != end; ++i)
             {
-                matrix<rgb_pixel> temp = *i;
-                // now copy *i into the right part of data.
+                for (long r = 0; r < nr; ++r)
+                {
+                    for (long c = 0; c < nc; ++c)
+                    {
+                        auto temp = pixel_to_vector<float>((*i)[r][c]);
+                        for (long j = 0; j < temp.size(); ++j)
+                            *ptr++ = temp(j);
+                    }
+                }
             }
         }
     };
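For orientation, here is a hedged usage sketch of the matrix overload above. The include path and the example() wrapper are assumptions made for illustration; the resulting tensor dimensions follow the documentation added in input_abstract.h below.

    #include <vector>
    #include "dlib/dnn/input.h"   // assumed path to the header shown above

    using namespace dlib;

    void example()
    {
        // Two 4x5 RGB images.  Per the new docs, the resulting tensor has
        // num_samples()==2, nr()==4, nc()==5, and k()==3 (rgb_pixel has 3 channels).
        std::vector<matrix<rgb_pixel>> images(2);
        images[0].set_size(4,5);
        images[1].set_size(4,5);

        input<matrix<rgb_pixel>> inp;
        resizable_tensor data;
        inp.to_tensor(images.begin(), images.end(), data);

        // data.host() now holds float channel values, written sample by sample,
        // row by row, column by column, with the channels of each pixel adjacent.
    }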
dlib/dnn/input_abstract.h (new file)
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_DNn_INPUT_ABSTRACT_H_
#ifdef DLIB_DNn_INPUT_ABSTRACT_H_
#include "../matrix.h"
#include "../pixel.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class EXAMPLE_INPUT
{
/*!
WHAT THIS OBJECT REPRESENTS
Each deep neural network model in dlib begins with an input layer. The job
of the input layer is to convert an input_type into a tensor. Nothing more
and nothing less.
Note that there is no dlib::EXAMPLE_INPUT type. It is shown here purely to
document the interface that an input layer object must implement. If you
are using some kind of image or matrix object as your input_type then you
can use the provided dlib::input layer type defined below. Otherwise, you
need to define your own custom input layer.
!*/
public:
EXAMPLE_INPUT(
);
/*!
ensures
- Default constructs this object. This function is not required to do
anything in particular but it is required that input layer objects be
default constructible.
!*/
EXAMPLE_INPUT(
const some_other_input_layer_type& item
);
/*!
ensures
- Constructs this object from item. This form of constructor is optional
but it allows you to provide a conversion from one input layer type to
another. For example, the following code is valid only if my_input2 can
be constructed from my_input1:
relu<fc<relu<fc<my_input1>>>> my_dnn1;
relu<fc<relu<fc<my_input2>>>> my_dnn2(my_dnn1);
This kind of pattern is useful if you want to use one type of input layer
during training but a different type of layer during testing since it
allows you to easily convert between related deep neural network types.
!*/
// sample_expansion_factor must be > 0
const static unsigned int sample_expansion_factor = 1;
typedef whatever_type_to_tensor_expects input_type;
template <typename input_iterator>
void to_tensor (
input_iterator begin,
input_iterator end,
resizable_tensor& data
) const
/*!
requires
- [begin, end) is an iterator range over input_type objects.
- std::distance(begin,end) > 0
ensures
- Converts the iterator range into a tensor and stores it into #data.
- #data.num_samples() == distance(begin,end)*sample_expansion_factor.
- Normally you would have #data.num_samples() == distance(begin,end) but
you can also expand the output by some integer factor so long as the loss
you use can deal with it correctly.
!*/
};
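To make the interface above concrete, here is a hedged sketch of a custom input layer. The name my_vec_input and its std::array input_type are illustrative only, the tensor header path is an assumption, and the set_size() argument order (num_samples, nr, nc, k) is assumed to match the input::to_tensor() implementation in this commit.

    #include <array>
    #include <iterator>
    #include "dlib/dnn/tensor.h"   // assumed location of resizable_tensor

    class my_vec_input
    {
    public:
        // Required interface: default constructor, sample_expansion_factor,
        // input_type, and to_tensor().
        my_vec_input() {}

        const static unsigned int sample_expansion_factor = 1;
        typedef std::array<float,3> input_type;

        template <typename input_iterator>
        void to_tensor (
            input_iterator begin,
            input_iterator end,
            dlib::resizable_tensor& data
        ) const
        {
            // One sample per input element: 1x1 spatial size and 3 channels.
            data.set_size(std::distance(begin,end), 1, 1, 3);
            auto ptr = data.host();
            for (auto i = begin; i != end; ++i)
                for (float v : *i)
                    *ptr++ = v;
        }
    };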
// ----------------------------------------------------------------------------------------
template <
typename T
>
class input
{
/*!
REQUIREMENTS ON T
T is a matrix or array2d object and it must contain some kind of pixel
type. I.e. pixel_traits<T::type> must be defined.
WHAT THIS OBJECT REPRESENTS
This is a basic input layer that simply copies images into a tensor.
!*/
public:
const static unsigned int sample_expansion_factor = 1;
typedef T input_type;
template <typename input_iterator>
void to_tensor (
input_iterator begin,
input_iterator end,
resizable_tensor& data
) const;
/*!
requires
- [begin, end) is an iterator range over input_type objects.
- std::distance(begin,end) > 0
- The input range should contain image objects that all have the same
dimensions.
ensures
- Converts the iterator range into a tensor and stores it into #data. In
particular, if the input images have R rows, C columns, and K channels
(where K is given by pixel_traits::num) then we will have:
- #data.num_samples() == std::distance(begin,end)
- #data.nr() == R
- #data.nc() == C
- #data.k() == K
For example, a matrix<float,3,3> would turn into a tensor with 3 rows, 3
columns, and k()==1. Or a matrix<rgb_pixel,4,5> would turn into a tensor
with 4 rows, 5 columns, and k()==3 (since rgb_pixels have 3 channels).
!*/
};
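As a worked instance of the dimensions described above, here is a hedged sketch using grayscale array2d images (the include path and the example_gray() wrapper are assumptions):

    #include "dlib/dnn/input.h"   // assumed path to the implementation header

    using namespace dlib;

    void example_gray()
    {
        // Two 4x5 grayscale images.  pixel_traits<unsigned char>::num == 1, so
        // per the spec above: num_samples()==2, nr()==4, nc()==5, k()==1.
        array2d<unsigned char> imgs[2];
        imgs[0].set_size(4,5);
        imgs[1].set_size(4,5);

        input<array2d<unsigned char>> inp;
        resizable_tensor data;
        inp.to_tensor(imgs, imgs+2, data);
    }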
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_DNn_INPUT_ABSTRACT_H_
dlib/dnn/layers_abstract.h
@@ -68,6 +68,22 @@ namespace dlib
             constructable.
         !*/
 
+        EXAMPLE_LAYER_(
+            const some_other_layer_type& item
+        );
+        /*!
+            ensures
+                - Constructs this object from item. This form of constructor is optional
+                  but it allows you to provide a conversion from one layer type to another.
+                  For example, the following code is valid only if my_layer2 can be
+                  constructed from my_layer1:
+                    relu<fc<my_layer1<fc<input<matrix<float>>>>>> my_dnn1;
+                    relu<fc<my_layer2<fc<input<matrix<float>>>>>> my_dnn2(my_dnn1);
+                  This kind of pattern is useful if you want to use one type of layer
+                  during training but a different type of layer during testing since it
+                  allows you to easily convert between related deep neural network types.
+        !*/
+
         template <typename SUB_NET>
         void setup (
             const SUB_NET& sub
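The constructor documented above can be sketched in isolation as follows. my_layer1 and my_layer2 are hypothetical layer types; the rest of the EXAMPLE_LAYER_ interface (setup(), etc.) is omitted and only the conversion hook is shown.

    class my_layer1
    {
    public:
        my_layer1() {}
        // ... rest of the EXAMPLE_LAYER_ interface ...
    };

    class my_layer2
    {
    public:
        my_layer2() {}

        // The optional conversion constructor: it lets a network built on
        // my_layer1 be converted into one built on my_layer2, e.g. when
        // swapping a training-time layer for a different test-time layer.
        my_layer2(const my_layer1& item)
        {
            // copy whatever parameters/state my_layer2 needs from item
            (void)item;
        }

        // ... rest of the EXAMPLE_LAYER_ interface ...
    };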