Commit 603d4743 authored by Davis King

- Renamed network_type::num_layers to network_type::num_computational_layers.

- Made layer() recurse into repeat objects so that the index given to layer()
  does what you would expect.
- Added an operator<< for network objects that prints the network architecture.
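For example, the new printing and indexing features can be used together like this (a sketch against the dlib DNN API at this commit; the particular network shown is hypothetical):

#include <dlib/dnn.h>
#include <iostream>
using namespace dlib;
using namespace std;

int main()
{
    // One loss layer, two computational layers (relu and htan), and an input
    // layer, so num_layers == 4 but num_computational_layers == 2.
    using net_type = loss_multiclass_log<relu<htan<input<matrix<float>>>>>;
    net_type net;

    cout << net;  // the new operator<< prints one indexed line per layer
    cout << "num_layers: " << net_type::num_layers << endl;
    cout << "num_computational_layers: " << net_type::num_computational_layers << endl;

    // layer() chains calls to subnet(): layer<1>(net) is net.subnet().
    auto& l = layer<1>(net);
    (void)l;
    return 0;
}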
parent 088546f3
@@ -205,8 +205,12 @@ namespace dlib
typedef SUBNET subnet_type;
typedef typename subnet_type::input_type input_type;
const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;
// num_computational_layers will always give the number of layers in the network
// that transform tensors (i.e. layers defined by something that implements the
// EXAMPLE_COMPUTATIONAL_LAYER_ interface). This is all the layers except for
// loss, tag, and skip layers.
const static size_t num_computational_layers = subnet_type::num_computational_layers + 1;
// num_layers counts all the layers in the network regardless of their type.
const static size_t num_layers = subnet_type::num_layers + 1;
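// For example (illustrative only, not from this header): in a network like
//     loss_multiclass_log<relu<tag1<htan<input<matrix<float>>>>>>
// relu and htan are computational layers while the loss, tag1, and input
// layers are not, so num_computational_layers == 2 while num_layers == 5.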
add_layer(
@@ -444,7 +448,7 @@ namespace dlib
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- Back propagates the error gradient, get_gradient_input(), through this
@@ -473,7 +477,7 @@ namespace dlib
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- This function is identical to the version of update() defined immediately
@@ -508,9 +512,15 @@ namespace dlib
};
template <typename T, typename U>
std::ostream& operator<<(std::ostream& out, const add_layer<T,U>& item);
/*!
prints the network architecture to the given output stream.
!*/
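For example (sketch): given any network object net, cout << net << endl; writes one line per layer, each prefixed with its index, e.g. layer<0> loss_multiclass_log.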
template <typename T, typename U>
void serialize(const add_layer<T,U>& item, std::ostream& out);
template <typename T, typename U>
void deserialize(add_layer<T,U>& item, std::istream& in);
/*!
provides serialization support
@@ -555,9 +565,8 @@ namespace dlib
typedef LOSS_DETAILS loss_details_type;
typedef SUBNET subnet_type;
typedef typename subnet_type::input_type input_type;
const static size_t num_computational_layers = subnet_type::num_computational_layers;
const static size_t num_layers = subnet_type::num_layers + 1;
const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;
// If LOSS_DETAILS is an unsupervised loss then label_type==no_label_type.
// Otherwise it is defined as follows:
@@ -838,7 +847,7 @@ namespace dlib
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- runs x through the network, compares the output to the expected output
@@ -870,7 +879,7 @@ namespace dlib
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- runs [ibegin,iend) through the network, compares the output to the
@@ -901,7 +910,7 @@ namespace dlib
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- runs x through the network and updates the network parameters by
@@ -928,7 +937,7 @@ namespace dlib
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- runs [ibegin,iend) through the network and updates the network parameters
@@ -951,9 +960,15 @@ namespace dlib
!*/
};
template <typename T, typename U>
std::ostream& operator<<(std::ostream& out, const add_loss_layer<T,U>& item);
/*!
prints the network architecture to the given output stream.
!*/
template <typename T, typename U>
void serialize(const add_loss_layer<T,U>& item, std::ostream& out);
template <typename T, typename U>
void deserialize(add_loss_layer<T,U>& item, std::istream& in);
/*!
provides serialization support
@@ -1013,6 +1028,7 @@ namespace dlib
typedef SUBNET subnet_type;
typedef typename SUBNET::input_type input_type;
const static size_t num_computational_layers = (REPEATED_LAYER<SUBNET>::num_computational_layers-SUBNET::num_computational_layers)*num + SUBNET::num_computational_layers;
const static size_t num_layers = (REPEATED_LAYER<SUBNET>::num_layers-SUBNET::num_layers)*num + SUBNET::num_layers;
const static unsigned int sample_expansion_factor = SUBNET::sample_expansion_factor;
typedef REPEATED_LAYER<an_unspecified_input_type> repeated_layer_type;
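// For example (illustrative): if one REPEATED_LAYER block stacks 3
// computational layers on top of its subnet, then
// repeat<5,REPEATED_LAYER,SUBNET>::num_computational_layers ==
// 3*5 + SUBNET::num_computational_layers, since the parenthesized difference
// above is the number of layers a single repetition adds.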
@@ -1095,6 +1111,20 @@ namespace dlib
!*/
};
template < size_t num, template<typename> class T, typename U >
std::ostream& operator<<(std::ostream& out, const repeat<num,T,U>& item);
/*!
prints the network architecture to the given output stream.
!*/
template < size_t num, template<typename> class T, typename U >
void serialize(const repeat<num,T,U>& item, std::ostream& out);
template < size_t num, template<typename> class T, typename U >
void deserialize(repeat<num,T,U>& item, std::istream& in);
/*!
provides serialization support
!*/
// ----------------------------------------------------------------------------------------
template <
@@ -1124,9 +1154,15 @@ namespace dlib
!*/
};
template <unsigned long ID, typename U>
std::ostream& operator<<(std::ostream& out, const add_tag_layer<ID,U>& item);
/*!
prints the network architecture to the given output stream.
!*/
template <unsigned long ID, typename U>
void serialize(const add_tag_layer<ID,U>& item, std::ostream& out);
template <unsigned long ID, typename U>
void deserialize(add_tag_layer<ID,U>& item, std::istream& in);
/*!
provides serialization support
@@ -1168,6 +1204,12 @@ namespace dlib
!*/
};
template <template<typename> class T, typename U>
std::ostream& operator<<(std::ostream& out, const add_skip_layer<T,U>& item);
/*!
prints the network architecture to the given output stream.
!*/
template <template<typename> class T, typename U>
void serialize(const add_skip_layer<T,U>& item, std::ostream& out);
template <template<typename> class T, typename U>
@@ -1200,9 +1242,15 @@ namespace dlib
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
- i < net_type::num_layers
ensures
- This function allows you to access any layer in a network by its layer index
i. Therefore, it will walk i steps down the network and return the layer
object there. Since networks can be big, the best way to find layer index
numbers is to print a network to the screen since the print out will include
indexes for each layer.
- In general, this function chains together i calls to n.subnet() and returns
the result. So for example:
- if (i == 0)
- returns n
- else if (i == 1)
@@ -1213,6 +1261,10 @@ namespace dlib
- returns n.subnet().subnet().subnet()
- else
- etc.
Except that when it hits a repeat layer it recurses into the repeated layers
contained inside. That is, if the layer index indicates a layer in a repeat
object this function will make the appropriate call to get_repeated_layer()
and do the right thing.
!*/
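For example (a sketch; the network type here is hypothetical):
using net_type = loss_multiclass_log<relu<htan<input<matrix<float>>>>>;
net_type net;
layer<2>(net);     // the same layer object as net.subnet().subnet()
layer<htan>(net);  // also finds that layer, this time by its type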
template <
...
@@ -119,6 +119,12 @@ namespace dlib
deserialize(item.avg_blue, in);
}
friend std::ostream& operator<<(std::ostream& out, const input_rgb_image& item)
{
out << "input_rgb_image("<<item.avg_red<<","<<item.avg_green<<","<<item.avg_blue<<")";
return out;
}
private:
float avg_red;
float avg_green;
@@ -201,6 +207,12 @@ namespace dlib
throw serialization_error("Unexpected version found while deserializing dlib::input.");
}
friend std::ostream& operator<<(std::ostream& out, const input& item)
{
out << "input<matrix>";
return out;
}
};
// ----------------------------------------------------------------------------------------
@@ -277,6 +289,12 @@ namespace dlib
if (version != "input<array2d>")
throw serialization_error("Unexpected version found while deserializing dlib::input.");
}
friend std::ostream& operator<<(std::ostream& out, const input& item)
{
out << "input<array2d>";
return out;
}
};
...
@@ -86,6 +86,11 @@ namespace dlib
!*/
};
std::ostream& operator<<(std::ostream& out, const EXAMPLE_INPUT_LAYER& item);
/*!
prints a string describing this layer to out.
!*/
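For example, a conforming implementation for a hypothetical custom input layer could be:
std::ostream& operator<<(std::ostream& out, const my_input_layer& item)
{
out << "my_input_layer";
return out;
}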
void serialize(const EXAMPLE_INPUT_LAYER& item, std::ostream& out);
void deserialize(EXAMPLE_INPUT_LAYER& item, std::istream& in);
/*!
@@ -142,14 +147,6 @@ namespace dlib
!*/
};
template <typename T>
void serialize(const input<T>& item, std::ostream& out);
template <typename T>
void deserialize(input<T>& item, std::istream& in);
/*!
provides serialization support
!*/
// ----------------------------------------------------------------------------------------
class input_rgb_image
@@ -158,8 +155,8 @@ namespace dlib
WHAT THIS OBJECT REPRESENTS
This input layer works with RGB images of type matrix<rgb_pixel>. It is
very similar to the dlib::input layer except that it allows you to subtract
the average color value from each color channel when converting an image to
a tensor.
!*/
public:
typedef matrix<rgb_pixel> input_type;
@@ -227,14 +224,11 @@ namespace dlib
- #data.nc() == C
- #data.k() == 3
Moreover, each color channel is normalized by having its average value
subtracted (according to get_avg_red(), get_avg_green(), or
get_avg_blue()) and then is divided by 256.0.
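For example, a red channel value p becomes (p - get_avg_red())/256.0 in
the corresponding element of #data.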
!*/
};
void serialize(const input_rgb_image& item, std::ostream& out);
void deserialize(input_rgb_image& item, std::istream& in);
}
#endif // DLIB_DNn_INPUT_ABSTRACT_H_
...
@@ -157,6 +157,20 @@ namespace dlib
if (stride_x != _stride_x) throw serialization_error("Wrong stride_x found while deserializing dlib::con_");
}
friend std::ostream& operator<<(std::ostream& out, const con_& item)
{
out << "con\t ("
<< "num_filters="<<_num_filters
<< ", nr="<<_nr
<< ", nc="<<_nc
<< ", stride_y="<<_stride_y
<< ", stride_x="<<_stride_x
<< ")";
return out;
}
private:
resizable_tensor params;
@@ -277,6 +291,18 @@ namespace dlib
if (_stride_x != stride_x) throw serialization_error("Wrong stride_x found while deserializing dlib::max_pool_");
}
friend std::ostream& operator<<(std::ostream& out, const max_pool_& item)
{
out << "max_pool ("
<< "nr="<<_nr
<< ", nc="<<_nc
<< ", stride_y="<<_stride_y
<< ", stride_x="<<_stride_x
<< ")";
return out;
}
private:
@@ -393,6 +419,16 @@ namespace dlib
if (_stride_x != stride_x) throw serialization_error("Wrong stride_x found while deserializing dlib::avg_pool_");
}
friend std::ostream& operator<<(std::ostream& out, const avg_pool_& item)
{
out << "avg_pool ("
<< "nr="<<_nr
<< ", nc="<<_nc
<< ", stride_y="<<_stride_y
<< ", stride_x="<<_stride_x
<< ")";
return out;
}
private:
tt::pooling ap;
@@ -557,6 +593,15 @@ namespace dlib
}
}
friend std::ostream& operator<<(std::ostream& out, const bn_& item)
{
if (mode == CONV_MODE)
out << "bn_con";
else
out << "bn_fc";
return out;
}
private:
friend class affine_;
@@ -695,6 +740,23 @@ namespace dlib
if (bias_mode != (fc_bias_mode)bmode) throw serialization_error("Wrong fc_bias_mode found while deserializing dlib::fc_");
}
friend std::ostream& operator<<(std::ostream& out, const fc_& item)
{
if (bias_mode == FC_HAS_BIAS)
{
out << "fc\t ("
<< "num_outputs="<<item.num_outputs
<< ")";
}
else
{
out << "fc_no_bias ("
<< "num_outputs="<<item.num_outputs
<< ")";
}
return out;
}
private:
unsigned long num_outputs;
@@ -793,6 +855,14 @@ namespace dlib
deserialize(item.mask, in);
}
friend std::ostream& operator<<(std::ostream& out, const dropout_& item)
{
out << "dropout\t ("
<< "drop_rate="<<item.drop_rate
<< ")";
return out;
}
private:
float drop_rate;
resizable_tensor mask;
@@ -872,6 +942,14 @@ namespace dlib
deserialize(item.val, in);
}
friend std::ostream& operator<<(std::ostream& out, const multiply_& item)
{
out << "multiply ("
<< "val="<<item.val
<< ")";
return out;
}
private:
float val;
resizable_tensor params; // unused
@@ -1021,6 +1099,12 @@ namespace dlib
item.mode = (layer_mode)mode;
}
friend std::ostream& operator<<(std::ostream& out, const affine_& )
{
out << "affine";
return out;
}
private:
resizable_tensor params, empty_params;
alias_tensor gamma, beta;
@@ -1079,6 +1163,13 @@ namespace dlib
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::add_prev_.");
}
friend std::ostream& operator<<(std::ostream& out, const add_prev_& item)
{
out << "add_prev";
return out;
}
private:
resizable_tensor params;
};
@@ -1156,6 +1247,13 @@ namespace dlib
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::relu_.");
}
friend std::ostream& operator<<(std::ostream& out, const relu_& )
{
out << "relu";
return out;
}
private:
resizable_tensor params;
};
@@ -1226,6 +1324,14 @@ namespace dlib
deserialize(item.initial_param_value, in);
}
friend std::ostream& operator<<(std::ostream& out, const prelu_& item)
{
out << "prelu\t ("
<< "initial_param_value="<<item.initial_param_value
<< ")";
return out;
}
private:
resizable_tensor params;
float initial_param_value;
@@ -1279,6 +1385,13 @@ namespace dlib
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::sig_.");
}
friend std::ostream& operator<<(std::ostream& out, const sig_& )
{
out << "sig";
return out;
}
private:
resizable_tensor params;
};
@@ -1332,6 +1445,13 @@ namespace dlib
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::htan_.");
}
friend std::ostream& operator<<(std::ostream& out, const htan_& )
{
out << "htan";
return out;
}
private:
resizable_tensor params;
};
@@ -1385,6 +1505,12 @@ namespace dlib
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::softmax_.");
}
friend std::ostream& operator<<(std::ostream& out, const softmax_& )
{
out << "softmax";
return out;
}
private:
resizable_tensor params;
};
...
@@ -306,6 +306,11 @@ namespace dlib
};
std::ostream& operator<<(std::ostream& out, const EXAMPLE_COMPUTATIONAL_LAYER_& item);
/*!
prints a string describing this layer to out.
!*/
void serialize(const EXAMPLE_COMPUTATIONAL_LAYER_& item, std::ostream& out);
void deserialize(EXAMPLE_COMPUTATIONAL_LAYER_& item, std::istream& in);
/*!
@@ -390,11 +395,6 @@ namespace dlib
These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
!*/
friend void serialize(const fc_& item, std::ostream& out);
friend void deserialize(fc_& item, std::istream& in);
/*!
provides serialization support
!*/
};
template <
@@ -500,12 +500,6 @@ namespace dlib
These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
!*/
friend void serialize(const con_& item, std::ostream& out);
friend void deserialize(con_& item, std::istream& in);
/*!
provides serialization support
!*/
};
template <
@@ -565,12 +559,6 @@ namespace dlib
!*/
};
void serialize(const dropout_& item, std::ostream& out);
void deserialize(dropout_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using dropout = add_layer<dropout_, SUBNET>;
@@ -623,12 +611,6 @@ namespace dlib
!*/
};
void serialize(const multiply_& item, std::ostream& out);
void deserialize(multiply_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using multiply = add_layer<multiply_, SUBNET>;
@@ -725,13 +707,6 @@ namespace dlib
/*!
These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
!*/
friend void serialize(const bn_& item, std::ostream& out);
friend void deserialize(bn_& item, std::istream& in);
/*!
provides serialization support
!*/
};
template <typename SUBNET>
@@ -829,12 +804,6 @@ namespace dlib
};
void serialize(const affine_& item, std::ostream& out);
void deserialize(affine_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using affine = add_layer<affine_, SUBNET>;
@@ -927,12 +896,6 @@ namespace dlib
interface. Note that this layer doesn't have any parameters, so the tensor
returned by get_layer_params() is always empty.
!*/
friend void serialize(const max_pool_& item, std::ostream& out);
friend void deserialize(max_pool_& item, std::istream& in);
/*!
provides serialization support
!*/
};
template <
@@ -1034,11 +997,6 @@ namespace dlib
returned by get_layer_params() is always empty.
!*/
friend void serialize(const avg_pool_& item, std::ostream& out);
friend void deserialize(avg_pool_& item, std::istream& in);
/*!
provides serialization support
!*/
};
template <
@@ -1080,12 +1038,6 @@ namespace dlib
!*/
};
void serialize(const relu_& item, std::ostream& out);
void deserialize(relu_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using relu = add_layer<relu_, SUBNET>;
@@ -1137,12 +1089,6 @@ namespace dlib
!*/
};
void serialize(const prelu_& item, std::ostream& out);
void deserialize(prelu_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using prelu = add_layer<prelu_, SUBNET>;
@@ -1176,12 +1122,6 @@ namespace dlib
!*/
};
void serialize(const sig_& item, std::ostream& out);
void deserialize(sig_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using sig = add_layer<sig_, SUBNET>;
@@ -1215,12 +1155,6 @@ namespace dlib
!*/
};
void serialize(const htan_& item, std::ostream& out);
void deserialize(htan_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using htan = add_layer<htan_, SUBNET>;
@@ -1262,12 +1196,6 @@ namespace dlib
!*/
};
void serialize(const softmax_& item, std::ostream& out);
void deserialize(softmax_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using softmax = add_layer<softmax_, SUBNET>;
@@ -1307,11 +1235,6 @@ namespace dlib
!*/
};
void serialize(const add_prev_& item, std::ostream& out);
void deserialize(add_prev_& item, std::istream& in);
/*!
provides serialization support
!*/
template <
template<typename> class tag,
...
@@ -100,6 +100,12 @@ namespace dlib
throw serialization_error("Unexpected version found while deserializing dlib::loss_binary_hinge_.");
}
friend std::ostream& operator<<(std::ostream& out, const loss_binary_hinge_& )
{
out << "loss_binary_hinge";
return out;
}
};
template <typename SUBNET>
@@ -203,6 +209,12 @@ namespace dlib
throw serialization_error("Unexpected version found while deserializing dlib::loss_binary_log_.");
}
friend std::ostream& operator<<(std::ostream& out, const loss_binary_log_& )
{
out << "loss_binary_log";
return out;
}
};
template <typename SUBNET>
@@ -307,6 +319,13 @@ namespace dlib
throw serialization_error("Unexpected version found while deserializing dlib::loss_multiclass_log_.");
}
friend std::ostream& operator<<(std::ostream& out, const loss_multiclass_log_& )
{
out << "loss_multiclass_log";
return out;
}
};
template <typename SUBNET>
...
@@ -125,6 +125,11 @@ namespace dlib
!*/
};
std::ostream& operator<<(std::ostream& out, const EXAMPLE_LOSS_LAYER_& item);
/*!
prints a string describing this layer to out.
!*/
void serialize(const EXAMPLE_LOSS_LAYER_& item, std::ostream& out);
void deserialize(EXAMPLE_LOSS_LAYER_& item, std::istream& in);
/*!
@@ -200,12 +205,6 @@ namespace dlib
};
void serialize(const loss_binary_hinge_& item, std::ostream& out);
void deserialize(loss_binary_hinge_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using loss_binary_hinge = add_loss_layer<loss_binary_hinge_, SUBNET>;
@@ -272,12 +271,6 @@ namespace dlib
};
void serialize(const loss_binary_log_& item, std::ostream& out);
void deserialize(loss_binary_log_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using loss_binary_log = add_loss_layer<loss_binary_log_, SUBNET>;
@@ -346,12 +339,6 @@ namespace dlib
};
void serialize(const loss_multiclass_log_& item, std::ostream& out);
void deserialize(loss_multiclass_log_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using loss_multiclass_log = add_loss_layer<loss_multiclass_log_, SUBNET>;
...
@@ -37,12 +37,12 @@ namespace dlib
typedef typename net_type::label_type label_type;
typedef typename net_type::input_type input_type;
const static size_t num_computational_layers = net_type::num_computational_layers;
dnn_trainer() = delete;
dnn_trainer(const dnn_trainer&) = delete;
explicit dnn_trainer(net_type& net_) : job_pipe(0), net(net_), solvers(num_computational_layers)
{
init();
}
@@ -50,7 +50,7 @@ namespace dlib
dnn_trainer(
net_type& net_,
const solver_type& solver_
) : job_pipe(0), net(net_), solvers(num_computational_layers, solver_)
{
init();
}
@@ -75,7 +75,7 @@ namespace dlib
)
{
wait_for_thread_to_pause();
solvers = std::vector<solver_type>(num_computational_layers, solver_);
}
unsigned long get_mini_batch_size (
@@ -504,7 +504,7 @@ namespace dlib
int version = 5;
serialize(version, out);
size_t nl = dnn_trainer::num_computational_layers;
serialize(nl, out);
serialize(item.rs, out);
serialize(item.previous_loss_values, out);
@@ -530,14 +530,14 @@ namespace dlib
if (version != 5)
throw serialization_error("Unexpected version found while deserializing dlib::dnn_trainer.");
size_t num_computational_layers = 0;
deserialize(num_computational_layers, in);
if (num_computational_layers != dnn_trainer::num_computational_layers)
{
std::ostringstream sout;
sout << "Error deserializing dlib::dnn_trainer. The saved sync file is for a network with " << std::endl;
sout << "a different number of layers. We expected the number of layers to be " << dnn_trainer::num_computational_layers << " but" << std::endl;
sout << "instead the file contains " << num_computational_layers << " computational layers." << std::endl;
throw serialization_error(sout.str());
}
...
@@ -44,7 +44,7 @@ namespace dlib
typedef typename net_type::label_type label_type;
typedef typename net_type::input_type input_type;
const static size_t num_computational_layers = net_type::num_computational_layers;
dnn_trainer() = delete;
dnn_trainer(const dnn_trainer&) = delete;
@@ -110,9 +110,9 @@ namespace dlib
get_solvers()[0], the second layer's solver is
get_solvers()[1], and so on.
- It should be noted that you should never change the number of elements in
the vector returned by get_solvers() (i.e. don't do something that changes
get_solvers().size()). It will be set to net_type::num_computational_layers
by this object and you should leave it at that. The non-const version of
get_solvers() is provided only so you can tweak the parameters of a
particular solver.
!*/
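For example (a sketch; assumes solver_type, the trainer's solver typedef, is default-constructible):
dnn_trainer<net_type> trainer(net);
// tweak only the solver for computational layer 0; never resize the vector
trainer.get_solvers()[0] = solver_type();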
...
@@ -1190,8 +1190,10 @@ namespace
net_type net;
net_type net2(num_fc_outputs(4));
DLIB_TEST(layer<tag1>(net).num_computational_layers == 8);
DLIB_TEST(layer<skip1>(net).num_computational_layers == 8+3+3);
DLIB_TEST(layer<tag1>(net).num_layers == 10);
DLIB_TEST(layer<skip1>(net).num_layers == 10+3+3+1);
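// The counts differ because num_layers also includes the non-computational
// layers (the input, tag, and skip layers) that num_computational_layers
// skips.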
DLIB_TEST(&layer<skip1>(net).get_output() == &layer<tag1>(net).get_output());
DLIB_TEST(&layer<skip1>(net).get_output() != &layer<tag1>(net).subnet().subnet().get_output());
DLIB_TEST(net.subnet().subnet().subnet().layer_details().get_num_outputs() == 10);
...
@@ -122,7 +122,7 @@ int main(int argc, char** argv) try
// prelu layers have a floating point parameter. If you want to set it to
// something other than its default value you can do so like this:
net_type2 pnet(prelu_(0.2),
prelu_(0.25),
repeat_group(prelu_(0.3),prelu_(0.4)) // Initialize all the prelu instances in the repeat
// layer. repeat_group() is needed to group the
// things that are part of repeat's block.
@@ -132,59 +132,72 @@ int main(int argc, char** argv) try
// order the layers are defined, but it will skip layers where the
// assignment doesn't make sense.
// Now let's print the details of the pnet to the screen and inspect it.
cout << "The pnet has " << pnet.num_layers << " layers in it." << endl;
cout << pnet << endl;
// These print statements will output this (I've truncated it since it's
// long, but you get the idea):
/*
The pnet has 125 layers in it.
layer<0> loss_multiclass_log
layer<1> fc (num_outputs=10)
layer<2> avg_pool (nr=11, nc=11, stride_y=11, stride_x=11)
layer<3> prelu (initial_param_value=0.2)
layer<4> add_prev
layer<5> bn_con
layer<6> con (num_filters=8, nr=3, nc=3, stride_y=1, stride_x=1)
layer<7> prelu (initial_param_value=0.25)
layer<8> bn_con
layer<9> con (num_filters=8, nr=3, nc=3, stride_y=1, stride_x=1)
layer<10> tag1
...
layer<33> con (num_filters=8, nr=3, nc=3, stride_y=2, stride_x=2)
layer<34> tag1
layer<35> tag4
layer<36> prelu (initial_param_value=0.3)
layer<37> add_prev
layer<38> bn_con
...
layer<114> con (num_filters=8, nr=3, nc=3, stride_y=2, stride_x=2)
layer<115> tag1
layer<116> relu
layer<117> add_prev
layer<118> bn_con
layer<119> con (num_filters=8, nr=3, nc=3, stride_y=1, stride_x=1)
layer<120> relu
layer<121> bn_con
layer<122> con (num_filters=8, nr=3, nc=3, stride_y=1, stride_x=1)
layer<123> tag1
layer<124> input<matrix>
*/
// Now that we know the index numbers for each layer, we can access them
// individually using layer<index>(pnet). For example, to access the output
// tensor for the first prelu layer we can say:
layer<3>(pnet).get_output();
// Or to print the prelu parameter for layer 7 we can say:
cout << "prelu param: "<< layer<7>(pnet).layer_details().get_initial_param_value() << endl;
// We can also access layers by their type. This next statement finds the
// first tag1 layer in pnet, and is therefore equivalent to calling
// layer<10>(pnet):
layer<tag1>(pnet);
// The tag layers don't do anything at all and exist simply so you can tag
// parts of your network and access them by layer<tag>(). You can also
// index relative to a tag. So for example, to access the layer immediately
// after tag4 you can say:
layer<tag4,1>(pnet); // Equivalent to layer<35+1>(pnet).
// Or to access the layer 2 layers after tag4:
layer<tag4,2>(pnet);
// Tagging is a very useful tool for making complex network structures. For
// example, the add_prev1 layer is implemented internally by using a call to
// layer<tag1>().
// Ok, that's enough talk about defining and inspecting networks. Let's
// talk about training networks!
// The dnn_trainer will use SGD by default, but you can tell it to use
// different solvers like adam.
...