diff --git a/dlib/dnn/core.h b/dlib/dnn/core.h index 14eabf1b40425045ee928603c7c9b3c85d33be18..59439292cc81d9cb36d09567de0da7ac2d745e1f 100644 --- a/dlib/dnn/core.h +++ b/dlib/dnn/core.h @@ -17,12 +17,48 @@ #include <cmath> #include <vector> #include "tensor_tools.h" +#include <type_traits> namespace dlib { +// ---------------------------------------------------------------------------------------- + + namespace impl + { + class repeat_input_layer + { + /*! + None of the declarations in this object are really used. The only reason it + exists is to allow the repeat object to use a special input layer in its + internal networks which will cause add_tag_layer objects that happen to be + right at the input to not create copies of their input tensors. So + introducing the repeat_input_layer object allows us to optimize the + implementation of add_tag_layer for a special case that arises when it's + used in the context of the repeat layer. + !*/ + public: + typedef int input_type; + const static unsigned int sample_expansion_factor = 1; + + template <typename input_iterator> + void to_tensor ( + input_iterator , + input_iterator , + resizable_tensor& + ) const + { + DLIB_CASSERT(false,"This function should never be called"); + } + + friend void serialize(const repeat_input_layer&, std::ostream&){} + friend void deserialize(repeat_input_layer&, std::istream&){} + friend std::ostream& operator<<(std::ostream& out, const repeat_input_layer&) { out << "repeat_input_layer"; return out; } + }; + } + // ---------------------------------------------------------------------------------------- inline double log1pexp(double x) @@ -459,7 +495,7 @@ namespace dlib subnet_wrapper& operator=(const subnet_wrapper&) = delete; typedef T wrapped_type; - const static size_t num_layers = T::num_layers; + const static size_t num_computational_layers = T::num_computational_layers; subnet_wrapper(T& l_) : l(l_),subnetwork(l.subnet()) {} @@ -483,7 +519,7 @@ namespace dlib subnet_wrapper& operator=(const subnet_wrapper&) = delete; typedef T wrapped_type; - const static size_t num_layers = T::num_layers; + const static size_t num_computational_layers = T::num_computational_layers; subnet_wrapper(T& l_) : l(l_),subnetwork(l.subnet()) {} @@ -516,6 +552,7 @@ namespace dlib typedef SUBNET subnet_type; typedef typename subnet_type::input_type input_type; const static size_t num_layers = subnet_type::num_layers + 1; + const static size_t num_computational_layers = subnet_type::num_computational_layers + 1; const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor; add_layer( @@ -776,7 +813,7 @@ namespace dlib template <typename solver_type> void update(const tensor& x, const tensor& gradient_input, sstack<solver_type> solvers, double step_size) { - DLIB_CASSERT(solvers.size()>=num_layers,""); + DLIB_CASSERT(solvers.size()>=num_computational_layers,""); dimpl::subnet_wrapper<subnet_type> wsub(*subnetwork); params_grad.copy_size(details.get_layer_params()); impl::call_layer_backward(details, private_get_output(), @@ -836,6 +873,18 @@ namespace dlib deserialize(item.cached_output, in); } + friend std::ostream& operator<< (std::ostream& out, const add_layer& item) + { + item.print(out, 0); + return out; + } + + void print (std::ostream& out, unsigned long idx=0) const + { + out << "layer<" << idx << ">\t" << layer_details() << "\n"; + subnet().print(out, idx+1); + } + private: bool this_layer_operates_inplace( @@ -895,7 +944,8 @@ namespace dlib typedef INPUT_LAYER subnet_type; typedef typename
INPUT_LAYER::input_type input_type; const static unsigned int sample_expansion_factor = INPUT_LAYER::sample_expansion_factor; - const static size_t num_layers = 1; + const static size_t num_layers = 2; + const static size_t num_computational_layers = 1; static_assert(sample_expansion_factor >= 1, "The input layer can't produce fewer output tensors than there are inputs."); @@ -1077,7 +1127,7 @@ namespace dlib template <typename solver_type> void update(const tensor& x, const tensor& gradient_input, sstack<solver_type> solvers, double step_size) { - DLIB_CASSERT(solvers.size()>=num_layers,""); + DLIB_CASSERT(solvers.size()>=num_computational_layers,""); // make sure grad_final is initialized to 0 if (!have_same_dimensions(x, grad_final)) grad_final.copy_size(x); @@ -1143,6 +1193,21 @@ namespace dlib deserialize(item.grad_final, in); } + friend std::ostream& operator<< (std::ostream& out, const add_layer& item) + { + item.print(out, 0); + return out; + } + + void print (std::ostream& out, unsigned long idx=0) const + { + out << "layer<" << idx << ">\t" << layer_details() << "\n"; + // Don't print the repeat_input_layer since it doesn't exist from the user's + // point of view. It's just an artifact of how repeat<> works. + if (!std::is_same<subnet_type, impl::repeat_input_layer>::value) + out << "layer<" << idx+1 << ">\t" << subnet() << "\n"; + } + private: bool this_layer_requires_forward_output( @@ -1217,7 +1282,8 @@ namespace dlib public: typedef SUBNET subnet_type; typedef typename subnet_type::input_type input_type; - const static size_t num_layers = subnet_type::num_layers; + const static size_t num_layers = subnet_type::num_layers + 1; + const static size_t num_computational_layers = subnet_type::num_computational_layers; const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor; static_assert(sample_expansion_factor >= 1, "The input layer can't produce fewer output tensors than there are inputs."); @@ -1317,6 +1383,18 @@ namespace dlib deserialize(item.subnetwork, in); } + friend std::ostream& operator<< (std::ostream& out, const add_tag_layer& item) + { + item.print(out, 0); + return out; + } + + void print (std::ostream& out, unsigned long idx=0) const + { + out << "layer<" << idx << ">\ttag" << ID << "\n"; + subnet().print(out, idx+1); + } + private: template <typename T, typename U, typename E> @@ -1356,38 +1434,6 @@ namespace dlib // ---------------------------------------------------------------------------------------- - namespace impl - { - class repeat_input_layer - { - /*! - None of the declarations in this object are really used. The only reason it - exists is to allow the repeat object to use a special input layer in its - internal networks which will cause add_tag_layer objects that happen to be - right at the input to not create copies of their input tensors. So - introducing the repeat_input_layer object allows us to optimize the - implementation of add_tag_layer for a special case that arises when it's - used in the context of the repeat layer. 
- !*/ - public: - typedef int input_type; - const static unsigned int sample_expansion_factor = 1; - - template <typename input_iterator> - void to_tensor ( - input_iterator , - input_iterator , - resizable_tensor& - ) const - { - DLIB_CASSERT(false,"This function should never be called"); - } - - friend void serialize(const repeat_input_layer&, std::ostream&){} - friend void deserialize(repeat_input_layer&, std::istream&){} - }; - } - template <typename ...T> struct decorator_repeat_group { @@ -1416,7 +1462,14 @@ namespace dlib public: typedef SUBNET subnet_type; typedef typename SUBNET::input_type input_type; - const static size_t num_layers = (REPEATED_LAYER<SUBNET>::num_layers-SUBNET::num_layers)*num + SUBNET::num_layers; + const static size_t comp_layers_in_each_group = (REPEATED_LAYER<SUBNET>::num_computational_layers-SUBNET::num_computational_layers); + const static size_t comp_layers_in_repeated_group = comp_layers_in_each_group*num; + const static size_t num_computational_layers = comp_layers_in_repeated_group + SUBNET::num_computational_layers; + + const static size_t layers_in_each_group = (REPEATED_LAYER<SUBNET>::num_layers-SUBNET::num_layers); + const static size_t layers_in_repeated_group = layers_in_each_group*num; + const static size_t num_layers = subnet_type::num_layers + layers_in_repeated_group; + const static unsigned int sample_expansion_factor = SUBNET::sample_expansion_factor; typedef REPEATED_LAYER<impl::repeat_input_layer> repeated_layer_type; @@ -1554,7 +1607,7 @@ namespace dlib template <typename solver_type> void update(const tensor& x, const tensor& gradient_input, sstack<solver_type> solvers, double step_size) { - const auto cnt = (REPEATED_LAYER<SUBNET>::num_layers-SUBNET::num_layers); + const auto cnt = (REPEATED_LAYER<SUBNET>::num_computational_layers-SUBNET::num_computational_layers); if (details.size() > 1) { details[0].update(details[1].get_output(), gradient_input, solvers,step_size); @@ -1602,8 +1655,24 @@ namespace dlib deserialize(item.subnetwork, in); } + friend std::ostream& operator<< (std::ostream& out, const repeat& item) + { + item.print(out, 0); + return out; + } + + void print (std::ostream& out, unsigned long idx=0) const + { + for (size_t i = 0; i < num_repetitions(); ++i) + { + get_repeated_layer(i).print(out, idx); + idx += layers_in_each_group; + } + subnet().print(out, idx); + } private: + template <typename T, typename U, typename E> friend class add_layer; template <typename T, bool is_first, typename E> @@ -1653,7 +1722,10 @@ namespace dlib public: typedef INPUT_LAYER subnet_type; typedef typename subnet_type::input_type input_type; - const static size_t num_layers = 1; + // This layer counts as a computational layer because it copies and stores the + // inputs. + const static size_t num_computational_layers = 1; + const static size_t num_layers = 2; const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor; static_assert(sample_expansion_factor >= 1, "The input layer can't produce fewer output tensors than there are inputs."); @@ -1809,6 +1881,21 @@ namespace dlib item.cached_output_ptr = nullptr; } + friend std::ostream& operator<< (std::ostream& out, const add_tag_layer& item) + { + item.print(out, 0); + return out; + } + + void print (std::ostream& out, unsigned long idx=0) const + { + out << "layer<"<<idx << ">\ttag" << ID << "\n"; + // Don't print the repeat_input_layer since it doesn't exist from the user's + // point of view. It's just an artifact of how repeat<> works. 
+ if (!std::is_same<subnet_type, impl::repeat_input_layer>::value) + out << "layer<"<< idx+1 << ">\t" << subnet() << "\n"; + } + private: template <typename T, typename U, typename E> @@ -1905,8 +1992,9 @@ namespace dlib typedef LOSS_DETAILS loss_details_type; typedef SUBNET subnet_type; typedef typename subnet_type::input_type input_type; - // Note that the loss layer doesn't count as an additional layer. - const static size_t num_layers = subnet_type::num_layers; + const static size_t num_layers = subnet_type::num_layers + 1; + // Note that the loss layer doesn't count as an additional computational layer. + const static size_t num_computational_layers = subnet_type::num_computational_layers; const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor; typedef typename get_loss_layer_label_type<LOSS_DETAILS>::type label_type; @@ -2137,8 +2225,21 @@ namespace dlib deserialize(item.subnetwork, in); } + friend std::ostream& operator<< (std::ostream& out, const add_loss_layer& item) + { + item.print(out, 0); + return out; + } + + void print (std::ostream& out, unsigned long idx=0) const + { + out << "layer<" << idx << ">\t" << loss_details() << "\n"; + subnet().print(out, idx+1); + } + private: + void swap(add_loss_layer& item) { std::swap(loss, item.loss); @@ -2164,9 +2265,10 @@ namespace dlib namespace impl { - template <unsigned int i, typename T> + template <unsigned int i, typename T, typename enabled = void> struct layer_helper { + static_assert(i < T::num_layers, "Call to layer() attempted to access non-existing layer in neural network."); static T& makeT(); using next_type = typename std::remove_reference<decltype(makeT().subnet())>::type; using type = typename layer_helper<i-1,next_type>::type; @@ -2175,8 +2277,51 @@ namespace dlib return layer_helper<i-1,next_type>::layer(n.subnet()); } }; + template < + unsigned int i, + size_t N, template<typename> class L, typename S + > + struct layer_helper<i,repeat<N,L,S>, typename std::enable_if<(i!=0&&i>=repeat<N,L,S>::layers_in_repeated_group)>::type> + { + const static size_t layers_in_repeated_group = repeat<N,L,S>::layers_in_repeated_group; + + static repeat<N,L,S>& makeT(); + using next_type = typename std::remove_reference<decltype(makeT().subnet())>::type; + using type = typename layer_helper<i-layers_in_repeated_group,next_type>::type; + static type& layer(repeat<N,L,S>& n) + { + return layer_helper<i-layers_in_repeated_group,next_type>::layer(n.subnet()); + } + }; + template < + unsigned int i, + size_t N, template<typename> class L, typename S + > + struct layer_helper<i,repeat<N,L,S>, typename std::enable_if<(i!=0&&i<repeat<N,L,S>::layers_in_repeated_group)>::type> + { + const static size_t layers_in_each_group = repeat<N,L,S>::layers_in_each_group; + typedef typename repeat<N,L,S>::repeated_layer_type repeated_layer_type; + using next_type = repeated_layer_type; + using type = typename layer_helper<i%layers_in_each_group,next_type>::type; + static type& layer(repeat<N,L,S>& n) + { + return layer_helper<i%layers_in_each_group,next_type>::layer(n.get_repeated_layer(i/layers_in_each_group)); + } + }; + template < + size_t N, template<typename> class L, typename S + > + struct layer_helper<0,repeat<N,L,S>, void> + { + typedef typename repeat<N,L,S>::repeated_layer_type repeated_layer_type; + using type = repeated_layer_type; + static type& layer(repeat<N,L,S>& n) + { + return n.get_repeated_layer(0); + } + }; template <typename T> - struct layer_helper<0,T> + struct layer_helper<0,T,void> { using type = 
T; static type& layer(T& n) @@ -2258,7 +2403,8 @@ namespace dlib public: typedef SUBNET subnet_type; typedef typename subnet_type::input_type input_type; - const static size_t num_layers = subnet_type::num_layers; + const static size_t num_layers = subnet_type::num_layers + 1; + const static size_t num_computational_layers = subnet_type::num_computational_layers; const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor; static_assert(sample_expansion_factor >= 1, "The input layer can't produce fewer output tensors than there are inputs."); @@ -2374,8 +2520,21 @@ namespace dlib deserialize(item.subnetwork, in); } + friend std::ostream& operator<< (std::ostream& out, const add_skip_layer& item) + { + item.print(out, 0); + return out; + } + + void print (std::ostream& out, unsigned long idx=0) const + { + out << "layer<" << idx << ">\tskip\n"; + subnet().print(out, idx+1); + } + private: + template <typename T, typename U, typename E> friend class add_layer; template <typename T, bool is_first, typename E> diff --git a/dlib/dnn/core_abstract.h b/dlib/dnn/core_abstract.h index 27981db531c787b8d4c0d90f3a2b4ce678684ff6..43251fbf1532c4ad6be9cee3ad445a6c1386e0e0 100644 --- a/dlib/dnn/core_abstract.h +++ b/dlib/dnn/core_abstract.h @@ -205,8 +205,12 @@ namespace dlib typedef SUBNET subnet_type; typedef typename subnet_type::input_type input_type; const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor; - // If SUBNET is an input layer then num_layers == 1, otherwise it has the - // definition shown here: + // num_computational_layers will always give the number of layers in the network + // that transform tensors (i.e. layers defined by something that implements the + // EXAMPLE_COMPUTATIONAL_LAYER_ interface). This is all the layers except for + // loss, tag, and skip layers. + const static size_t num_computational_layers = subnet_type::num_computational_layers + 1; + // num_layers counts all the layers in the network regardless of their type. const static size_t num_layers = subnet_type::num_layers + 1; add_layer( @@ -444,7 +448,7 @@ namespace dlib - The given solvers have only ever been used with this network. That is, if you want to call update() on some other neural network object then you must NOT reuse the same solvers object. - - solvers.size() >= num_layers + - solvers.size() >= num_computational_layers - 0 < step_size <= 1 ensures - Back propagates the error gradient, get_gradient_input(), through this @@ -473,7 +477,7 @@ namespace dlib - The given solvers have only ever been used with this network. That is, if you want to call update() on some other neural network object then you must NOT reuse the same solvers object. - - solvers.size() >= num_layers + - solvers.size() >= num_computational_layers - 0 < step_size <= 1 ensures - This function is identical to the version of update() defined immediately @@ -508,9 +512,15 @@ namespace dlib }; - template <typename T, typename U>, + template <typename T, typename U> + std::ostream& operator<<(std::ostream& out, const add_layer<T,U>& item); + /*! + prints the network architecture to the given output stream. + !*/ + + template <typename T, typename U> void serialize(const add_layer<T,U>& item, std::ostream& out); - template <typename T, typename U>, + template <typename T, typename U> void deserialize(add_layer<T,U>& item, std::istream& in); /*! 
provides serialization support @@ -555,9 +565,8 @@ namespace dlib typedef LOSS_DETAILS loss_details_type; typedef SUBNET subnet_type; typedef typename subnet_type::input_type input_type; - // Note that the loss layer doesn't count as an additional layer since it doesn't - // have any learnable parameters. - const static size_t num_layers = subnet_type::num_layers; + const static size_t num_computational_layers = subnet_type::num_computational_layers; + const static size_t num_layers = subnet_type::num_layers + 1; const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor; // If LOSS_DETAILS is an unsupervised loss then label_type==no_label_type. // Otherwise it is defined as follows: @@ -838,7 +847,7 @@ namespace dlib - The given solvers have only ever been used with this network. That is, if you want to call update() on some other neural network object then you must NOT reuse the same solvers object. - - solvers.size() >= num_layers + - solvers.size() >= num_computational_layers - 0 < step_size <= 1 ensures - runs x through the network, compares the output to the expected output @@ -870,7 +879,7 @@ namespace dlib - The given solvers have only ever been used with this network. That is, if you want to call update() on some other neural network object then you must NOT reuse the same solvers object. - - solvers.size() >= num_layers + - solvers.size() >= num_computational_layers - 0 < step_size <= 1 ensures - runs [ibegin,iend) through the network, compares the output to the @@ -901,7 +910,7 @@ namespace dlib - The given solvers have only ever been used with this network. That is, if you want to call update() on some other neural network object then you must NOT reuse the same solvers object. - - solvers.size() >= num_layers + - solvers.size() >= num_computational_layers - 0 < step_size <= 1 ensures - runs x through the network and updates the network parameters by @@ -928,7 +937,7 @@ namespace dlib - The given solvers have only ever been used with this network. That is, if you want to call update() on some other neural network object then you must NOT reuse the same solvers object. - - solvers.size() >= num_layers + - solvers.size() >= num_computational_layers - 0 < step_size <= 1 ensures - runs [ibegin,iend) through the network and updates the network parameters @@ -951,9 +960,15 @@ namespace dlib !*/ }; - template <typename T, typename U>, + template <typename T, typename U> + std::ostream& operator<<(std::ostream& out, const add_loss_layer<T,U>& item); + /*! + prints the network architecture to the given output stream. + !*/ + + template <typename T, typename U> void serialize(const add_loss_layer<T,U>& item, std::ostream& out); - template <typename T, typename U>, + template <typename T, typename U> void deserialize(add_loss_layer<T,U>& item, std::istream& in); /*! 
provides serialization support @@ -1013,6 +1028,7 @@ namespace dlib typedef SUBNET subnet_type; typedef typename SUBNET::input_type input_type; + const static size_t num_computational_layers = (REPEATED_LAYER<SUBNET>::num_computational_layers-SUBNET::num_computational_layers)*num + SUBNET::num_computational_layers; const static size_t num_layers = (REPEATED_LAYER<SUBNET>::num_layers-SUBNET::num_layers)*num + SUBNET::num_layers; const static unsigned int sample_expansion_factor = SUBNET::sample_expansion_factor; typedef REPEATED_LAYER<an_unspecified_input_type> repeated_layer_type; @@ -1095,6 +1111,20 @@ namespace dlib !*/ }; + template < size_t num, template<typename> class T, typename U > + std::ostream& operator<<(std::ostream& out, const repeat<num,T,U>& item); + /*! + prints the network architecture to the given output stream. + !*/ + + template < size_t num, template<typename> class T, typename U > + void serialize(const repeat<num,T,U>& item, std::ostream& out); + template < size_t num, template<typename> class T, typename U > + void deserialize(repeat<num,T,U>& item, std::istream& in); + /*! + provides serialization support + !*/ + // ---------------------------------------------------------------------------------------- template < @@ -1124,9 +1154,15 @@ namespace dlib !*/ }; - template <unsigned long ID, typename U>, + template <unsigned long ID, typename U> + std::ostream& operator<<(std::ostream& out, const add_tag_layer<ID,U>& item); + /*! + prints the network architecture to the given output stream. + !*/ + + template <unsigned long ID, typename U> void serialize(const add_tag_layer<ID,U>& item, std::ostream& out); - template <unsigned long ID, typename U>, + template <unsigned long ID, typename U> void deserialize(add_tag_layer<ID,U>& item, std::istream& in); /*! provides serialization support @@ -1168,6 +1204,12 @@ namespace dlib !*/ }; + template <template<typename> class T, typename U> + std::ostream& operator<<(std::ostream& out, const add_skip_layer<T,U>& item); + /*! + prints the network architecture to the given output stream. + !*/ + template <template<typename> class T, typename U> void serialize(const add_skip_layer<T,U>& item, std::ostream& out); template <template<typename> class T, typename U> @@ -1200,9 +1242,15 @@ namespace dlib requires - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or add_tag_layer. + - i < net_type::num_layers ensures - - This function chains together i calls to n.subnet() and returns the - result. So for example: + - This function allows you to access any layer in a network by its layer index + i. Therefore, it will walk i steps down the network and return the layer + object there. Since networks can be big, the best way to find layer index + numbers is to print a network to the screen since the print out will include + indexes for each layer. + - In general, this function chains together i calls to n.subnet() and returns + the result. So for example: - if (i == 0) - returns n - else if (i == 1) @@ -1213,6 +1261,10 @@ namespace dlib - returns n.subnet().subnet().subnet() - else - etc. + Except that when it hits a repeat layer it recurses into the repeated layers + contained inside. That is, if the layer index indicates a layer in a repeat + object this function will make the appropriate call to get_repeated_layer() + and do the right thing. 
!*/ template < diff --git a/dlib/dnn/input.h b/dlib/dnn/input.h index 5291f07ca23ccb770280c9a889b3530c43493633..443f334597a386bab0f11449a4384a495fd4d85d 100644 --- a/dlib/dnn/input.h +++ b/dlib/dnn/input.h @@ -119,6 +119,12 @@ namespace dlib deserialize(item.avg_blue, in); } + friend std::ostream& operator<<(std::ostream& out, const input_rgb_image& item) + { + out << "input_rgb_image("<<item.avg_red<<","<<item.avg_green<<","<<item.avg_blue<<")"; + return out; + } + private: float avg_red; float avg_green; @@ -201,6 +207,12 @@ namespace dlib throw serialization_error("Unexpected version found while deserializing dlib::input."); } + friend std::ostream& operator<<(std::ostream& out, const input& item) + { + out << "input<matrix>"; + return out; + } + }; // ---------------------------------------------------------------------------------------- @@ -277,6 +289,11 @@ namespace dlib if (version != "input<array2d>") throw serialization_error("Unexpected version found while deserializing dlib::input."); } + friend std::ostream& operator<<(std::ostream& out, const input& item) + { + out << "input<array2d>"; + return out; + } }; diff --git a/dlib/dnn/input_abstract.h b/dlib/dnn/input_abstract.h index f4154b4b216e2312df7463651bb8ca5fe87087cd..96a7aaee2f330d67036de5737fc763b18312349d 100644 --- a/dlib/dnn/input_abstract.h +++ b/dlib/dnn/input_abstract.h @@ -86,6 +86,11 @@ namespace dlib !*/ }; + std::ostream& operator<<(std::ostream& out, const EXAMPLE_INPUT_LAYER& item); + /*! + print a string describing this layer. + !*/ + void serialize(const EXAMPLE_INPUT_LAYER& item, std::ostream& out); void deserialize(EXAMPLE_INPUT_LAYER& item, std::istream& in); /*! @@ -142,14 +147,6 @@ namespace dlib !*/ }; - template <typename T> - void serialize(const input<T>& item, std::ostream& out); - template <typename T> - void deserialize(input<T>& item, std::istream& in); - /*! - provides serialization support - !*/ - // ---------------------------------------------------------------------------------------- class input_rgb_image @@ -158,8 +155,8 @@ namespace dlib WHAT THIS OBJECT REPRESENTS This input layer works with RGB images of type matrix<rgb_pixel>. It is very similar to the dlib::input layer except that it allows you to subtract - the average color value from each color channel when convting an image to a - tensor. + the average color value from each color channel when converting an image to + a tensor. !*/ public: typedef matrix<rgb_pixel> input_type; @@ -227,14 +224,11 @@ namespace dlib - #data.nc() == C - #data.k() == 3 Moreover, each color channel is normalized by having its average value - subtracted (accroding to get_avg_red(), get_avg_green(), or + subtracted (according to get_avg_red(), get_avg_green(), or get_avg_blue()) and then is divided by 256.0. 
!*/ }; - void serialize(const input_rgb_image& item, std::ostream& out); - void deserialize(input_rgb_image& item, std::istream& in); - } #endif // DLIB_DNn_INPUT_ABSTRACT_H_ diff --git a/dlib/dnn/layers.h b/dlib/dnn/layers.h index 62c71cdc7f00c0ea2f17c9072f990afc3a24e16f..b5b7bf8c65dbb7f10546e67109e3b03343b43832 100644 --- a/dlib/dnn/layers.h +++ b/dlib/dnn/layers.h @@ -157,6 +157,20 @@ namespace dlib if (stride_x != _stride_x) throw serialization_error("Wrong stride_x found while deserializing dlib::con_"); } + + friend std::ostream& operator<<(std::ostream& out, const con_& item) + { + out << "con\t (" + << "num_filters="<<_num_filters + << ", nr="<<_nr + << ", nc="<<_nc + << ", stride_y="<<_stride_y + << ", stride_x="<<_stride_x + << ")"; + return out; + } + + private: resizable_tensor params; @@ -277,6 +291,18 @@ namespace dlib if (_stride_x != stride_x) throw serialization_error("Wrong stride_x found while deserializing dlib::max_pool_"); } + friend std::ostream& operator<<(std::ostream& out, const max_pool_& item) + { + out << "max_pool (" + << "nr="<<_nr + << ", nc="<<_nc + << ", stride_y="<<_stride_y + << ", stride_x="<<_stride_x + << ")"; + return out; + } + + private: @@ -393,6 +419,16 @@ namespace dlib if (_stride_x != stride_x) throw serialization_error("Wrong stride_x found while deserializing dlib::avg_pool_"); } + friend std::ostream& operator<<(std::ostream& out, const avg_pool_& item) + { + out << "avg_pool (" + << "nr="<<_nr + << ", nc="<<_nc + << ", stride_y="<<_stride_y + << ", stride_x="<<_stride_x + << ")"; + return out; + } private: tt::pooling ap; @@ -557,6 +593,15 @@ namespace dlib } } + friend std::ostream& operator<<(std::ostream& out, const bn_& item) + { + if (mode == CONV_MODE) + out << "bn_con"; + else + out << "bn_fc"; + return out; + } + private: friend class affine_; @@ -695,6 +740,23 @@ namespace dlib if (bias_mode != (fc_bias_mode)bmode) throw serialization_error("Wrong fc_bias_mode found while deserializing dlib::fc_"); } + friend std::ostream& operator<<(std::ostream& out, const fc_& item) + { + if (bias_mode == FC_HAS_BIAS) + { + out << "fc\t (" + << "num_outputs="<<item.num_outputs + << ")"; + } + else + { + out << "fc_no_bias (" + << "num_outputs="<<item.num_outputs + << ")"; + } + return out; + } + private: unsigned long num_outputs; @@ -793,6 +855,14 @@ namespace dlib deserialize(item.mask, in); } + friend std::ostream& operator<<(std::ostream& out, const dropout_& item) + { + out << "dropout\t (" + << "drop_rate="<<item.drop_rate + << ")"; + return out; + } + private: float drop_rate; resizable_tensor mask; @@ -872,6 +942,14 @@ namespace dlib deserialize(item.val, in); } + friend std::ostream& operator<<(std::ostream& out, const multiply_& item) + { + out << "multiply (" + << "val="<<item.val + << ")"; + return out; + } + private: float val; resizable_tensor params; // unused @@ -1021,6 +1099,12 @@ namespace dlib item.mode = (layer_mode)mode; } + friend std::ostream& operator<<(std::ostream& out, const affine_& ) + { + out << "affine"; + return out; + } + private: resizable_tensor params, empty_params; alias_tensor gamma, beta; @@ -1079,6 +1163,13 @@ namespace dlib throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::add_prev_."); } + friend std::ostream& operator<<(std::ostream& out, const add_prev_& item) + { + out << "add_prev"; + return out; + } + + private: resizable_tensor params; }; @@ -1156,6 +1247,13 @@ namespace dlib throw serialization_error("Unexpected version '"+version+"' found while 
deserializing dlib::relu_."); } + friend std::ostream& operator<<(std::ostream& out, const relu_& ) + { + out << "relu"; + return out; + } + + private: resizable_tensor params; }; @@ -1226,6 +1324,14 @@ namespace dlib deserialize(item.initial_param_value, in); } + friend std::ostream& operator<<(std::ostream& out, const prelu_& item) + { + out << "prelu\t (" + << "initial_param_value="<<item.initial_param_value + << ")"; + return out; + } + private: resizable_tensor params; float initial_param_value; @@ -1279,6 +1385,13 @@ namespace dlib throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::sig_."); } + friend std::ostream& operator<<(std::ostream& out, const sig_& ) + { + out << "sig"; + return out; + } + + private: resizable_tensor params; }; @@ -1332,6 +1445,13 @@ namespace dlib throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::htan_."); } + friend std::ostream& operator<<(std::ostream& out, const htan_& ) + { + out << "htan"; + return out; + } + + private: resizable_tensor params; }; @@ -1385,6 +1505,12 @@ namespace dlib throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::softmax_."); } + friend std::ostream& operator<<(std::ostream& out, const softmax_& ) + { + out << "softmax"; + return out; + } + private: resizable_tensor params; }; diff --git a/dlib/dnn/layers_abstract.h b/dlib/dnn/layers_abstract.h index 4c2ebfdfcd12486c29e57972c6f2ad410e316507..4a46a2759fe835b92e1aa6a7f8d8abc714d17d4e 100644 --- a/dlib/dnn/layers_abstract.h +++ b/dlib/dnn/layers_abstract.h @@ -306,6 +306,11 @@ namespace dlib }; + std::ostream& operator<<(std::ostream& out, const EXAMPLE_COMPUTATIONAL_LAYER_& item); + /*! + print a string describing this layer. + !*/ + void serialize(const EXAMPLE_COMPUTATIONAL_LAYER_& item, std::ostream& out); void deserialize(EXAMPLE_COMPUTATIONAL_LAYER_& item, std::istream& in); /*! @@ -390,11 +395,6 @@ namespace dlib These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface. !*/ - friend void serialize(const fc_& item, std::ostream& out); - friend void deserialize(fc_& item, std::istream& in); - /*! - provides serialization support - !*/ }; template < @@ -500,12 +500,6 @@ namespace dlib These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface. !*/ - friend void serialize(const con_& item, std::ostream& out); - friend void deserialize(con_& item, std::istream& in); - /*! - provides serialization support - !*/ - }; template < @@ -565,12 +559,6 @@ namespace dlib !*/ }; - void serialize(const dropout_& item, std::ostream& out); - void deserialize(dropout_& item, std::istream& in); - /*! - provides serialization support - !*/ - template <typename SUBNET> using dropout = add_layer<dropout_, SUBNET>; @@ -623,12 +611,6 @@ namespace dlib !*/ }; - void serialize(const multiply_& item, std::ostream& out); - void deserialize(multiply_& item, std::istream& in); - /*! - provides serialization support - !*/ - template <typename SUBNET> using multiply = add_layer<multiply_, SUBNET>; @@ -725,13 +707,6 @@ namespace dlib /*! These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface. !*/ - - friend void serialize(const bn_& item, std::ostream& out); - friend void deserialize(bn_& item, std::istream& in); - /*! 
- provides serialization support - !*/ - }; template <typename SUBNET> @@ -829,12 +804,6 @@ namespace dlib }; - void serialize(const affine_& item, std::ostream& out); - void deserialize(affine_& item, std::istream& in); - /*! - provides serialization support - !*/ - template <typename SUBNET> using affine = add_layer<affine_, SUBNET>; @@ -927,12 +896,6 @@ namespace dlib interface. Note that this layer doesn't have any parameters, so the tensor returned by get_layer_params() is always empty. !*/ - - friend void serialize(const max_pool_& item, std::ostream& out); - friend void deserialize(max_pool_& item, std::istream& in); - /*! - provides serialization support - !*/ }; template < @@ -1034,11 +997,6 @@ namespace dlib returned by get_layer_params() is always empty. !*/ - friend void serialize(const avg_pool_& item, std::ostream& out); - friend void deserialize(avg_pool_& item, std::istream& in); - /*! - provides serialization support - !*/ }; template < @@ -1080,12 +1038,6 @@ namespace dlib !*/ }; - void serialize(const relu_& item, std::ostream& out); - void deserialize(relu_& item, std::istream& in); - /*! - provides serialization support - !*/ - template <typename SUBNET> using relu = add_layer<relu_, SUBNET>; @@ -1137,12 +1089,6 @@ namespace dlib !*/ }; - void serialize(const prelu_& item, std::ostream& out); - void deserialize(prelu_& item, std::istream& in); - /*! - provides serialization support - !*/ - template <typename SUBNET> using prelu = add_layer<prelu_, SUBNET>; @@ -1176,12 +1122,6 @@ namespace dlib !*/ }; - void serialize(const sig_& item, std::ostream& out); - void deserialize(sig_& item, std::istream& in); - /*! - provides serialization support - !*/ - template <typename SUBNET> using sig = add_layer<sig_, SUBNET>; @@ -1215,12 +1155,6 @@ namespace dlib !*/ }; - void serialize(const htan_& item, std::ostream& out); - void deserialize(htan_& item, std::istream& in); - /*! - provides serialization support - !*/ - template <typename SUBNET> using htan = add_layer<htan_, SUBNET>; @@ -1262,12 +1196,6 @@ namespace dlib !*/ }; - void serialize(const softmax_& item, std::ostream& out); - void deserialize(softmax_& item, std::istream& in); - /*! - provides serialization support - !*/ - template <typename SUBNET> using softmax = add_layer<softmax_, SUBNET>; @@ -1307,11 +1235,6 @@ namespace dlib !*/ }; - void serialize(const add_prev_& item, std::ostream& out); - void deserialize(add_prev_& item, std::istream& in); - /*! 
- provides serialization support - !*/ template < template<typename> class tag, diff --git a/dlib/dnn/loss.h b/dlib/dnn/loss.h index 18345d46deb491132be60310e218b517ec4ecfbf..64e53bbb7ec44b7c841f12ddac9a98b0a1a1e1a2 100644 --- a/dlib/dnn/loss.h +++ b/dlib/dnn/loss.h @@ -100,6 +100,12 @@ namespace dlib throw serialization_error("Unexpected version found while deserializing dlib::loss_binary_hinge_."); } + friend std::ostream& operator<<(std::ostream& out, const loss_binary_hinge_& ) + { + out << "loss_binary_hinge"; + return out; + } + }; template <typename SUBNET> @@ -203,6 +209,12 @@ namespace dlib throw serialization_error("Unexpected version found while deserializing dlib::loss_binary_log_."); } + friend std::ostream& operator<<(std::ostream& out, const loss_binary_log_& ) + { + out << "loss_binary_log"; + return out; + } + }; template <typename SUBNET> @@ -307,6 +319,13 @@ namespace dlib throw serialization_error("Unexpected version found while deserializing dlib::loss_multiclass_log_."); } + friend std::ostream& operator<<(std::ostream& out, const loss_multiclass_log_& ) + { + out << "loss_multiclass_log"; + return out; + } + + }; template <typename SUBNET> diff --git a/dlib/dnn/loss_abstract.h b/dlib/dnn/loss_abstract.h index f28666f18d211c0170a78235118e453c0856a07a..ff832f0b5419fa0ad66898b24625361c4dc33f15 100644 --- a/dlib/dnn/loss_abstract.h +++ b/dlib/dnn/loss_abstract.h @@ -125,6 +125,11 @@ namespace dlib !*/ }; + std::ostream& operator<<(std::ostream& out, const EXAMPLE_LOSS_LAYER_& item); + /*! + print a string describing this layer. + !*/ + void serialize(const EXAMPLE_LOSS_LAYER_& item, std::ostream& out); void deserialize(EXAMPLE_LOSS_LAYER_& item, std::istream& in); /*! @@ -200,12 +205,6 @@ namespace dlib }; - void serialize(const loss_binary_hinge_& item, std::ostream& out); - void deserialize(loss_binary_hinge_& item, std::istream& in); - /*! - provides serialization support - !*/ - template <typename SUBNET> using loss_binary_hinge = add_loss_layer<loss_binary_hinge_, SUBNET>; @@ -272,12 +271,6 @@ namespace dlib }; - void serialize(const loss_binary_log_& item, std::ostream& out); - void deserialize(loss_binary_log_& item, std::istream& in); - /*! - provides serialization support - !*/ - template <typename SUBNET> using loss_binary_log = add_loss_layer<loss_binary_log_, SUBNET>; @@ -346,12 +339,6 @@ namespace dlib }; - void serialize(const loss_multiclass_log_& item, std::ostream& out); - void deserialize(loss_multiclass_log_& item, std::istream& in); - /*! 
- provides serialization support - !*/ - template <typename SUBNET> using loss_multiclass_log = add_loss_layer<loss_multiclass_log_, SUBNET>; diff --git a/dlib/dnn/trainer.h b/dlib/dnn/trainer.h index 3df3562d961fabd888e4ce6d0e753c10560a4f1a..08a75826b7e705b2c1de739b3f32de0fc3c40a7c 100644 --- a/dlib/dnn/trainer.h +++ b/dlib/dnn/trainer.h @@ -37,12 +37,12 @@ namespace dlib typedef typename net_type::label_type label_type; typedef typename net_type::input_type input_type; - const static size_t num_layers = net_type::num_layers; + const static size_t num_computational_layers = net_type::num_computational_layers; dnn_trainer() = delete; dnn_trainer(const dnn_trainer&) = delete; - explicit dnn_trainer(net_type& net_) : job_pipe(0), net(net_), solvers(num_layers) + explicit dnn_trainer(net_type& net_) : job_pipe(0), net(net_), solvers(num_computational_layers) { init(); } @@ -50,7 +50,7 @@ namespace dlib dnn_trainer( net_type& net_, const solver_type& solver_ - ) : job_pipe(0), net(net_), solvers(num_layers, solver_) + ) : job_pipe(0), net(net_), solvers(num_computational_layers, solver_) { init(); } @@ -75,7 +75,7 @@ namespace dlib ) { wait_for_thread_to_pause(); - solvers = std::vector<solver_type>(num_layers, solver_); + solvers = std::vector<solver_type>(num_computational_layers, solver_); } unsigned long get_mini_batch_size ( @@ -504,7 +504,7 @@ namespace dlib int version = 5; serialize(version, out); - size_t nl = dnn_trainer::num_layers; + size_t nl = dnn_trainer::num_computational_layers; serialize(nl, out); serialize(item.rs, out); serialize(item.previous_loss_values, out); @@ -530,14 +530,14 @@ namespace dlib if (version != 5) throw serialization_error("Unexpected version found while deserializing dlib::dnn_trainer."); - size_t num_layers = 0; - deserialize(num_layers, in); - if (num_layers != dnn_trainer::num_layers) + size_t num_computational_layers = 0; + deserialize(num_computational_layers, in); + if (num_computational_layers != dnn_trainer::num_computational_layers) { std::ostringstream sout; sout << "Error deserializing dlib::dnn_trainer. The saved sync file is for a network with " << std::endl; - sout << "a different number of layers. We expected the number of layers to be " << dnn_trainer::num_layers << " but" << std::endl; - sout << "instead the file contains " << num_layers << " layers." << std::endl; + sout << "a different number of layers. We expected the number of layers to be " << dnn_trainer::num_computational_layers << " but" << std::endl; + sout << "instead the file contains " << num_computational_layers << " computational layers." << std::endl; throw serialization_error(sout.str()); } diff --git a/dlib/dnn/trainer_abstract.h b/dlib/dnn/trainer_abstract.h index 7cb924971ecb4acb12ec9a7153e948072e65ebb0..4602f01d724bf8dcd650aed4bed5834b2c230918 100644 --- a/dlib/dnn/trainer_abstract.h +++ b/dlib/dnn/trainer_abstract.h @@ -44,7 +44,7 @@ namespace dlib typedef typename net_type::label_type label_type; typedef typename net_type::input_type input_type; - const static size_t num_layers = net_type::num_layers; + const static size_t num_computational_layers = net_type::num_computational_layers; dnn_trainer() = delete; dnn_trainer(const dnn_trainer&) = delete; @@ -110,9 +110,9 @@ namespace dlib get_solvers()[0], the second layer's solver is get_solvers()[1], and so on. - It should be noted that you should never change the number of elements in - the vector returned by get_solvers() (i.e. don't do something that - changes get_solvers().size()). 
It will be set to net_type::num_layers by - this object and you should leave it at that. The non-const version of + the vector returned by get_solvers() (i.e. don't do something that changes + get_solvers().size()). It will be set to net_type::num_computational_layers + by this object and you should leave it at that. The non-const version of get_solvers() is provided only so you can tweak the parameters of a particular solver. !*/ diff --git a/dlib/test/dnn.cpp b/dlib/test/dnn.cpp index 5893e1160383a01f134c6dcea967452d1ba033db..3d83bb0088856099b3988b4b87c0a23d7fbbe3b4 100644 --- a/dlib/test/dnn.cpp +++ b/dlib/test/dnn.cpp @@ -1190,8 +1190,10 @@ namespace net_type net; net_type net2(num_fc_outputs(4)); - DLIB_TEST(layer<tag1>(net).num_layers == 8); - DLIB_TEST(layer<skip1>(net).num_layers == 8+3+3); + DLIB_TEST(layer<tag1>(net).num_computational_layers == 8); + DLIB_TEST(layer<skip1>(net).num_computational_layers == 8+3+3); + DLIB_TEST(layer<tag1>(net).num_layers == 10); + DLIB_TEST(layer<skip1>(net).num_layers == 10+3+3+1); DLIB_TEST(&layer<skip1>(net).get_output() == &layer<tag1>(net).get_output()); DLIB_TEST(&layer<skip1>(net).get_output() != &layer<tag1>(net).subnet().subnet().get_output()); DLIB_TEST(net.subnet().subnet().subnet().layer_details().get_num_outputs() == 10); diff --git a/examples/dnn_mnist_advanced_ex.cpp b/examples/dnn_mnist_advanced_ex.cpp index 1f7563eff065709091bab6fd8a737023a020da3f..0de2b0a34b5fd6cbaa2cde2da0feca0727ebfab0 100644 --- a/examples/dnn_mnist_advanced_ex.cpp +++ b/examples/dnn_mnist_advanced_ex.cpp @@ -122,7 +122,7 @@ int main(int argc, char** argv) try // prelu layers have a floating point parameter. If you want to set it to // something other than its default value you can do so like this: net_type2 pnet(prelu_(0.2), - prelu_(0.2), + prelu_(0.25), repeat_group(prelu_(0.3),prelu_(0.4)) // Initialize all the prelu instances in the repeat // layer. repeat_group() is needed to group the // things that are part of repeat's block. @@ -132,59 +132,72 @@ int main(int argc, char** argv) try // order the layers are defined, but it will skip layers where the // assignment doesn't make sense. - // The API shown above lets you modify layers at construction time. But - // what about after that? There are a number of ways to access layers - // inside a net object. - - // You can access sub layers of the network like this to get their output - // tensors. The following 3 statements are all equivalent and access the - // same layer's output. - pnet.subnet().subnet().subnet().get_output(); + // Now let's print the details of the pnet to the screen and inspect it. + cout << "The pnet has " << pnet.num_layers << " layers in it." << endl; + cout << pnet << endl; + // These print statements will output this (I've truncated it since it's + // long, but you get the idea): + /* + The pnet has 125 layers in it. + layer<0> loss_multiclass_log + layer<1> fc (num_outputs=10) + layer<2> avg_pool (nr=11, nc=11, stride_y=11, stride_x=11) + layer<3> prelu (initial_param_value=0.2) + layer<4> add_prev + layer<5> bn_con + layer<6> con (num_filters=8, nr=3, nc=3, stride_y=1, stride_x=1) + layer<7> prelu (initial_param_value=0.25) + layer<8> bn_con + layer<9> con (num_filters=8, nr=3, nc=3, stride_y=1, stride_x=1) + layer<10> tag1 + ... + layer<33> con (num_filters=8, nr=3, nc=3, stride_y=2, stride_x=2) + layer<34> tag1 + layer<35> tag4 + layer<36> prelu (initial_param_value=0.3) + layer<37> add_prev + layer<38> bn_con + ...
+ layer<114> con (num_filters=8, nr=3, nc=3, stride_y=2, stride_x=2) + layer<115> tag1 + layer<116> relu + layer<117> add_prev + layer<118> bn_con + layer<119> con (num_filters=8, nr=3, nc=3, stride_y=1, stride_x=1) + layer<120> relu + layer<121> bn_con + layer<122> con (num_filters=8, nr=3, nc=3, stride_y=1, stride_x=1) + layer<123> tag1 + layer<124> input<matrix> + */ + + // Now that we know the index numbers for each layer, we can access them + // individually using layer<index>(pnet). For example, to access the output + // tensor for the first prelu layer we can say: layer<3>(pnet).get_output(); - layer<prelu>(pnet).get_output(); - // Similarly, to get access to the prelu_ object that defines the layer's - // behavior we can say: - pnet.subnet().subnet().subnet().layer_details(); - // or - layer<prelu>(pnet).layer_details(); - // So for example, to print the prelu parameter: - cout << "first prelu layer's initial param value: " - << pnet.subnet().subnet().subnet().layer_details().get_initial_param_value() << endl; - - // From this it should be clear that layer() is a general tool for accessing - // sub layers. It makes repeated calls to subnet() so you don't have to. - // One of it's most important uses is to access tagged layers. For example, - // to access the first tag1 layer we can say: + // Or to print the prelu parameter for layer 7 we can say: + cout << "prelu param: "<< layer<7>(pnet).layer_details().get_initial_param_value() << endl; + + // We can also access layers by their type. This next statement finds the + // first tag1 layer in pnet, and is therefore equivalent to calling + // layer<10>(pnet): layer<tag1>(pnet); - // To further illustrate the use of layer(), let's loop over the repeated - // prelu layers and print out their parameters. But first, let's grab a - // reference to the repeat layer. Since we tagged the repeat layer we can - // access it using the layer() method. layer<tag4>(pnet) returns the tag4 - // layer, but we want the repeat layer right after it so we can give an - // integer as the second argument and it will jump that many layers down the - // network. In our case we need to jump just 1 layer down to get to repeat. - auto&& repeat_layer = layer<tag4,1>(pnet); - for (size_t i = 0; i < repeat_layer.num_repetitions(); ++i) - { - // The repeat layer just instantiates the network block a bunch of - // times. get_repeated_layer() allows us to grab each of these - // instances. - auto&& repeated_layer = repeat_layer.get_repeated_layer(i); - // Now that we have the i-th layer inside our repeat layer we can look - // at its properties. Recall that we repeated the "pres" network block, - // which is itself a network with a bunch of layers. So we can again - // use layer() to jump to the prelu layers we are interested in like so: - prelu_ prelu1 = layer<prelu>(repeated_layer).layer_details(); - prelu_ prelu2 = layer<prelu>(repeated_layer.subnet()).layer_details(); - cout << "first prelu layer parameter value: "<< prelu1.get_initial_param_value() << endl;; - cout << "second prelu layer parameter value: "<< prelu2.get_initial_param_value() << endl;; - } + // The tag layers don't do anything at all and exist simply so you can tag + // parts of your network and access them by layer<tag>(). You can also + // index relative to a tag. So for example, to access the layer immediately + // after tag4 you can say: + layer<tag4,1>(pnet); // Equivalent to layer<35+1>(pnet). 
+ + // Or to access the layer that is 2 layers after tag4: + layer<tag4,2>(pnet); + // Tagging is a very useful tool for making complex network structures. For + // example, the add_prev1 layer is implemented internally by using a call to + // layer<tag1>(). - - // Ok, so that's enough talk about defining networks. Let's talk about - // training networks! + // Ok, that's enough talk about defining and inspecting networks. Let's + // talk about training networks! // The dnn_trainer will use SGD by default, but you can tell it to use // different solvers like adam.
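Usage sketch (not part of the patch): the small program below is a minimal, hypothetical illustration of the introspection features this diff adds — operator<< on whole networks, the num_layers / num_computational_layers split, layer<index>() access, and the trainer sizing its solvers by num_computational_layers. The toy architecture (relu/tag1/htan over an input layer) is made up purely for illustration and is not from the dlib examples.

```cpp
// Hypothetical toy program exercising the features added by this patch.
// The network is deliberately meaningless; it only exists so the printing
// and indexing behavior is easy to follow.
#include <dlib/dnn.h>
#include <iostream>

using namespace dlib;

int main()
{
    using net_type = loss_multiclass_log<relu<tag1<htan<input<matrix<unsigned char>>>>>>;
    net_type net;

    // num_layers counts every layer (loss, tag, and input layers included),
    // while num_computational_layers counts only the tensor-transforming ones
    // (here: relu and htan).
    std::cout << "num_layers:               " << net_type::num_layers << "\n";               // 5
    std::cout << "num_computational_layers: " << net_type::num_computational_layers << "\n"; // 2

    // The new operator<< prints one line per layer, prefixed with the index
    // that layer<index>() expects.  This should print something like:
    //   layer<0>  loss_multiclass_log
    //   layer<1>  relu
    //   layer<2>  tag1
    //   layer<3>  htan
    //   layer<4>  input<matrix>
    std::cout << net << "\n";

    // Layers can be reached either by printed index or by tag; both refer to
    // the same add_tag_layer object here.
    auto& by_index = layer<2>(net);
    auto& by_tag   = layer<tag1>(net);
    std::cout << "same layer: " << (&by_index == &by_tag) << "\n";   // 1

    // The per-layer operator<< also works directly on the layer details.
    std::cout << layer<1>(net).layer_details() << "\n";              // prints "relu"

    // dnn_trainer now sizes its solver vector by num_computational_layers
    // rather than num_layers.
    dnn_trainer<net_type> trainer(net);
    std::cout << "solvers: " << trainer.get_solvers().size() << "\n"; // 2
    return 0;
}
```

The indices printed by operator<< are the same ones layer<index>() resolves, including inside repeat<> blocks, which is the behavior the new layer_helper specializations provide.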