Commit 603d4743 authored by Davis King

- Renamed network_type::num_layers to network_type::num_computational_layers.

- Made layer() recurse into repeat objects so that the index given to layer()
  does what you would expect.
- Added an operator<< for network objects that prints the network architecture.
parent 088546f3
......@@ -205,8 +205,12 @@ namespace dlib
typedef SUBNET subnet_type;
typedef typename subnet_type::input_type input_type;
const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;
// If SUBNET is an input layer then num_layers == 1, otherwise it has the
// definition shown here:
// num_computational_layers will always give the number of layers in the network
// that transform tensors (i.e. layers defined by something that implements the
// EXAMPLE_COMPUTATIONAL_LAYER_ interface). This is all the layers except for
// loss, tag, and skip layers.
const static size_t num_computational_layers = subnet_type::num_computational_layers + 1;
// num_layers counts all the layers in the network regardless of their type.
const static size_t num_layers = subnet_type::num_layers + 1;
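To make the distinction concrete, here is a small illustrative sketch (the toy network below is hypothetical and not part of this patch; the expected counts simply follow the rules described above, where computational layers are counted by both constants while loss and input layers are counted only by num_layers):
#include <dlib/dnn.h>
using namespace dlib;
// A hypothetical toy network: two fully connected layers with a relu between them.
using toy_net = loss_multiclass_log<fc<10, relu<fc<84, input<matrix<unsigned char>>>>>>;
// fc, relu, fc -> 3 tensor-transforming layers.
static_assert(toy_net::num_computational_layers == 3, "sketch only");
// Those 3 plus the input layer and the loss layer -> 5 layers in total.
static_assert(toy_net::num_layers == 5, "sketch only");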
add_layer(
......@@ -444,7 +448,7 @@ namespace dlib
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
- solvers.size() >= num_layers
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- Back propagates the error gradient, get_gradient_input(), through this
......@@ -473,7 +477,7 @@ namespace dlib
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
- solvers.size() >= num_layers
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- This function is identical to the version of update() defined immediately
......@@ -508,9 +512,15 @@ namespace dlib
};
template <typename T, typename U>,
template <typename T, typename U>
std::ostream& operator<<(std::ostream& out, const add_layer<T,U>& item);
/*!
prints the network architecture to the given output stream.
!*/
template <typename T, typename U>
void serialize(const add_layer<T,U>& item, std::ostream& out);
template <typename T, typename U>,
template <typename T, typename U>
void deserialize(add_layer<T,U>& item, std::istream& in);
/*!
provides serialization support
......@@ -555,9 +565,8 @@ namespace dlib
typedef LOSS_DETAILS loss_details_type;
typedef SUBNET subnet_type;
typedef typename subnet_type::input_type input_type;
// Note that the loss layer doesn't count as an additional layer since it doesn't
// have any learnable parameters.
const static size_t num_layers = subnet_type::num_layers;
const static size_t num_computational_layers = subnet_type::num_computational_layers;
const static size_t num_layers = subnet_type::num_layers + 1;
const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;
// If LOSS_DETAILS is an unsupervised loss then label_type==no_label_type.
// Otherwise it is defined as follows:
......@@ -838,7 +847,7 @@ namespace dlib
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
- solvers.size() >= num_layers
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- runs x through the network, compares the output to the expected output
......@@ -870,7 +879,7 @@ namespace dlib
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
- solvers.size() >= num_layers
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- runs [ibegin,iend) through the network, compares the output to the
......@@ -901,7 +910,7 @@ namespace dlib
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
- solvers.size() >= num_layers
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- runs x through the network and updates the network parameters by
......@@ -928,7 +937,7 @@ namespace dlib
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
- solvers.size() >= num_layers
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- runs [ibegin,iend) through the network and updates the network parameters
......@@ -951,9 +960,15 @@ namespace dlib
!*/
};
template <typename T, typename U>,
template <typename T, typename U>
std::ostream& operator<<(std::ostream& out, const add_loss_layer<T,U>& item);
/*!
prints the network architecture to the given output stream.
!*/
template <typename T, typename U>
void serialize(const add_loss_layer<T,U>& item, std::ostream& out);
template <typename T, typename U>,
template <typename T, typename U>
void deserialize(add_loss_layer<T,U>& item, std::istream& in);
/*!
provides serialization support
......@@ -1013,6 +1028,7 @@ namespace dlib
typedef SUBNET subnet_type;
typedef typename SUBNET::input_type input_type;
const static size_t num_computational_layers = (REPEATED_LAYER<SUBNET>::num_computational_layers-SUBNET::num_computational_layers)*num + SUBNET::num_computational_layers;
const static size_t num_layers = (REPEATED_LAYER<SUBNET>::num_layers-SUBNET::num_layers)*num + SUBNET::num_layers;
const static unsigned int sample_expansion_factor = SUBNET::sample_expansion_factor;
typedef REPEATED_LAYER<an_unspecified_input_type> repeated_layer_type;
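As a worked example of the formulas above (the counts are made up, not taken from this patch): suppose SUBNET has 3 computational layers and 4 total layers, and one application of REPEATED_LAYER on top of it yields 5 computational layers and 7 total layers, i.e. each repetition adds 2 computational layers and 3 total layers. Then for repeat<10, REPEATED_LAYER, SUBNET>:
// num_computational_layers == (5-3)*10 + 3 == 23
// num_layers               == (7-4)*10 + 4 == 34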
......@@ -1095,6 +1111,20 @@ namespace dlib
!*/
};
template < size_t num, template<typename> class T, typename U >
std::ostream& operator<<(std::ostream& out, const repeat<num,T,U>& item);
/*!
prints the network architecture to the given output stream.
!*/
template < size_t num, template<typename> class T, typename U >
void serialize(const repeat<num,T,U>& item, std::ostream& out);
template < size_t num, template<typename> class T, typename U >
void deserialize(repeat<num,T,U>& item, std::istream& in);
/*!
provides serialization support
!*/
// ----------------------------------------------------------------------------------------
template <
......@@ -1124,9 +1154,15 @@ namespace dlib
!*/
};
template <unsigned long ID, typename U>,
template <unsigned long ID, typename U>
std::ostream& operator<<(std::ostream& out, const add_tag_layer<ID,U>& item);
/*!
prints the network architecture to the given output stream.
!*/
template <unsigned long ID, typename U>
void serialize(const add_tag_layer<ID,U>& item, std::ostream& out);
template <unsigned long ID, typename U>,
template <unsigned long ID, typename U>
void deserialize(add_tag_layer<ID,U>& item, std::istream& in);
/*!
provides serialization support
......@@ -1168,6 +1204,12 @@ namespace dlib
!*/
};
template <template<typename> class T, typename U>
std::ostream& operator<<(std::ostream& out, const add_skip_layer<T,U>& item);
/*!
prints the network architecture to the given output stream.
!*/
template <template<typename> class T, typename U>
void serialize(const add_skip_layer<T,U>& item, std::ostream& out);
template <template<typename> class T, typename U>
......@@ -1200,9 +1242,15 @@ namespace dlib
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
- i < net_type::num_layers
ensures
- This function chains together i calls to n.subnet() and returns the
result. So for example:
- This function allows you to access any layer in a network by its layer index
i. It walks i steps down the network and returns the layer object found there.
Since networks can be large, the easiest way to find a layer's index is to
print the network to the screen, as the printout includes the index of each
layer (a usage sketch follows this specification block).
- In general, this function chains together i calls to n.subnet() and returns
the result. So for example:
- if (i == 0)
- returns n
- else if (i == 1)
......@@ -1213,6 +1261,10 @@ namespace dlib
- returns n.subnet().subnet().subnet()
- else
- etc.
Except that when it hits a repeat layer it recurses into the repeated layers
contained inside. That is, if the layer index indicates a layer in a repeat
object this function will make the appropriate call to get_repeated_layer()
and do the right thing.
!*/
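A brief usage sketch of layer() (the helper function, layer index, and tag below are hypothetical, chosen only for illustration):
#include <dlib/dnn.h>
#include <iostream>
using namespace dlib;
// Sketch only: net stands for some deep network object defined elsewhere that
// happens to contain a tag1 layer and at least 6 layers.
template <typename net_type>
void inspect_sketch(net_type& net)
{
    std::cout << net << std::endl;   // print the architecture; each line shows a layer index
    layer<5>(net).get_output();      // output tensor of the layer printed as layer<5>
    layer<5>(net).layer_details();   // the object implementing that layer (e.g. a relu_ or fc_)
    layer<tag1>(net);                // the first tag1 layer in the network
    layer<tag1,1>(net);              // the layer immediately after that tag1 layer
}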
template <
......
......@@ -119,6 +119,12 @@ namespace dlib
deserialize(item.avg_blue, in);
}
friend std::ostream& operator<<(std::ostream& out, const input_rgb_image& item)
{
out << "input_rgb_image("<<item.avg_red<<","<<item.avg_green<<","<<item.avg_blue<<")";
return out;
}
private:
float avg_red;
float avg_green;
......@@ -201,6 +207,12 @@ namespace dlib
throw serialization_error("Unexpected version found while deserializing dlib::input.");
}
friend std::ostream& operator<<(std::ostream& out, const input& item)
{
out << "input<matrix>";
return out;
}
};
// ----------------------------------------------------------------------------------------
......@@ -277,6 +289,11 @@ namespace dlib
if (version != "input<array2d>")
throw serialization_error("Unexpected version found while deserializing dlib::input.");
}
friend std::ostream& operator<<(std::ostream& out, const input& item)
{
out << "input<array2d>";
return out;
}
};
......
......@@ -86,6 +86,11 @@ namespace dlib
!*/
};
std::ostream& operator<<(std::ostream& out, const EXAMPLE_INPUT_LAYER& item);
/*!
prints a string describing this layer.
!*/
void serialize(const EXAMPLE_INPUT_LAYER& item, std::ostream& out);
void deserialize(EXAMPLE_INPUT_LAYER& item, std::istream& in);
/*!
......@@ -142,14 +147,6 @@ namespace dlib
!*/
};
template <typename T>
void serialize(const input<T>& item, std::ostream& out);
template <typename T>
void deserialize(input<T>& item, std::istream& in);
/*!
provides serialization support
!*/
// ----------------------------------------------------------------------------------------
class input_rgb_image
......@@ -158,8 +155,8 @@ namespace dlib
WHAT THIS OBJECT REPRESENTS
This input layer works with RGB images of type matrix<rgb_pixel>. It is
very similar to the dlib::input layer except that it allows you to subtract
the average color value from each color channel when convting an image to a
tensor.
the average color value from each color channel when converting an image to
a tensor.
!*/
public:
typedef matrix<rgb_pixel> input_type;
......@@ -227,14 +224,11 @@ namespace dlib
- #data.nc() == C
- #data.k() == 3
Moreover, each color channel is normalized by having its average value
subtracted (accroding to get_avg_red(), get_avg_green(), or
subtracted (according to get_avg_red(), get_avg_green(), or
get_avg_blue()) and then is divided by 256.0.
!*/
};
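For reference, a minimal sketch of the per-channel normalization described above, using a made-up average value (this helper function is illustrative only and not part of dlib):
// Each channel value has its channel's average subtracted and is then divided by 256.
float normalize_red_channel(unsigned char red, float avg_red /* e.g. 122.78f */)
{
    return (red - avg_red) / 256.0f;
}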
void serialize(const input_rgb_image& item, std::ostream& out);
void deserialize(input_rgb_image& item, std::istream& in);
}
#endif // DLIB_DNn_INPUT_ABSTRACT_H_
......
......@@ -157,6 +157,20 @@ namespace dlib
if (stride_x != _stride_x) throw serialization_error("Wrong stride_x found while deserializing dlib::con_");
}
friend std::ostream& operator<<(std::ostream& out, const con_& item)
{
out << "con\t ("
<< "num_filters="<<_num_filters
<< ", nr="<<_nr
<< ", nc="<<_nc
<< ", stride_y="<<_stride_y
<< ", stride_x="<<_stride_x
<< ")";
return out;
}
private:
resizable_tensor params;
......@@ -277,6 +291,18 @@ namespace dlib
if (_stride_x != stride_x) throw serialization_error("Wrong stride_x found while deserializing dlib::max_pool_");
}
friend std::ostream& operator<<(std::ostream& out, const max_pool_& item)
{
out << "max_pool ("
<< "nr="<<_nr
<< ", nc="<<_nc
<< ", stride_y="<<_stride_y
<< ", stride_x="<<_stride_x
<< ")";
return out;
}
private:
......@@ -393,6 +419,16 @@ namespace dlib
if (_stride_x != stride_x) throw serialization_error("Wrong stride_x found while deserializing dlib::avg_pool_");
}
friend std::ostream& operator<<(std::ostream& out, const avg_pool_& item)
{
out << "avg_pool ("
<< "nr="<<_nr
<< ", nc="<<_nc
<< ", stride_y="<<_stride_y
<< ", stride_x="<<_stride_x
<< ")";
return out;
}
private:
tt::pooling ap;
......@@ -557,6 +593,15 @@ namespace dlib
}
}
friend std::ostream& operator<<(std::ostream& out, const bn_& item)
{
if (mode == CONV_MODE)
out << "bn_con";
else
out << "bn_fc";
return out;
}
private:
friend class affine_;
......@@ -695,6 +740,23 @@ namespace dlib
if (bias_mode != (fc_bias_mode)bmode) throw serialization_error("Wrong fc_bias_mode found while deserializing dlib::fc_");
}
friend std::ostream& operator<<(std::ostream& out, const fc_& item)
{
if (bias_mode == FC_HAS_BIAS)
{
out << "fc\t ("
<< "num_outputs="<<item.num_outputs
<< ")";
}
else
{
out << "fc_no_bias ("
<< "num_outputs="<<item.num_outputs
<< ")";
}
return out;
}
private:
unsigned long num_outputs;
......@@ -793,6 +855,14 @@ namespace dlib
deserialize(item.mask, in);
}
friend std::ostream& operator<<(std::ostream& out, const dropout_& item)
{
out << "dropout\t ("
<< "drop_rate="<<item.drop_rate
<< ")";
return out;
}
private:
float drop_rate;
resizable_tensor mask;
......@@ -872,6 +942,14 @@ namespace dlib
deserialize(item.val, in);
}
friend std::ostream& operator<<(std::ostream& out, const multiply_& item)
{
out << "multiply ("
<< "val="<<item.val
<< ")";
return out;
}
private:
float val;
resizable_tensor params; // unused
......@@ -1021,6 +1099,12 @@ namespace dlib
item.mode = (layer_mode)mode;
}
friend std::ostream& operator<<(std::ostream& out, const affine_& )
{
out << "affine";
return out;
}
private:
resizable_tensor params, empty_params;
alias_tensor gamma, beta;
......@@ -1079,6 +1163,13 @@ namespace dlib
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::add_prev_.");
}
friend std::ostream& operator<<(std::ostream& out, const add_prev_& item)
{
out << "add_prev";
return out;
}
private:
resizable_tensor params;
};
......@@ -1156,6 +1247,13 @@ namespace dlib
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::relu_.");
}
friend std::ostream& operator<<(std::ostream& out, const relu_& )
{
out << "relu";
return out;
}
private:
resizable_tensor params;
};
......@@ -1226,6 +1324,14 @@ namespace dlib
deserialize(item.initial_param_value, in);
}
friend std::ostream& operator<<(std::ostream& out, const prelu_& item)
{
out << "prelu\t ("
<< "initial_param_value="<<item.initial_param_value
<< ")";
return out;
}
private:
resizable_tensor params;
float initial_param_value;
......@@ -1279,6 +1385,13 @@ namespace dlib
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::sig_.");
}
friend std::ostream& operator<<(std::ostream& out, const sig_& )
{
out << "sig";
return out;
}
private:
resizable_tensor params;
};
......@@ -1332,6 +1445,13 @@ namespace dlib
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::htan_.");
}
friend std::ostream& operator<<(std::ostream& out, const htan_& )
{
out << "htan";
return out;
}
private:
resizable_tensor params;
};
......@@ -1385,6 +1505,12 @@ namespace dlib
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::softmax_.");
}
friend std::ostream& operator<<(std::ostream& out, const softmax_& )
{
out << "softmax";
return out;
}
private:
resizable_tensor params;
};
......
......@@ -306,6 +306,11 @@ namespace dlib
};
std::ostream& operator<<(std::ostream& out, const EXAMPLE_COMPUTATIONAL_LAYER_& item);
/*!
prints a string describing this layer.
!*/
void serialize(const EXAMPLE_COMPUTATIONAL_LAYER_& item, std::ostream& out);
void deserialize(EXAMPLE_COMPUTATIONAL_LAYER_& item, std::istream& in);
/*!
......@@ -390,11 +395,6 @@ namespace dlib
These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
!*/
friend void serialize(const fc_& item, std::ostream& out);
friend void deserialize(fc_& item, std::istream& in);
/*!
provides serialization support
!*/
};
template <
......@@ -500,12 +500,6 @@ namespace dlib
These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
!*/
friend void serialize(const con_& item, std::ostream& out);
friend void deserialize(con_& item, std::istream& in);
/*!
provides serialization support
!*/
};
template <
......@@ -565,12 +559,6 @@ namespace dlib
!*/
};
void serialize(const dropout_& item, std::ostream& out);
void deserialize(dropout_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using dropout = add_layer<dropout_, SUBNET>;
......@@ -623,12 +611,6 @@ namespace dlib
!*/
};
void serialize(const multiply_& item, std::ostream& out);
void deserialize(multiply_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using multiply = add_layer<multiply_, SUBNET>;
......@@ -725,13 +707,6 @@ namespace dlib
/*!
These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
!*/
friend void serialize(const bn_& item, std::ostream& out);
friend void deserialize(bn_& item, std::istream& in);
/*!
provides serialization support
!*/
};
template <typename SUBNET>
......@@ -829,12 +804,6 @@ namespace dlib
};
void serialize(const affine_& item, std::ostream& out);
void deserialize(affine_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using affine = add_layer<affine_, SUBNET>;
......@@ -927,12 +896,6 @@ namespace dlib
interface. Note that this layer doesn't have any parameters, so the tensor
returned by get_layer_params() is always empty.
!*/
friend void serialize(const max_pool_& item, std::ostream& out);
friend void deserialize(max_pool_& item, std::istream& in);
/*!
provides serialization support
!*/
};
template <
......@@ -1034,11 +997,6 @@ namespace dlib
returned by get_layer_params() is always empty.
!*/
friend void serialize(const avg_pool_& item, std::ostream& out);
friend void deserialize(avg_pool_& item, std::istream& in);
/*!
provides serialization support
!*/
};
template <
......@@ -1080,12 +1038,6 @@ namespace dlib
!*/
};
void serialize(const relu_& item, std::ostream& out);
void deserialize(relu_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using relu = add_layer<relu_, SUBNET>;
......@@ -1137,12 +1089,6 @@ namespace dlib
!*/
};
void serialize(const prelu_& item, std::ostream& out);
void deserialize(prelu_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using prelu = add_layer<prelu_, SUBNET>;
......@@ -1176,12 +1122,6 @@ namespace dlib
!*/
};
void serialize(const sig_& item, std::ostream& out);
void deserialize(sig_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using sig = add_layer<sig_, SUBNET>;
......@@ -1215,12 +1155,6 @@ namespace dlib
!*/
};
void serialize(const htan_& item, std::ostream& out);
void deserialize(htan_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using htan = add_layer<htan_, SUBNET>;
......@@ -1262,12 +1196,6 @@ namespace dlib
!*/
};
void serialize(const softmax_& item, std::ostream& out);
void deserialize(softmax_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using softmax = add_layer<softmax_, SUBNET>;
......@@ -1307,11 +1235,6 @@ namespace dlib
!*/
};
void serialize(const add_prev_& item, std::ostream& out);
void deserialize(add_prev_& item, std::istream& in);
/*!
provides serialization support
!*/
template <
template<typename> class tag,
......
......@@ -100,6 +100,12 @@ namespace dlib
throw serialization_error("Unexpected version found while deserializing dlib::loss_binary_hinge_.");
}
friend std::ostream& operator<<(std::ostream& out, const loss_binary_hinge_& )
{
out << "loss_binary_hinge";
return out;
}
};
template <typename SUBNET>
......@@ -203,6 +209,12 @@ namespace dlib
throw serialization_error("Unexpected version found while deserializing dlib::loss_binary_log_.");
}
friend std::ostream& operator<<(std::ostream& out, const loss_binary_log_& )
{
out << "loss_binary_log";
return out;
}
};
template <typename SUBNET>
......@@ -307,6 +319,13 @@ namespace dlib
throw serialization_error("Unexpected version found while deserializing dlib::loss_multiclass_log_.");
}
friend std::ostream& operator<<(std::ostream& out, const loss_multiclass_log_& )
{
out << "loss_multiclass_log";
return out;
}
};
template <typename SUBNET>
......
......@@ -125,6 +125,11 @@ namespace dlib
!*/
};
std::ostream& operator<<(std::ostream& out, const EXAMPLE_LOSS_LAYER_& item);
/*!
prints a string describing this layer.
!*/
void serialize(const EXAMPLE_LOSS_LAYER_& item, std::ostream& out);
void deserialize(EXAMPLE_LOSS_LAYER_& item, std::istream& in);
/*!
......@@ -200,12 +205,6 @@ namespace dlib
};
void serialize(const loss_binary_hinge_& item, std::ostream& out);
void deserialize(loss_binary_hinge_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using loss_binary_hinge = add_loss_layer<loss_binary_hinge_, SUBNET>;
......@@ -272,12 +271,6 @@ namespace dlib
};
void serialize(const loss_binary_log_& item, std::ostream& out);
void deserialize(loss_binary_log_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using loss_binary_log = add_loss_layer<loss_binary_log_, SUBNET>;
......@@ -346,12 +339,6 @@ namespace dlib
};
void serialize(const loss_multiclass_log_& item, std::ostream& out);
void deserialize(loss_multiclass_log_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using loss_multiclass_log = add_loss_layer<loss_multiclass_log_, SUBNET>;
......
......@@ -37,12 +37,12 @@ namespace dlib
typedef typename net_type::label_type label_type;
typedef typename net_type::input_type input_type;
const static size_t num_layers = net_type::num_layers;
const static size_t num_computational_layers = net_type::num_computational_layers;
dnn_trainer() = delete;
dnn_trainer(const dnn_trainer&) = delete;
explicit dnn_trainer(net_type& net_) : job_pipe(0), net(net_), solvers(num_layers)
explicit dnn_trainer(net_type& net_) : job_pipe(0), net(net_), solvers(num_computational_layers)
{
init();
}
......@@ -50,7 +50,7 @@ namespace dlib
dnn_trainer(
net_type& net_,
const solver_type& solver_
) : job_pipe(0), net(net_), solvers(num_layers, solver_)
) : job_pipe(0), net(net_), solvers(num_computational_layers, solver_)
{
init();
}
......@@ -75,7 +75,7 @@ namespace dlib
)
{
wait_for_thread_to_pause();
solvers = std::vector<solver_type>(num_layers, solver_);
solvers = std::vector<solver_type>(num_computational_layers, solver_);
}
unsigned long get_mini_batch_size (
......@@ -504,7 +504,7 @@ namespace dlib
int version = 5;
serialize(version, out);
size_t nl = dnn_trainer::num_layers;
size_t nl = dnn_trainer::num_computational_layers;
serialize(nl, out);
serialize(item.rs, out);
serialize(item.previous_loss_values, out);
......@@ -530,14 +530,14 @@ namespace dlib
if (version != 5)
throw serialization_error("Unexpected version found while deserializing dlib::dnn_trainer.");
size_t num_layers = 0;
deserialize(num_layers, in);
if (num_layers != dnn_trainer::num_layers)
size_t num_computational_layers = 0;
deserialize(num_computational_layers, in);
if (num_computational_layers != dnn_trainer::num_computational_layers)
{
std::ostringstream sout;
sout << "Error deserializing dlib::dnn_trainer. The saved sync file is for a network with " << std::endl;
sout << "a different number of layers. We expected the number of layers to be " << dnn_trainer::num_layers << " but" << std::endl;
sout << "instead the file contains " << num_layers << " layers." << std::endl;
sout << "a different number of layers. We expected the number of layers to be " << dnn_trainer::num_computational_layers << " but" << std::endl;
sout << "instead the file contains " << num_computational_layers << " computational layers." << std::endl;
throw serialization_error(sout.str());
}
......
......@@ -44,7 +44,7 @@ namespace dlib
typedef typename net_type::label_type label_type;
typedef typename net_type::input_type input_type;
const static size_t num_layers = net_type::num_layers;
const static size_t num_computational_layers = net_type::num_computational_layers;
dnn_trainer() = delete;
dnn_trainer(const dnn_trainer&) = delete;
......@@ -110,9 +110,9 @@ namespace dlib
get_solvers()[0], the second layer's solver is
get_solvers()[1], and so on.
- It should be noted that you should never change the number of elements in
the vector returned by get_solvers() (i.e. don't do something that
changes get_solvers().size()). It will be set to net_type::num_layers by
this object and you should leave it at that. The non-const version of
the vector returned by get_solvers() (i.e. don't do something that changes
get_solvers().size()). It will be set to net_type::num_computational_layers
by this object and you should leave it at that. The non-const version of
get_solvers() is provided only so you can tweak the parameters of a
particular solver.
!*/
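A short sketch of what this looks like in user code (the network type is hypothetical, and it assumes the trainer's default sgd solver type, which is default constructible):
#include <dlib/dnn.h>
using namespace dlib;
// Hypothetical tiny network, just to have a concrete net_type for the sketch.
using toy_net = loss_multiclass_log<fc<10, input<matrix<unsigned char>>>>;
void solver_setup_sketch()
{
    toy_net net;
    dnn_trainer<toy_net> trainer(net);            // one solver per computational layer
    // trainer.get_solvers().size() == toy_net::num_computational_layers, and that size
    // should never be changed by the caller.
    trainer.set_solver(sgd());                    // replace every per-layer solver
    auto& top_solver = trainer.get_solvers()[0];  // tweak only the top layer's solver
    (void)top_solver;
}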
......
......@@ -1190,8 +1190,10 @@ namespace
net_type net;
net_type net2(num_fc_outputs(4));
DLIB_TEST(layer<tag1>(net).num_layers == 8);
DLIB_TEST(layer<skip1>(net).num_layers == 8+3+3);
DLIB_TEST(layer<tag1>(net).num_computational_layers == 8);
DLIB_TEST(layer<skip1>(net).num_computational_layers == 8+3+3);
DLIB_TEST(layer<tag1>(net).num_layers == 10);
DLIB_TEST(layer<skip1>(net).num_layers == 10+3+3+1);
DLIB_TEST(&layer<skip1>(net).get_output() == &layer<tag1>(net).get_output());
DLIB_TEST(&layer<skip1>(net).get_output() != &layer<tag1>(net).subnet().subnet().get_output());
DLIB_TEST(net.subnet().subnet().subnet().layer_details().get_num_outputs() == 10);
......
......@@ -122,7 +122,7 @@ int main(int argc, char** argv) try
// prelu layers have a floating point parameter. If you want to set it to
// something other than its default value you can do so like this:
net_type2 pnet(prelu_(0.2),
prelu_(0.2),
prelu_(0.25),
repeat_group(prelu_(0.3),prelu_(0.4)) // Initialize all the prelu instances in the repeat
// layer. repeat_group() is needed to group the
// things that are part of repeat's block.
......@@ -132,59 +132,72 @@ int main(int argc, char** argv) try
// order the layers are defined, but it will skip layers where the
// assignment doesn't make sense.
// The API shown above lets you modify layers at construction time. But
// what about after that? There are a number of ways to access layers
// inside a net object.
// You can access sub layers of the network like this to get their output
// tensors. The following 3 statements are all equivalent and access the
// same layer's output.
pnet.subnet().subnet().subnet().get_output();
// Now let's print the details of the pnet to the screen and inspect it.
cout << "The pnet has " << pnet.num_layers << " layers in it." << endl;
cout << pnet << endl;
// These print statements will output this (I've truncated it since it's
// long, but you get the idea):
/*
The pnet has 125 layers in it.
layer<0> loss_multiclass_log
layer<1> fc (num_outputs=10)
layer<2> avg_pool (nr=11, nc=11, stride_y=11, stride_x=11)
layer<3> prelu (initial_param_value=0.2)
layer<4> add_prev
layer<5> bn_con
layer<6> con (num_filters=8, nr=3, nc=3, stride_y=1, stride_x=1)
layer<7> prelu (initial_param_value=0.25)
layer<8> bn_con
layer<9> con (num_filters=8, nr=3, nc=3, stride_y=1, stride_x=1)
layer<10> tag1
...
layer<33> con (num_filters=8, nr=3, nc=3, stride_y=2, stride_x=2)
layer<34> tag1
layer<35> tag4
layer<36> prelu (initial_param_value=0.3)
layer<37> add_prev
layer<38> bn_con
...
layer<114> con (num_filters=8, nr=3, nc=3, stride_y=2, stride_x=2)
layer<115> tag1
layer<116> relu
layer<117> add_prev
layer<118> bn_con
layer<119> con (num_filters=8, nr=3, nc=3, stride_y=1, stride_x=1)
layer<120> relu
layer<121> bn_con
layer<122> con (num_filters=8, nr=3, nc=3, stride_y=1, stride_x=1)
layer<123> tag1
layer<124> input<matrix>
*/
// Now that we know the index numbers for each layer, we can access them
// individually using layer<index>(pnet). For example, to access the output
// tensor for the first prelu layer we can say:
layer<3>(pnet).get_output();
layer<prelu>(pnet).get_output();
// Similarly, to get access to the prelu_ object that defines the layer's
// behavior we can say:
pnet.subnet().subnet().subnet().layer_details();
// or
layer<prelu>(pnet).layer_details();
// So for example, to print the prelu parameter:
cout << "first prelu layer's initial param value: "
<< pnet.subnet().subnet().subnet().layer_details().get_initial_param_value() << endl;
// From this it should be clear that layer() is a general tool for accessing
// sub layers. It makes repeated calls to subnet() so you don't have to.
// One of its most important uses is to access tagged layers. For example,
// to access the first tag1 layer we can say:
// Or to print the prelu parameter for layer 7 we can say:
cout << "prelu param: "<< layer<7>(pnet).layer_details().get_initial_param_value() << endl;
// We can also access layers by their type. This next statement finds the
// first tag1 layer in pnet, and is therefore equivalent to calling
// layer<10>(pnet):
layer<tag1>(pnet);
// To further illustrate the use of layer(), let's loop over the repeated
// prelu layers and print out their parameters. But first, let's grab a
// reference to the repeat layer. Since we tagged the repeat layer we can
// access it using the layer() method. layer<tag4>(pnet) returns the tag4
// layer, but we want the repeat layer right after it so we can give an
// integer as the second argument and it will jump that many layers down the
// network. In our case we need to jump just 1 layer down to get to repeat.
auto&& repeat_layer = layer<tag4,1>(pnet);
for (size_t i = 0; i < repeat_layer.num_repetitions(); ++i)
{
// The repeat layer just instantiates the network block a bunch of
// times. get_repeated_layer() allows us to grab each of these
// instances.
auto&& repeated_layer = repeat_layer.get_repeated_layer(i);
// Now that we have the i-th layer inside our repeat layer we can look
// at its properties. Recall that we repeated the "pres" network block,
// which is itself a network with a bunch of layers. So we can again
// use layer() to jump to the prelu layers we are interested in like so:
prelu_ prelu1 = layer<prelu>(repeated_layer).layer_details();
prelu_ prelu2 = layer<prelu>(repeated_layer.subnet()).layer_details();
cout << "first prelu layer parameter value: "<< prelu1.get_initial_param_value() << endl;;
cout << "second prelu layer parameter value: "<< prelu2.get_initial_param_value() << endl;;
}
// The tag layers don't do anything at all and exist simply so you can tag
// parts of your network and access them by layer<tag>(). You can also
// index relative to a tag. So for example, to access the layer immediately
// after tag4 you can say:
layer<tag4,1>(pnet); // Equivalent to layer<35+1>(pnet).
// Or to access the layer 2 layers after tag4:
layer<tag4,2>(pnet);
// Tagging is a very useful tool for making complex network structures. For
// example, the add_prev1 layer is implemented internally by using a call to
// layer<tag1>().
// Ok, so that's enough talk about defining networks. Let's talk about
// training networks!
// Ok, that's enough talk about defining and inspecting networks. Let's
// talk about training networks!
// The dnn_trainer will use SGD by default, but you can tell it to use
// different solvers like adam.
......