Commit fe168596 authored by Davis King

Moved most of the layer parameters from runtime variables set in constructors
to template arguments. This way, the type of a network specifies the entire
network architecture, and most of the time the user doesn't even need to do
anything with layer constructors.
parent 001bca78
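To make the change concrete, here is a small illustrative sketch (not part of this commit's diff) contrasting the old runtime-constructor style with the new template-argument style. It uses the layer names that appear in the hunks below, but the specific parameter values and the small network shown are made up for illustration.

// Illustrative sketch only; assumes the dlib/dnn.h from this revision.
#include <dlib/dnn.h>
using namespace dlib;

int main()
{
    // Old style: layer parameters were runtime constructor arguments, so the
    // type of a network did not fully describe its architecture:
    //     con_ c(16,5,5,1,1);       // 16 filters, 5x5 kernels, 1x1 stride
    //     max_pool_ p(2,2,2,2);

    // New style: the same parameters are template arguments, so a
    // default-constructed layer is already fully specified:
    con_<16,5,5,1,1> c;
    max_pool_<2,2,2,2> p;

    // As a result, the network type pins down the entire architecture and the
    // network itself can simply be default constructed:
    using net_type = loss_multiclass_log<
                         fc<10,FC_HAS_BIAS,
                         relu<con<6,5,5,1,1,
                         input<matrix<unsigned char>>>>>>;
    net_type net;   // no per-layer constructor arguments needed
}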
@@ -1076,67 +1076,67 @@ namespace
}
{
print_spinner();
max_pool_ l;
max_pool_<3,3,1,1> l;
DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
print_spinner();
avg_pool_ l;
avg_pool_<3,3,1,1> l;
DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
print_spinner();
affine_ l(CONV_MODE);
affine_<CONV_MODE> l;
DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
print_spinner();
affine_ l(FC_MODE);
affine_<FC_MODE> l;
DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
print_spinner();
bn_ l(CONV_MODE);
bn_<CONV_MODE> l;
DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
print_spinner();
bn_ l(FC_MODE);
bn_<FC_MODE> l;
DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
print_spinner();
con_ l(3,3,3,2,2);
con_<3,3,3,2,2> l;
DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
print_spinner();
con_ l(3,3,3,1,1);
con_<3,3,3,1,1> l;
DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
print_spinner();
con_ l(3,3,2,1,1);
con_<3,3,2,1,1> l;
DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
print_spinner();
con_ l(2,1,1,1,1);
con_<2,1,1,1,1> l;
DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
print_spinner();
fc_ l;
fc_<1,FC_HAS_BIAS> l;
DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
print_spinner();
fc_ l(5,FC_HAS_BIAS);
fc_<5,FC_HAS_BIAS> l;
DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
print_spinner();
fc_ l(5,FC_NO_BIAS);
fc_<5,FC_NO_BIAS> l;
DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
@@ -1168,29 +1168,16 @@ namespace
// ----------------------------------------------------------------------------------------
template <typename T> using rcon = max_pool<relu<bn<con<T>>>>;
std::tuple<max_pool_,relu_,bn_,con_> rcon_ (unsigned long n)
{
return std::make_tuple(max_pool_(2,2,2,2),relu_(),bn_(CONV_MODE),con_(n,5,5));
}
template <typename T> using rfc = relu<bn<fc<T>>>;
std::tuple<relu_,bn_,fc_> rfc_ (unsigned long n)
{
return std::make_tuple(relu_(),bn_(),fc_(n));
}
template <unsigned long n, typename SUBNET> using rcon = max_pool<2,2,2,2,relu<bn_con<con<n,5,5,1,1,SUBNET>>>>;
template <unsigned long n, typename SUBNET> using rfc = relu<bn_fc<fc<n,FC_HAS_BIAS,SUBNET>>>;
void test_tagging(
)
{
typedef loss_multiclass_log<rfc<skip1<rfc<rfc<tag1<rcon<rcon<input<matrix<unsigned char>>>>>>>>>> net_type;
net_type net(rfc_(10),
rfc_(84),
rfc_(120),
rcon_(16),
rcon_(6)
);
typedef loss_multiclass_log<rfc<10,skip1<rfc<84,rfc<120,tag1<rcon<16,rcon<6,input<matrix<unsigned char>>>>>>>>>> net_type;
net_type net;
net_type net2(num_fc_outputs(4));
DLIB_TEST(layer<tag1>(net).num_layers == 8);
DLIB_TEST(layer<skip1>(net).num_layers == 8+3+3);
@@ -15,6 +15,7 @@
using namespace std;
using namespace dlib;
int main(int argc, char** argv) try
{
if (argc != 2)
@@ -23,6 +24,8 @@ int main(int argc, char** argv) try
return 1;
}
std::vector<matrix<unsigned char>> training_images;
std::vector<unsigned long> training_labels;
std::vector<matrix<unsigned char>> testing_images;
@@ -30,22 +33,18 @@ int main(int argc, char** argv) try
load_mnist_dataset(argv[1], training_images, training_labels, testing_images, testing_labels);
typedef loss_multiclass_log<fc<relu<fc<relu<fc<max_pool<relu<con<max_pool<relu<con<
input<matrix<unsigned char>>>>>>>>>>>>>> net_type;
using net_type = loss_multiclass_log<
fc<10,FC_HAS_BIAS,
relu<fc<84,FC_HAS_BIAS,
relu<fc<120,FC_HAS_BIAS,
max_pool<2,2,2,2,relu<con<16,5,5,1,1,
max_pool<2,2,2,2,relu<con<6,5,5,1,1,
input<matrix<unsigned char>>>>>>>>>>>>>>;
net_type net(fc_(10),
relu_(),
fc_(84),
relu_(),
fc_(120),
max_pool_(2,2,2,2),
relu_(),
con_(16,5,5),
max_pool_(2,2,2,2),
relu_(),
con_(6,5,5));
net_type net;
dnn_trainer<net_type> trainer(net,sgd(0.1));
dnn_trainer<net_type> trainer(net,sgd(0.01));
trainer.set_mini_batch_size(128);
trainer.be_verbose();
trainer.set_synchronization_file("mnist_sync", std::chrono::seconds(20));
@@ -9,23 +9,19 @@ using namespace dlib;
// ----------------------------------------------------------------------------------------
template <typename T> using res = relu<add_prev1<bn<con<relu<bn<con<tag1<T>>>>>>>>;
template <int stride, typename SUBNET>
using base_res = relu<add_prev1< bn_con<con<8,3,3,1,1,relu< bn_con<con<8,3,3,stride,stride,tag1<SUBNET>>>>>>>>;
std::tuple<relu_,add_prev1_,bn_,con_,relu_,bn_,con_> res_ (
unsigned long outputs,
unsigned long stride = 1
)
{
return std::make_tuple(relu_(),
add_prev1_(),
bn_(CONV_MODE),
con_(outputs,3,3,stride,stride),
relu_(),
bn_(CONV_MODE),
con_(outputs,3,3,stride,stride));
}
template <int stride, typename SUBNET>
using base_ares = relu<add_prev1<affine_con<con<8,3,3,1,1,relu<affine_con<con<8,3,3,stride,stride,tag1<SUBNET>>>>>>>>;
template <typename T> using ares = relu<add_prev1<affine<con<relu<affine<con<tag1<T>>>>>>>>;
template <typename SUBNET> using res = base_res<1,SUBNET>;
template <typename SUBNET> using res_down = base_res<2,SUBNET>;
template <typename SUBNET> using ares = base_ares<1,SUBNET>;
template <typename SUBNET> using ares_down = base_ares<2,SUBNET>;
template <typename SUBNET>
using pres = prelu<add_prev1< bn_con<con<8,3,3,1,1,prelu< bn_con<con<8,3,3,1,1,tag1<SUBNET>>>>>>>>;
// ----------------------------------------------------------------------------------------
@@ -44,24 +40,78 @@ int main(int argc, char** argv) try
load_mnist_dataset(argv[1], training_images, training_labels, testing_images, testing_labels);
set_dnn_prefer_smallest_algorithms();
typedef loss_multiclass_log<fc<avg_pool<
res<res<res<res<
repeat<10,res,
res<
const unsigned long number_of_classes = 10;
typedef loss_multiclass_log<fc<number_of_classes,FC_HAS_BIAS,
avg_pool<11,11,11,11,
res<res<res<res_down<
repeat<9,res, // repeat this layer 9 times
res_down<
res<
input<matrix<unsigned char>
>>>>>>>>>>> net_type;
const unsigned long number_of_classes = 10;
net_type net(fc_(number_of_classes),
avg_pool_(10,10,10,10),
res_(8),res_(8),res_(8),res_(8,2),
res_(8), // repeated 10 times
res_(8,2),
res_(8)
);
net_type net;
// If you wanted to use the same network but override the number of outputs at runtime
// you can do so like this:
net_type net2(num_fc_outputs(15));
// Let's imagine we wanted to replace some of the relu layers with prelu layers. We
// might do it like this:
typedef loss_multiclass_log<fc<number_of_classes,FC_HAS_BIAS,
avg_pool<11,11,11,11,
pres<res<res<res_down< // 2 prelu layers here
tag4<repeat<9,pres, // 9 groups, each containing 2 prelu layers
res_down<
res<
input<matrix<unsigned char>
>>>>>>>>>>>> net_type2;
// prelu layers have a floating point parameter. If you want to set it to something
// other than its default value you can do so like this:
net_type2 pnet(prelu_(0.2),
prelu_(0.2),
repeat_group(prelu_(0.3),prelu_(0.4)) // Initialize all the prelu instances in the repeat
// layer. repeat_group() is needed to group the things
// that are part of repeat's block.
);
// As you can see, a network will greedily assign things given to its constructor to
// the layers inside itself. The assignment is done in the order the layers are
// defined but it will skip layers where the assignment doesn't make sense.
// You can access sub layers of the network like this:
net.subnet().subnet().get_output();
layer<2>(net).get_output();
layer<relu>(net).get_output();
layer<tag1>(net).get_output();
// To further illustrate the use of layer(), let's loop over the repeated layers and
// print out their parameters. But first, let's grab a reference to the repeat layer.
// Since we tagged the repeat layer we can access it using the layer() method.
// layer<tag4>(pnet) returns the tag4 layer, but we want the repeat layer so we can
// give an integer as the second argument and it will jump that many layers down the
// network. In our case we need to jump just 1 layer down to get to repeat.
auto&& repeat_layer = layer<tag4,1>(pnet);
for (size_t i = 0; i < repeat_layer.num_repetitions(); ++i)
{
// The repeat layer just instantiates the network block a bunch of times as a
// network object. get_repeated_layer() allows us to grab each of these instances.
auto&& repeated_layer = repeat_layer.get_repeated_layer(i);
// Now that we have the i-th layer inside our repeat layer we can look at its
// properties. Recall that we repeated the "pres" network block, which is itself a
// network with a bunch of layers. So we can again use layer() to jump to the
// prelu layers we are interested in like so:
prelu_ prelu1 = layer<prelu>(repeated_layer).layer_details();
prelu_ prelu2 = layer<prelu>(repeated_layer.subnet()).layer_details();
cout << "first prelu layer parameter value: "<< prelu1.get_initial_param_value() << endl;;
cout << "second prelu layer parameter value: "<< prelu2.get_initial_param_value() << endl;;
}
dnn_trainer<net_type,adam> trainer(net,adam(0.001));
@@ -89,20 +139,16 @@ int main(int argc, char** argv) try
// wait for threaded processing to stop.
trainer.get_net();
// You can access sub layers of the network like this:
net.subnet().subnet().get_output();
layer<2>(net).get_output();
layer<avg_pool>(net).get_output();
net.clean();
serialize("mnist_res_network.dat") << net;
typedef loss_multiclass_log<fc<avg_pool<
ares<ares<ares<ares<
repeat<10,ares,
ares<
typedef loss_multiclass_log<fc<number_of_classes,FC_HAS_BIAS,
avg_pool<11,11,11,11,
ares<ares<ares<ares_down<
repeat<9,ares,
ares_down<
ares<
input<matrix<unsigned char>
>>>>>>>>>>> test_net_type;