Commit f1896128 authored by Davis King

Fixed a bug in visit_layer_parameter_gradients() and visit_layer_parameters()

caused by num_computational_layers being wrong when tag layers were placed as
the first layer.  Because these visit functions were wrong, multi-GPU support
also did not work on such networks.
parent d019e9cd
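The affected case is a network whose first layer above the input is a tag layer.  The following is a minimal sketch, not part of this commit; the network type toy_net is hypothetical and only illustrates how the visit functions are expected to behave once num_computational_layers no longer counts a tag layer sitting directly on the input.

    #include <dlib/dnn.h>
    #include <vector>
    using namespace dlib;

    // Hypothetical toy network for illustration only: tag1 sits directly on the
    // input layer, which is exactly the case where num_computational_layers used
    // to be off by one.
    using toy_net = loss_multiclass_log<fc<10,relu<fc<32,tag1<input<matrix<unsigned char>>>>>>>;

    int main()
    {
        toy_net net;
        std::vector<bool> hit(net.num_computational_layers, false);

        // After the fix, the visitor is called with every index in
        // [0, num_computational_layers), despite the leading tag layer.
        visit_layer_parameters(net, [&](size_t i, tensor&){ hit[i] = true; });

        for (auto h : hit)
            DLIB_CASSERT(h, "every computational layer should be visited");
    }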
@@ -1851,9 +1851,7 @@ namespace dlib
     public:
         typedef INPUT_LAYER subnet_type;
         typedef typename subnet_type::input_type input_type;
-        // This layer counts as a computational layer because it copies and stores the
-        // inputs.
-        const static size_t num_computational_layers = 1;
+        const static size_t num_computational_layers = 0;
         const static size_t num_layers = 2;
         const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;
         static_assert(sample_expansion_factor >= 1,
...
@@ -1343,6 +1343,72 @@ namespace
     DLIB_TEST(net2.subnet().subnet().subnet().layer_details().get_num_outputs() == 4);
 }
 
+// ----------------------------------------------------------------------------------------
+
+    template <
+        int N,
+        template <typename> class BN,
+        int stride,
+        typename SUBNET
+        >
+    using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>;
+
+    template <
+        template <int,template<typename>class,int,typename> class block,
+        int N,
+        template<typename>class BN,
+        typename SUBNET
+        >
+    using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;
+
+    template <
+        template <int,template<typename>class,int,typename> class block,
+        int N,
+        template<typename>class BN,
+        typename SUBNET
+        >
+    using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>;
+
+    template <typename SUBNET> using res       = relu<residual<block,8,bn_con,SUBNET>>;
+    template <typename SUBNET> using ares      = relu<residual<block,8,affine,SUBNET>>;
+    template <typename SUBNET> using res_down  = relu<residual_down<block,8,bn_con,SUBNET>>;
+    template <typename SUBNET> using ares_down = relu<residual_down<block,8,affine,SUBNET>>;
+
+    template <typename SUBNET>
+    using pres = prelu<add_prev1<bn_con<con<8,3,3,1,1,prelu<bn_con<con<8,3,3,1,1,tag1<SUBNET>>>>>>>>;
+
+    void test_visit_funcions()
+    {
+        using net_type2 = loss_multiclass_log<fc<10,
+            avg_pool_everything<
+            pres<res<res<res_down< // 2 prelu layers here
+            tag4<repeat<9,pres,    // 9 groups, each containing 2 prelu layers
+            res_down<
+            res<
+            input<matrix<unsigned char>>
+            >>>>>>>>>>>;
+
+        net_type2 pnet;
+
+        DLIB_CASSERT(pnet.num_layers == 131, pnet.num_layers);
+        DLIB_CASSERT(pnet.num_computational_layers == 109, pnet.num_computational_layers);
+
+        std::vector<bool> hit(pnet.num_computational_layers, false);
+        size_t count = 0;
+        visit_layer_parameter_gradients(pnet, [&](size_t i, tensor& ){hit[i] = true; ++count; });
+        for (auto x : hit)
+            DLIB_TEST(x);
+        DLIB_TEST(count == pnet.num_computational_layers);
+
+        count = 0;
+        std::vector<bool> hit2(pnet.num_computational_layers, false);
+        visit_layer_parameters(pnet, [&](size_t i, tensor& ){hit2[i] = true; ++count; });
+        for (auto x : hit2)
+            DLIB_TEST(x);
+        DLIB_TEST(count == pnet.num_computational_layers);
+    }
+
 // ----------------------------------------------------------------------------------------
 
     class dnn_tester : public tester
@@ -1403,6 +1469,7 @@ namespace
             test_batch_normalize_conv();
             test_basic_tensor_ops();
             test_layers();
+            test_visit_funcions();
         }
     } a;
...