Grouping layer added

28c4a482 · Fm · 617ffba6 · 28c4a482 · 28c4a482 · 28c4a482
Commit 28c4a482 authored May 17, 2016 by Fm
5 changed files
--- a/dlib/dnn/core.h
+++ b/dlib/dnn/core.h
--- a/dlib/dnn/tensor_tools.cpp
+++ b/dlib/dnn/tensor_tools.cpp
@@ -634,6 +634,66 @@ namespace dlib { namespace tt
 #endif
    }
+    // ----------------------------------------------------------------------------------------
+    // ------------------------------------------------------------------------------------
+    // Copies every sample of src into the sample with the same index in dest.  The
+    // copied data lands sample_offset float elements past the start of each dest
+    // sample; the rest of dest is left untouched.
+    //   dest          - tensor receiving the data; must match src in num_samples(),
+    //                   nr() and nc()
+    //   sample_offset - position inside each dest sample, counted in floats
+    //   src           - tensor whose samples are copied in full
+    void concat_depth(tensor& dest, size_t sample_offset, const tensor& src)
+    {
+        const size_t dest_sample_size = static_cast<size_t>(dest.nc() * dest.nr() * dest.k());
+        const size_t src_sample_size = static_cast<size_t>(src.nc() * src.nr() * src.k());
+        DLIB_CASSERT(dest.num_samples() == src.num_samples() &&
+                     dest.nc() == src.nc() && dest.nr() == src.nr(), "All sources should fit into dest tensor size");
+        // Phrased as a subtraction guarded by a comparison so that a huge
+        // sample_offset cannot wrap the unsigned arithmetic and slip past the check.
+        DLIB_CASSERT(sample_offset <= dest_sample_size &&
+                     src_sample_size <= dest_sample_size - sample_offset, "Not enough space in dest tensor");
+
+        // Only a slice of every dest sample is overwritten here, so the remaining
+        // content of dest has to survive.  The *_write_only() accessors tell the
+        // tensor its current content may be thrown away, hence the plain read/write
+        // accessors are used for dest.
+#ifdef DLIB_USE_CUDA
+        float* dest_p = dest.device() + sample_offset;
+        const float* src_p = src.device();
+#else
+        float* dest_p = dest.host() + sample_offset;
+        const float* src_p = src.host();
+#endif
+        for (unsigned long i = 0; i < src.num_samples(); ++i)
+        {
+            // One contiguous copy per sample: a whole src sample goes into dest.
+#ifdef DLIB_USE_CUDA
+            CHECK_CUDA(cudaMemcpy(dest_p, src_p, src_sample_size * sizeof(float), cudaMemcpyDeviceToDevice));
+#else
+            ::memcpy(dest_p, src_p, src_sample_size * sizeof(float));
+#endif
+            // Step both cursors to the same position in the next sample.
+            dest_p += dest_sample_size;
+            src_p  += src_sample_size;
+        }
+    }
+    // Fills every sample of dest from the sample with the same index in src.  The
+    // data is read starting sample_offset float elements past the start of each src
+    // sample, and a whole dest sample is written each time.
+    //   dest          - tensor that is completely overwritten; must match src in
+    //                   num_samples(), nr() and nc()
+    //   sample_offset - position inside each src sample, counted in floats
+    //   src           - tensor the data is taken from
+    void split_depth(tensor& dest, size_t sample_offset, const tensor& src)
+    {
+        const size_t dest_sample_size = static_cast<size_t>(dest.nc() * dest.nr() * dest.k());
+        const size_t src_sample_size = static_cast<size_t>(src.nc() * src.nr() * src.k());
+        DLIB_CASSERT(dest.num_samples() == src.num_samples() &&
+                     dest.nc() == src.nc() && dest.nr() == src.nr(), "All sources should fit into dest tensor size");
+        // The offset is checked first: src_sample_size - sample_offset is unsigned and
+        // would wrap to a huge value (making the check pass) if the offset were larger
+        // than a src sample.
+        DLIB_CASSERT(sample_offset <= src_sample_size &&
+                     dest_sample_size <= src_sample_size - sample_offset, "Not enough data in src tensor");
+
+        // Every element of dest is written below (dest_sample_size floats for each
+        // sample), so its previous content is not needed.
+#ifdef DLIB_USE_CUDA
+        float* dest_p = dest.device_write_only();
+        const float* src_p = src.device() + sample_offset;
+#else
+        float* dest_p = dest.host_write_only();
+        const float* src_p = src.host() + sample_offset;
+#endif
+        for (unsigned long i = 0; i < src.num_samples(); ++i)
+        {
+            // One contiguous copy per sample: a slice of src fills a whole dest sample.
+#ifdef DLIB_USE_CUDA
+            CHECK_CUDA(cudaMemcpy(dest_p, src_p, dest_sample_size * sizeof(float), cudaMemcpyDeviceToDevice));
+#else
+            ::memcpy(dest_p, src_p, dest_sample_size * sizeof(float));
+#endif
+            // Step both cursors to the same position in the next sample.
+            dest_p += dest_sample_size;
+            src_p  += src_sample_size;
+        }
+    }
 // ----------------------------------------------------------------------------------------
 }}

--- a/dlib/dnn/tensor_tools.h
+++ b/dlib/dnn/tensor_tools.h
@@ -1171,6 +1171,43 @@ namespace dlib { namespace tt
        resizable_tensor accum_buffer;
    };
+    // ----------------------------------------------------------------------------------------
+    void concat_depth(
+            tensor& dest,
+            size_t sample_offset,
+            const tensor& src
+    );
+    /*!
+        requires
+            - dest.nc() == src.nc()
+            - dest.nr() == src.nr()
+            - dest.num_samples() == src.num_samples()
+            - dest.k()*dest.nr()*dest.nc() >= src.k()*src.nr()*src.nc() + sample_offset
+            - is_same_object(dest,src) == false
+            - sample_offset is a count of float elements measured from the start of a
+              dest sample.  It is not a byte count and not a channel index.
+        ensures
+            - For each sample i, copies all src.k()*src.nr()*src.nc() elements of sample
+              i of src into sample i of dest, starting sample_offset elements into the
+              dest sample.
+            - If sample_offset == K*dest.nr()*dest.nc() for some integer K then
+              (assuming the channels of a sample are stored one after another) this
+              performs: dest[i, k + K, r, c] = src[i, k, r, c], where k is in the
+              range [0, src.k())
+    !*/
+    void split_depth(
+            tensor& dest,
+            size_t sample_offset,
+            const tensor& src
+    );
+    /*!
+        requires
+            - dest.nc() == src.nc()
+            - dest.nr() == src.nr()
+            - dest.num_samples() == src.num_samples()
+            - dest.k()*dest.nr()*dest.nc() + sample_offset <= src.k()*src.nr()*src.nc()
+            - is_same_object(dest,src) == false
+            - sample_offset is a count of float elements measured from the start of a
+              src sample.  It is not a byte count and not a channel index.
+        ensures
+            - For each sample i, fills all dest.k()*dest.nr()*dest.nc() elements of
+              sample i of dest with the elements of sample i of src that start
+              sample_offset elements into the src sample.
+            - If sample_offset == K*src.nr()*src.nc() for some integer K then
+              (assuming the channels of a sample are stored one after another) this
+              performs: dest[i, k, r, c] = src[i, k + K, r, c], where k is in the
+              range [0, dest.k())
+    !*/
 // ----------------------------------------------------------------------------------------

--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -33,6 +33,7 @@ ENDMACRO()
 if (COMPILER_CAN_DO_CPP_11)
   add_example(dnn_mnist_ex)
   add_example(dnn_mnist_advanced_ex)
+   # Inception example built around the grp layer (see dnn_inception_ex.cpp).
+   add_example(dnn_inception_ex)
 endif()
 #here we apply our macros 

--- a/examples/dnn_inception_ex.cpp
+++ b/examples/dnn_inception_ex.cpp
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+    This is an example illustrating the use of the deep learning tools from the
+    dlib C++ Library.  I'm assuming you have already read the dnn_mnist_ex.cpp
+    example.  So in this example program I'm going to go over a number of more
+    advanced parts of the API, including:
+        - Using the grp layer to construct an inception layer
+    An inception layer is a kind of NN architecture that runs several convolution types
+    on the same input area and joins all the convolution results into one output.
+    For further reading refer to http://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf
+*/
+#include <dlib/dnn.h>
+#include <iostream>
+#include <dlib/data_io.h>
+#include <tuple>
+using namespace std;
+using namespace dlib;
+// The inception module, following the GoogLeNet description, is assembled from four
+// branches that all start from group_input.  Each branch is named on its own below so
+// that a single sublayer can be edited (for example to change its depth) without
+// touching the others.
+using inception_branch_direct = con<8,1,1,1,1, group_input>;
+using inception_branch_3x3    = con<8,3,3,1,1, con<8,1,1,1,1, group_input>>;
+using inception_branch_5x5    = con<8,5,5,1,1, con<8,1,1,1,1, group_input>>;
+using inception_branch_pool   = con<8,1,1,1,1, max_pool<3,3,1,1, group_input>>;
+
+// grp receives the tuple of branches together with the subnetwork that feeds them.
+template <typename SUBNET>
+using inception = grp<
+    std::tuple<
+        inception_branch_direct,
+        inception_branch_3x3,
+        inception_branch_5x5,
+        inception_branch_pool
+    >,
+    SUBNET
+>;
+// Trains an MNIST classifier that contains an inception block, saves it to disk and
+// prints its accuracy on the training and testing sets.
+//   argv[1] - folder holding the four decompressed MNIST files
+// Returns 0 on success and 1 when the arguments are wrong or an exception was thrown.
+int main(int argc, char** argv) try
+{
+    // This example is going to run on the MNIST dataset.  
+    if (argc != 2)
+    {
+        cout << "This example needs the MNIST dataset to run!" << endl;
+        cout << "You can get MNIST from http://yann.lecun.com/exdb/mnist/" << endl;
+        cout << "Download the 4 files that comprise the dataset, decompress them, and" << endl;
+        cout << "put them in a folder.  Then give that folder as input to this program." << endl;
+        return 1;
+    }
+    std::vector<matrix<unsigned char>> training_images;
+    std::vector<unsigned long>         training_labels;
+    std::vector<matrix<unsigned char>> testing_images;
+    std::vector<unsigned long>         testing_labels;
+    load_mnist_dataset(argv[1], training_images, training_labels, testing_images, testing_labels);
+    // Create the same network as in dnn_mnist_ex, but use an inception layer instead of
+    // the convolution in the middle.
+    using net_type = loss_multiclass_log<
+            fc<10,
+                    relu<fc<84,
+                            relu<fc<120,
+                                    max_pool<2,2,2,2,relu<inception<
+                                            max_pool<2,2,2,2,relu<con<6,5,5,1,1,
+                                                    input<matrix<unsigned char>>
+                                            >>>>>>>>>>>>;
+    // Create a network as defined above.  This network will produce 10 outputs
+    // because that's how we defined net_type.  However, fc layers can have the
+    // number of outputs they produce changed at runtime.
+    net_type net;
+    // The training process below is the same as in the dnn_mnist_ex sample.  It uses
+    // mini-batch stochastic gradient descent with an initial learning rate of 0.01.
+    dnn_trainer<net_type> trainer(net);
+    trainer.set_learning_rate(0.01);
+    trainer.set_min_learning_rate(0.00001);
+    trainer.set_mini_batch_size(128);
+    trainer.be_verbose();
+    // Since DNN training can take a long time, we can ask the trainer to save its state to
+    // a file named "inception_sync" every 20 seconds.  This way, if we kill this program and
+    // start it again it will begin where it left off rather than restarting the training
+    // from scratch.  This is because, when the program restarts, this call to
+    // set_synchronization_file() will automatically reload the settings from inception_sync
+    // if the file exists.  The file name is deliberately specific to this example so that a
+    // state file left behind by another example (which trains a different network type) is
+    // never picked up by mistake.
+    trainer.set_synchronization_file("inception_sync", std::chrono::seconds(20));
+    // Finally, this line begins training.  By default, it runs SGD with our specified
+    // learning rate until the loss stops decreasing.  Then it reduces the learning rate by
+    // a factor of 10 and continues running until the loss stops decreasing again.  It will
+    // keep doing this until the learning rate has dropped below the min learning rate
+    // defined above or the maximum number of epochs has been executed (defaulted to 10000). 
+    trainer.train(training_images, training_labels);
+    // At this point our net object should have learned how to classify MNIST images.  But
+    // before we try it out let's save it to disk.  Note that, since the trainer has been
+    // running images through the network, net will have a bunch of state in it related to
+    // the last batch of images it processed (e.g. outputs from each layer).  Since we
+    // don't care about saving that kind of stuff to disk we can tell the network to forget
+    // about that kind of transient data so that our file will be smaller.  We do this by
+    // "cleaning" the network before saving it.
+    net.clean();
+    // As with the sync file, the saved model gets a name of its own so it does not
+    // overwrite a model written by another example.
+    serialize("mnist_network_inception.dat") << net;
+    // Now if we later wanted to recall the network from disk we can simply say:
+    // deserialize("mnist_network_inception.dat") >> net;
+    // Now let's run the training images through the network.  This statement runs all the
+    // images through it and asks the loss layer to convert the network's raw output into
+    // labels.  In our case, these labels are the numbers between 0 and 9.
+    std::vector<unsigned long> predicted_labels = net(training_images);
+    int num_right = 0;
+    int num_wrong = 0;
+    // And then let's see if it classified them correctly.
+    for (size_t i = 0; i < training_images.size(); ++i)
+    {
+        if (predicted_labels[i] == training_labels[i])
+            ++num_right;
+        else
+            ++num_wrong;
+    }
+    cout << "training num_right: " << num_right << endl;
+    cout << "training num_wrong: " << num_wrong << endl;
+    cout << "training accuracy:  " << num_right/(double)(num_right+num_wrong) << endl;
+    // Let's also see if the network can correctly classify the testing images.  Since
+    // MNIST is an easy dataset, we should see at least 99% accuracy.
+    predicted_labels = net(testing_images);
+    num_right = 0;
+    num_wrong = 0;
+    for (size_t i = 0; i < testing_images.size(); ++i)
+    {
+        if (predicted_labels[i] == testing_labels[i])
+            ++num_right;
+        else
+            ++num_wrong;
+    }
+    cout << "testing num_right: " << num_right << endl;
+    cout << "testing num_wrong: " << num_wrong << endl;
+    cout << "testing accuracy:  " << num_right/(double)(num_right+num_wrong) << endl;
+    return 0;
+}
+catch(const std::exception& e)
+{
+    // Print the reason and report the failure through the exit status.  Without the
+    // explicit return, leaving this handler would make main() return 0.
+    cout << e.what() << endl;
+    return 1;
+}