Commit ffaa322f authored by Davis King

Added loss_multimulticlass_log_

parent 72fbed20
@@ -12,6 +12,7 @@
#include "../image_processing/full_object_detection.h"
#include "../svm/ranking_tools.h"
#include <sstream>
#include <map>
namespace dlib
{
@@ -365,6 +366,288 @@ namespace dlib
template <typename SUBNET>
using loss_multiclass_log = add_loss_layer<loss_multiclass_log_, SUBNET>;
// ----------------------------------------------------------------------------------------
class loss_multimulticlass_log_
{
public:
loss_multimulticlass_log_ () = default;
loss_multimulticlass_log_ (
const std::map<std::string,std::vector<std::string>>& labels
)
{
for (auto& l : labels)
{
DLIB_CASSERT(l.second.size() >= 2, "Each classifier must have at least two possible labels.");
possible_labels[l.first] = std::make_shared<decltype(l.second)>(l.second);
for (size_t i = 0; i < l.second.size(); ++i)
{
label_idx_lookup[l.first][l.second[i]] = i;
++total_num_labels;
}
}
}
unsigned long number_of_labels() const { return total_num_labels; }
unsigned long number_of_classifiers() const { return possible_labels.size(); }
std::map<std::string,std::vector<std::string>> get_labels (
) const
{
std::map<std::string,std::vector<std::string>> info;
for (auto& i : possible_labels)
{
for (auto& label : *i.second)
info[i.first].emplace_back(label);
}
return info;
}
class classifier_output
{
public:
classifier_output() = default;
size_t num_classes() const { return class_probs.size(); }
double probability_of_class (
size_t i
) const
{
DLIB_CASSERT(i < num_classes());
return class_probs(i);
}
const std::string& label(
size_t i
) const
{
DLIB_CASSERT(i < num_classes());
return (*_labels)[i];
}
operator std::string(
) const
{
DLIB_CASSERT(num_classes() != 0);
return (*_labels)[index_of_max(class_probs)];
}
friend std::ostream& operator<< (std::ostream& out, const classifier_output& item)
{
DLIB_ASSERT(item.num_classes() != 0);
out << static_cast<std::string>(item);
return out;
}
private:
friend class loss_multimulticlass_log_;
template <typename EXP>
classifier_output(
const matrix_exp<EXP>& class_probs,
const std::shared_ptr<std::vector<std::string>>& _labels
) :
class_probs(class_probs),
_labels(_labels)
{
}
matrix<float,1,0> class_probs;
std::shared_ptr<std::vector<std::string>> _labels;
};
typedef std::map<std::string,std::string> training_label_type;
typedef std::map<std::string,classifier_output> output_label_type;
template <
typename SUB_TYPE,
typename label_iterator
>
void to_label (
const tensor& input_tensor,
const SUB_TYPE& sub,
label_iterator iter_begin
) const
{
const tensor& output_tensor = sub.get_output();
DLIB_CASSERT(sub.sample_expansion_factor() == 1);
DLIB_CASSERT(output_tensor.nr() == 1 &&
output_tensor.nc() == 1 );
DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
DLIB_CASSERT(number_of_labels() != 0, "You must give the loss_multimulticlass_log_'s constructor label data before you can use it!");
DLIB_CASSERT(output_tensor.k() == (long)number_of_labels(), "The output tensor must have " << number_of_labels() << " channels.");
long k_offset = 0;
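// Each classifier owns a contiguous block of channels in the output tensor.
// Walk those blocks in order, softmax each block independently, and emit one
// classifier_output per sample for each classifier.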
for (auto& l : possible_labels)
{
auto iter = iter_begin;
const std::string& classifier_name = l.first;
const auto& labels = (*l.second);
scratch.set_size(output_tensor.num_samples(), labels.size());
tt::copy_tensor(false, scratch, 0, output_tensor, k_offset, labels.size());
tt::softmax(scratch, scratch);
for (long i = 0; i < scratch.num_samples(); ++i)
(*iter++)[classifier_name] = classifier_output(rowm(mat(scratch),i), l.second);
k_offset += labels.size();
}
}
template <
typename const_label_iterator,
typename SUBNET
>
double compute_loss_value_and_gradient (
const tensor& input_tensor,
const_label_iterator truth_begin,
SUBNET& sub
) const
{
const tensor& output_tensor = sub.get_output();
tensor& grad = sub.get_gradient_input();
DLIB_CASSERT(sub.sample_expansion_factor() == 1);
DLIB_CASSERT(input_tensor.num_samples() != 0);
DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0);
DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
DLIB_CASSERT(output_tensor.nr() == 1 &&
output_tensor.nc() == 1);
DLIB_CASSERT(grad.nr() == 1 &&
grad.nc() == 1);
DLIB_CASSERT(number_of_labels() != 0, "You must give the loss_multimulticlass_log_'s constructor label data before you can use it!");
DLIB_CASSERT(output_tensor.k() == (long)number_of_labels(), "The output tensor must have " << number_of_labels() << " channels.");
// The loss we output is the average loss over the mini-batch.
const double scale = 1.0/output_tensor.num_samples();
double loss = 0;
long k_offset = 0;
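// As in to_label(), handle each classifier's block of channels separately:
// softmax the block, accumulate the cross-entropy loss for the true label,
// and write the gradient (p-1 for the true class, p otherwise, scaled by
// 1/num_samples) back into grad.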
for (auto& l : label_idx_lookup)
{
const std::string& classifier_name = l.first;
const auto& int_labels = l.second;
scratch.set_size(output_tensor.num_samples(), int_labels.size());
tt::copy_tensor(false, scratch, 0, output_tensor, k_offset, int_labels.size());
tt::softmax(scratch, scratch);
auto truth = truth_begin;
float* g = scratch.host();
for (long i = 0; i < scratch.num_samples(); ++i)
{
const long y = int_labels.at(truth->at(classifier_name));
++truth;
for (long k = 0; k < scratch.k(); ++k)
{
const unsigned long idx = i*scratch.k()+k;
if (k == y)
{
loss += scale*-std::log(g[idx]);
g[idx] = scale*(g[idx]-1);
}
else
{
g[idx] = scale*g[idx];
}
}
}
tt::copy_tensor(false, grad, k_offset, scratch, 0, int_labels.size());
k_offset += int_labels.size();
}
return loss;
}
friend void serialize(const loss_multimulticlass_log_& item, std::ostream& out)
{
serialize("loss_multimulticlass_log_", out);
serialize(item.get_labels(), out);
}
friend void deserialize(loss_multimulticlass_log_& item, std::istream& in)
{
std::string version;
deserialize(version, in);
if (version != "loss_multimulticlass_log_")
throw serialization_error("Unexpected version found while deserializing dlib::loss_multimulticlass_log_.");
std::map<std::string,std::vector<std::string>> info;
deserialize(info, in);
item = loss_multimulticlass_log_(info);
}
friend std::ostream& operator<<(std::ostream& out, const loss_multimulticlass_log_& item)
{
out << "loss_multimulticlass_log, labels={";
for (auto i = item.possible_labels.begin(); i != item.possible_labels.end(); )
{
auto& category = i->first;
auto& labels = *(i->second);
out << category << ":(";
for (size_t j = 0; j < labels.size(); ++j)
{
out << labels[j];
if (j+1 < labels.size())
out << ",";
}
out << ")";
if (++i != item.possible_labels.end())
out << ", ";
}
out << "}";
return out;
}
friend void to_xml(const loss_multimulticlass_log_& item, std::ostream& out)
{
out << "<loss_multimulticlass_log>\n";
out << item;
out << "\n</loss_multimulticlass_log>";
}
private:
std::map<std::string,std::shared_ptr<std::vector<std::string>>> possible_labels;
unsigned long total_num_labels = 0;
// We make it true that: possible_labels[classifier][label_idx_lookup[classifier][label]] == label
std::map<std::string, std::map<std::string,long>> label_idx_lookup;
// Scratch doesn't logically contribute to the state of this object. It's just
// temporary scratch space used by this class.
mutable resizable_tensor scratch;
};
template <typename SUBNET>
using loss_multimulticlass_log = add_loss_layer<loss_multimulticlass_log_, SUBNET>;
inline bool operator== (const std::string& lhs, const loss_multimulticlass_log_::classifier_output& rhs)
{ return lhs == static_cast<const std::string&>(rhs); }
inline bool operator== (const loss_multimulticlass_log_::classifier_output& lhs, const std::string& rhs)
{ return rhs == static_cast<const std::string&>(lhs); }
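To make the interface concrete, here is a minimal inference sketch. The example_net alias and the print_predictions helper are hypothetical names used only for illustration; just net(img), the classifier_output accessors, and the operator<< overload come from this commit, and the network is assumed to be already trained with its fc layer sized to number_of_labels().

#include <dlib/dnn.h>
#include <iostream>

// Hypothetical minimal network shape; any net ending in this loss behaves the same way.
using example_net = dlib::loss_multimulticlass_log<dlib::fc<1, dlib::input<dlib::matrix<float>>>>;

void print_predictions(example_net& net, const dlib::matrix<float>& img)
{
    // For a single sample, net(img) returns an output_label_type, i.e. a
    // std::map from classifier name to classifier_output.
    auto out = net(img);
    for (auto& p : out)
    {
        // operator<< prints the most probable label for this classifier.
        std::cout << p.first << " -> " << p.second << "\n";
        // The full class-conditional distribution is also available.
        for (size_t i = 0; i < p.second.num_classes(); ++i)
            std::cout << "    " << p.second.label(i) << ": "
                      << p.second.probability_of_class(i) << "\n";
    }
}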
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
...
@@ -362,6 +362,228 @@ namespace dlib
template <typename SUBNET>
using loss_multiclass_log = add_loss_layer<loss_multiclass_log_, SUBNET>;
// ----------------------------------------------------------------------------------------
class loss_multimulticlass_log_
{
/*!
WHAT THIS OBJECT REPRESENTS
This object implements the loss layer interface defined above by
EXAMPLE_LOSS_LAYER_. In particular, it implements a collection of
multiclass classifiers. An example will make its use clear. Suppose you
want to make something that takes a picture of a vehicle and answers the
following questions:
- What type of vehicle is it? A sedan or a truck?
- What color is it? Red, green, blue, gray, or black?
You need two separate multi-class classifiers to do this: one to decide
the type of vehicle and another to decide the color. The
loss_multimulticlass_log_ allows you to pack these two classifiers into one
neural network. This means that when you use the network to process an
image it will output 2 labels for each image, the type label and the color
label.
To create a loss_multimulticlass_log_ for the above case you would
construct it as follows:
std::map<std::string,std::vector<std::string>> labels;
labels["type"] = {"sedan", "truck"};
labels["color"] = {"red", "green", "blue", "gray", "black"};
loss_multimulticlass_log_ myloss(labels);
Then you could use myloss with a network object and train it to do this
task. More generally, you can use any number of classifiers and labels
when using this object. Finally, each of the classifiers uses a standard
multi-class logistic regression loss.
!*/
public:
loss_multimulticlass_log_(
);
/*!
ensures
- #number_of_labels() == 0
- #get_labels().size() == 0
!*/
loss_multimulticlass_log_ (
const std::map<std::string,std::vector<std::string>>& labels
);
/*!
requires
- Each vector in labels must contain at least 2 strings. I.e. each
classifier must have at least two possible labels.
ensures
- #number_of_labels() == the total number of strings in all the
std::vectors in labels.
- #number_of_classifiers() == labels.size()
- #get_labels() == labels
!*/
unsigned long number_of_labels(
) const;
/*!
ensures
- returns the total number of labels known to this loss. This is the
count of labels summed across all the classifiers.
!*/
unsigned long number_of_classifiers(
) const;
/*!
ensures
- returns the number of classifiers defined by this loss.
!*/
std::map<std::string,std::vector<std::string>> get_labels (
) const;
/*!
ensures
- returns the names of the classifiers and labels used by this loss. In
particular, if the returned object is L then:
- L[CLASS] == the set of labels used by the classifier CLASS.
- L.size() == number_of_classifiers()
- The count of strings in the vectors in L == number_of_labels()
!*/
class classifier_output
{
/*!
WHAT THIS OBJECT REPRESENTS
This object stores the predictions from one of the classifiers in
loss_multimulticlass_log_. It allows you to find out the most likely
string label predicted by that classifier, as well as get the class
conditional probability of any of the classes in the classifier.
!*/
public:
classifier_output(
);
/*!
ensures
- #num_classes() == 0
!*/
size_t num_classes(
) const;
/*!
ensures
- returns the number of possible classes output by this classifier.
!*/
double probability_of_class (
size_t i
) const;
/*!
requires
- i < num_classes()
ensures
- returns the probability that the true class has a label of label(i).
- The sum of probability_of_class(j) for j in the range [0, num_classes()) is always 1.
!*/
const std::string& label(
size_t i
) const;
/*!
requires
- i < num_classes()
ensures
- returns the string label for the ith class.
!*/
operator std::string(
) const;
/*!
requires
- num_classes() != 0
ensures
- returns the string label for the most probable class.
!*/
friend std::ostream& operator<< (std::ostream& out, const classifier_output& item);
/*!
requires
- num_classes() != 0
ensures
- prints the most probable class label to out.
!*/
};
// Both training_label_type and output_label_type should always have sizes equal to
// number_of_classifiers(). That is, the std::map should have an entry for every
// classifier known to this loss.
typedef std::map<std::string,std::string> training_label_type;
typedef std::map<std::string,classifier_output> output_label_type;
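// For instance, in the vehicle example above a training_label_type could be
// {{"type","truck"}, {"color","red"}}, and the output_label_type produced by
// to_label() would map "type" and "color" to classifier_output objects holding
// each classifier's predicted distribution.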
template <
typename SUB_TYPE,
typename label_iterator
>
void to_label (
const tensor& input_tensor,
const SUB_TYPE& sub,
label_iterator iter
) const;
/*!
This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
it has the additional calling requirements that:
- number_of_labels() != 0
- sub.get_output().k() == number_of_labels()
- sub.get_output().nr() == 1
- sub.get_output().nc() == 1
- sub.get_output().num_samples() == input_tensor.num_samples()
- sub.sample_expansion_factor() == 1
!*/
template <
typename const_label_iterator,
typename SUBNET
>
double compute_loss_value_and_gradient (
const tensor& input_tensor,
const_label_iterator truth,
SUBNET& sub
) const;
/*!
This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient()
except it has the additional calling requirements that:
- number_of_labels() != 0
- sub.get_output().k() == number_of_labels()
It should be noted that the last layer in your network should usually
be an fc layer. If so, you can satisfy this requirement of k() being
number_of_labels() by calling set_num_outputs() prior to training your
network like so:
your_network.subnet().layer_details().set_num_outputs(your_network.loss_details().number_of_labels());
- sub.get_output().nr() == 1
- sub.get_output().nc() == 1
- sub.get_output().num_samples() == input_tensor.num_samples()
- sub.sample_expansion_factor() == 1
- All the std::maps pointed to by truth contain entries for all the
classifiers known to this loss. That is, it must be valid to call
truth[i][classifier] for any of the classifiers known to this loss. To
say this another way, all the training samples must contain labels for
each of the classifiers defined by this loss.
To really belabor this, this also means that truth[i].size() ==
get_labels().size() and that both truth[i] and get_labels() have the same
set of key strings. It also means that the value strings in truth[i]
must be strings known to the loss, i.e. they are valid labels according
to get_labels().
!*/
};
template <typename SUBNET>
using loss_multimulticlass_log = add_loss_layer<loss_multimulticlass_log_, SUBNET>;
// Allow comparison between classifier_outputs and std::string to check if the
// predicted class is a particular string.
inline bool operator== (const std::string& lhs, const loss_multimulticlass_log_::classifier_output& rhs)
{ return lhs == static_cast<const std::string&>(rhs); }
inline bool operator== (const loss_multimulticlass_log_::classifier_output& lhs, const std::string& rhs)
{ return rhs == static_cast<const std::string&>(lhs); }
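To tie the pieces together, here is a short end-to-end training sketch for the vehicle scenario described in the documentation above. The network shape, trainer settings, and dummy data are illustrative assumptions (they mirror the unit test further down); the set_num_outputs() call satisfies the k() == number_of_labels() requirement spelled out for compute_loss_value_and_gradient().

#include <dlib/dnn.h>
#include <map>
#include <string>
#include <vector>

int main()
{
    using namespace dlib;
    // The two classifiers from the documentation above.
    std::map<std::string, std::vector<std::string>> labels;
    labels["type"]  = {"sedan", "truck"};
    labels["color"] = {"red", "green", "blue", "gray", "black"};

    // A deliberately tiny network, just for illustration.
    using net_type = loss_multimulticlass_log<fc<1, input<matrix<float>>>>;
    net_type net(labels);

    // The loss needs one output channel per label (2 + 5 = 7 here).
    net.subnet().layer_details().set_num_outputs(net.loss_details().number_of_labels());

    // One dummy sample so the sketch actually runs; real code would load data.
    std::vector<matrix<float>> samples;
    std::vector<std::map<std::string, std::string>> truths;
    matrix<float> samp(2,3);
    samp = 0;
    samples.push_back(samp);
    truths.push_back({{"type","sedan"}, {"color","red"}});

    dnn_trainer<net_type> trainer(net, sgd(0.1));
    trainer.set_learning_rate(0.1);
    trainer.set_min_learning_rate(0.01);  // stop quickly for this toy example
    trainer.train(samples, truths);
    return 0;
}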
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
...
@@ -3094,6 +3094,65 @@ namespace
}
}
// ----------------------------------------------------------------------------------------
void test_loss_multimulticlass_log()
{
print_spinner();
std::map<string,std::vector<string>> all_labels;
all_labels["c1"] = {"a", "b", "c"};
all_labels["c2"] = {"d", "e", "f"};
// make training data
std::vector<matrix<float>> samples;
std::vector<std::map<string,string>> labels;
for (int i = 0; i < 3; ++i)
{
for (int j = 0; j < 3; ++j)
{
matrix<float> samp(2,3);
samp = 0;
samp(0,i) = 1;
samp(1,j) = 1;
samples.push_back(samp);
std::map<string,string> l;
if (i == 0) l["c1"] = "a";
if (i == 1) l["c1"] = "b";
if (i == 2) l["c1"] = "c";
if (j == 0) l["c2"] = "d";
if (j == 1) l["c2"] = "e";
if (j == 2) l["c2"] = "f";
labels.push_back(l);
}
}
using net_type = loss_multimulticlass_log<
fc<1,
input<matrix<float>>
>>;
net_type net(all_labels);
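// The fc layer must emit one output channel per label: 3 for "c1" plus 3 for "c2" = 6.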
net.subnet().layer_details().set_num_outputs(net.loss_details().number_of_labels());
dnn_trainer<net_type> trainer(net, sgd(0.1));
trainer.set_learning_rate(0.1);
trainer.set_min_learning_rate(0.00001);
trainer.set_iterations_without_progress_threshold(500);
trainer.train(samples, labels);
auto predicted_labels = net(samples);
// make sure the network predicts the right labels
for (size_t i = 0; i < samples.size(); ++i)
{
DLIB_TEST(predicted_labels[i]["c1"] == labels[i]["c1"]);
DLIB_TEST(predicted_labels[i]["c2"] == labels[i]["c2"]);
}
}
// ----------------------------------------------------------------------------------------
class dnn_tester : public tester
@@ -3182,6 +3241,7 @@ namespace
test_loss_multiclass_per_pixel_weighted();
test_serialization();
test_loss_dot();
test_loss_multimulticlass_log();
}
void perform_test()
...