Commit 47fbaff0 authored by Davis King's avatar Davis King

Added elastic_net solver.

parent 5ec306d2
This diff is collapsed.
// Copyright (C) 2016 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_ElASTIC_NET_ABSTRACT_Hh_
#ifdef DLIB_ElASTIC_NET_ABSTRACT_Hh_
#include "../matrix.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class elastic_net
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object is a tool for solving the following optimization problem:

                min_w:     length_squared(X*w - Y) + ridge_lambda*length_squared(w)
                such that: sum(abs(w)) <= lasso_budget

            That is, it solves the elastic net optimization problem.

            NOTE(review): given that #size() == X.nc() and set_y() requires
            Y.size() == size(), the residual presumably acts on trans(X)*w
            (i.e. each column of X is one sample) rather than X*w as written
            above -- confirm against the implementation.

            This object also has the special property that you can quickly
            obtain different solutions for different settings of ridge_lambda,
            lasso_budget, and target Y values.  This is because a large amount
            of work is precomputed in the constructor.  The solver will also
            remember the previous solution and will use that to warm start
            subsequent invocations.  Therefore, you can efficiently get
            solutions for a wide range of regularization parameters.

            The particular algorithm used to solve it is described in the paper:
                Zhou, Quan, et al. "A reduction of the elastic net to support vector
                machines with an application to gpu computing." arXiv preprint
                arXiv:1409.1976 (2014).

            And for the SVM solver sub-component we use the algorithm from:
                Hsieh, Cho-Jui, et al. "A dual coordinate descent method for large-scale
                linear SVM." Proceedings of the 25th international conference on Machine
                learning. ACM, 2008.
    !*/
public:

    template <typename EXP>
    explicit elastic_net(
        const matrix_exp<EXP>& X
    );
    /*!
        requires
            - X.size() != 0
        ensures
            - #get_epsilon() == 1e-5
            - #get_max_iterations() == 50000
            - this object will not be verbose unless be_verbose() is called
            - #size() == X.nc()
              (i.e. the number of samples is the number of columns of X)
            - #have_target_values() == false
              (i.e. you must call set_y() before operator() can be used)
    !*/

    template <typename EXP1, typename EXP2>
    elastic_net(
        const matrix_exp<EXP1>& X,
        const matrix_exp<EXP2>& Y
    );
    /*!
        requires
            - X.size() != 0
            - is_col_vector(Y)
            - X.nc() == Y.size()
        ensures
            - constructs this object by calling the elastic_net(X) constructor and then
              calling this->set_y(Y).
            - #have_target_values() == true
    !*/

    long size (
    ) const;
    /*!
        ensures
            - returns the number of samples loaded into this object.
    !*/

    bool have_target_values (
    ) const;
    /*!
        ensures
            - returns true if set_y() has been called and false otherwise.
    !*/

    template <typename EXP>
    void set_y(
        const matrix_exp<EXP>& Y
    );
    /*!
        requires
            - is_col_vector(Y)
            - Y.size() == size()
        ensures
            - #have_target_values() == true
            - Sets the target values, the Y variable in the objective function, to the
              given Y.
    !*/

    void set_epsilon(
        double eps
    );
    /*!
        requires
            - eps > 0
        ensures
            - #get_epsilon() == eps
    !*/

    double get_epsilon (
    ) const;
    /*!
        ensures
            - returns the error epsilon that determines when the solver should stop.
              Smaller values may result in a more accurate solution but take longer to
              execute.
    !*/

    unsigned long get_max_iterations (
    ) const;
    /*!
        ensures
            - returns the maximum number of iterations the optimizer is allowed to run
              before it is required to stop and return a result.
    !*/

    void set_max_iterations (
        unsigned long max_iter
    );
    /*!
        ensures
            - #get_max_iterations() == max_iter
    !*/

    void be_verbose (
    );
    /*!
        ensures
            - This object will print status messages to standard out so that a
              user can observe the progress of the algorithm.
    !*/

    void be_quiet (
    );
    /*!
        ensures
            - this object will not print anything to standard out.
    !*/

    matrix<double,0,1> operator() (
        double ridge_lambda,
        double lasso_budget = std::numeric_limits<double>::infinity()
    );
    /*!
        requires
            - have_target_values() == true
            - ridge_lambda > 0
            - lasso_budget > 0
        ensures
            - Solves the optimization problem described in the WHAT THIS OBJECT
              REPRESENTS section above and returns the optimal w.
            - if (lasso_budget == infinity) then
                - The lasso constraint is ignored and only the ridge penalty is
                  applied.
    !*/

};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_ElASTIC_NET_ABSTRACT_Hh_
......@@ -156,6 +156,7 @@ if (COMPILER_CAN_DO_CPP_11)
dnn.cpp
cublas.cpp
find_optimal_parameters.cpp
elastic_net.cpp
)
endif()
......
// Copyright (C) 2016 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#include <dlib/optimization/elastic_net.h>
#include "tester.h"
#include <dlib/svm.h>
#include <dlib/rand.h>
#include <dlib/string.h>
#include <vector>
#include <sstream>
#include <ctime>
namespace
{
using namespace test;
using namespace dlib;
using namespace std;
dlib::logger dlog("test.elastic_net");
// ----------------------------------------------------------------------------------------
// Reference implementation of the elastic net solver, used as ground truth in
// the tests.  It follows the reduction of Zhou et al. (arXiv:1409.1976): each
// feature row of X yields a +1/-1 sample pair shifted by Y/lasso_budget, the
// resulting L2-SVM is solved with dlib's dual coordinate descent trainer, and
// the dual variables are mapped back to the elastic net coefficients.
matrix<double,0,1> basic_elastic_net(
    const matrix<double>& X,
    const matrix<double,0,1>& Y,
    double ridge_lambda,
    double lasso_budget,
    double eps
)
{
    DLIB_CASSERT(X.nc() == Y.nr(),"");

    using sample_type = matrix<double,0,1>;
    using kernel_type = linear_kernel<sample_type>;

    // Configure the L2-SVM solver that the reduction maps onto.  The SVM's C
    // parameter encodes the ridge regularizer, and no bias term is used.
    svm_c_linear_dcd_trainer<kernel_type> solver;
    solver.solve_svm_l2_problem(true);
    const double C = 1/(2*ridge_lambda);
    solver.set_c(C);
    solver.set_epsilon(eps);
    solver.enable_shrinking(true);
    solver.include_bias(false);

    // Build the paired training set: for every row of X, one sample shifted by
    // -Y/lasso_budget labeled +1 and one shifted by +Y/lasso_budget labeled -1.
    const double y_scale = (1/lasso_budget);
    std::vector<sample_type> samples;
    std::vector<double> labels;
    for (long r = 0; r < X.nr(); ++r)
    {
        sample_type xr = trans(rowm(X,r));
        samples.push_back(xr - y_scale*Y);
        labels.push_back(+1);
        samples.push_back(xr + y_scale*Y);
        labels.push_back(-1);
    }

    // Train and keep the optimizer state so we can read out the dual alphas.
    svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
    solver.train(samples, labels, state);
    auto&& alpha = state.get_alpha();

    // Map each (+1,-1) alpha pair back to one elastic net coefficient and
    // normalize by the total dual mass.
    matrix<double,0,1> coeffs(alpha.size()/2);
    for (long j = 0; j < coeffs.size(); ++j)
        coeffs(j) = lasso_budget*(alpha[2*j] - alpha[2*j+1]);
    coeffs /= sum(mat(alpha));
    return coeffs;
}
// ----------------------------------------------------------------------------------------
class test_elastic_net : public tester
{
public:
    test_elastic_net (
    ) :
        tester (
            "test_elastic_net",
            "Run tests on the elastic_net object.",
            0
        )
    {
    }

    // Compares dlib::elastic_net against the slow reference solver
    // (basic_elastic_net) on a synthetic sparse regression problem over a
    // range of lasso budgets.
    void perform_test (
    )
    {
        // Ground-truth sparse weight vector and a noisy linear data set
        // generated from it: Y = trans(X)*w + small uniform noise.
        matrix<double> w = {1,2,0,4, 0,0,0,0,0, 6, 7,8,0, 9, 0};
        matrix<double> X = randm(w.size(),1000);
        matrix<double> Y = trans(X)*w;
        Y += 0.1*(randm(Y.nr(), Y.nc())-0.5);

        const double ridge_lambda = 0.1;
        // Budget equal to sum(abs(w)) makes the lasso constraint just tight
        // enough to recover w; it is scaled by s below.
        const double lasso_budget = sum(abs(w));
        const double eps = 0.0000001;

        dlib::elastic_net solver(X,Y);
        solver.set_epsilon(eps);

        matrix<double,0,1> results;
        matrix<double,0,1> results2;
        for (double s = 1.2; s > 0.10; s *= 0.9)
        {
            print_spinner();
            dlog << LINFO << "s: "<< s;

            // make sure the two solvers agree.
            results = basic_elastic_net(X, Y, ridge_lambda, lasso_budget*s, eps);
            results2 = solver(ridge_lambda, lasso_budget*s);
            dlog << LINFO << "error: "<< max(abs(results - results2));
            // BUG FIX: this previously read max(abs(results - results2) < 1e-3),
            // which applies the < element-wise INSIDE max and therefore passes
            // whenever ANY coefficient is within tolerance.  The intent is to
            // require that the LARGEST coefficient error is below tolerance.
            DLIB_TEST(max(abs(results - results2)) < 1e-3);
        }
    }
} a;
// ----------------------------------------------------------------------------------------
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment