Commit 47fbaff0 authored by Davis King's avatar Davis King

Added elastic_net solver.

parent 5ec306d2
This diff is collapsed.
// Copyright (C) 2016 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_ElASTIC_NET_ABSTRACT_Hh_
#ifdef DLIB_ElASTIC_NET_ABSTRACT_Hh_
#include "../matrix.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class elastic_net
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object is a tool for solving the following optimization problem:

                min_w:     length_squared(X*w - Y) + ridge_lambda*length_squared(w)
                such that: sum(abs(w)) <= lasso_budget

            That is, it solves the elastic net optimization problem.

            NOTE(review): given that #size() == X.nc() and set_y() requires
            Y.size() == size(), the residual presumably acts on trans(X)*w
            (i.e. each column of X is one sample) rather than X*w as written
            above -- confirm against the implementation.

            This object also has the special property that you can quickly
            obtain different solutions for different settings of ridge_lambda,
            lasso_budget, and target Y values.  This is because a large amount
            of work is precomputed in the constructor.  The solver will also
            remember the previous solution and will use that to warm start
            subsequent invocations.  Therefore, you can efficiently get
            solutions for a wide range of regularization parameters.

            The particular algorithm used to solve it is described in the paper:
                Zhou, Quan, et al. "A reduction of the elastic net to support vector
                machines with an application to gpu computing." arXiv preprint
                arXiv:1409.1976 (2014).

            And for the SVM solver sub-component we use the algorithm from:
                Hsieh, Cho-Jui, et al. "A dual coordinate descent method for large-scale
                linear SVM." Proceedings of the 25th international conference on Machine
                learning. ACM, 2008.
    !*/
public:

    template <typename EXP>
    explicit elastic_net(
        const matrix_exp<EXP>& X
    );
    /*!
        requires
            - X.size() != 0
        ensures
            - #get_epsilon() == 1e-5
            - #get_max_iterations() == 50000
            - this object will not be verbose unless be_verbose() is called
            - #size() == X.nc()
              (i.e. the number of samples is the number of columns of X)
            - #have_target_values() == false
              (i.e. you must call set_y() before operator() can be used)
    !*/

    template <typename EXP1, typename EXP2>
    elastic_net(
        const matrix_exp<EXP1>& X,
        const matrix_exp<EXP2>& Y
    );
    /*!
        requires
            - X.size() != 0
            - is_col_vector(Y)
            - X.nc() == Y.size()
        ensures
            - constructs this object by calling the elastic_net(X) constructor and then
              calling this->set_y(Y).
            - #have_target_values() == true
    !*/

    long size (
    ) const;
    /*!
        ensures
            - returns the number of samples loaded into this object.
    !*/

    bool have_target_values (
    ) const;
    /*!
        ensures
            - returns true if set_y() has been called and false otherwise.
    !*/

    template <typename EXP>
    void set_y(
        const matrix_exp<EXP>& Y
    );
    /*!
        requires
            - is_col_vector(Y)
            - Y.size() == size()
        ensures
            - #have_target_values() == true
            - Sets the target values, the Y variable in the objective function, to the
              given Y.
    !*/

    void set_epsilon(
        double eps
    );
    /*!
        requires
            - eps > 0
        ensures
            - #get_epsilon() == eps
    !*/

    double get_epsilon (
    ) const;
    /*!
        ensures
            - returns the error epsilon that determines when the solver should stop.
              Smaller values may result in a more accurate solution but take longer to
              execute.
    !*/

    unsigned long get_max_iterations (
    ) const;
    /*!
        ensures
            - returns the maximum number of iterations the optimizer is allowed to run
              before it is required to stop and return a result.
    !*/

    void set_max_iterations (
        unsigned long max_iter
    );
    /*!
        ensures
            - #get_max_iterations() == max_iter
    !*/

    void be_verbose (
    );
    /*!
        ensures
            - This object will print status messages to standard out so that a
              user can observe the progress of the algorithm.
    !*/

    void be_quiet (
    );
    /*!
        ensures
            - this object will not print anything to standard out.
    !*/

    matrix<double,0,1> operator() (
        double ridge_lambda,
        double lasso_budget = std::numeric_limits<double>::infinity()
    );
    /*!
        requires
            - have_target_values() == true
            - ridge_lambda > 0
            - lasso_budget > 0
        ensures
            - Solves the optimization problem described in the WHAT THIS OBJECT
              REPRESENTS section above and returns the optimal w.
            - if (lasso_budget == infinity) then
                - The lasso constraint is ignored and only the ridge penalty is
                  applied.
    !*/

};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_ElASTIC_NET_ABSTRACT_Hh_
......@@ -156,6 +156,7 @@ if (COMPILER_CAN_DO_CPP_11)
dnn.cpp
cublas.cpp
find_optimal_parameters.cpp
elastic_net.cpp
)
endif()
......
// Copyright (C) 2016 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#include <dlib/optimization/elastic_net.h>
#include "tester.h"
#include <dlib/svm.h>
#include <dlib/rand.h>
#include <dlib/string.h>
#include <vector>
#include <sstream>
#include <ctime>
namespace
{
using namespace test;
using namespace dlib;
using namespace std;
dlib::logger dlog("test.elastic_net");
// ----------------------------------------------------------------------------------------
// Reference implementation of the elastic net solver, used as ground truth in
// the tests.  It follows the reduction of Zhou et al. (arXiv:1409.1976): each
// feature row of X yields a +1/-1 sample pair shifted by Y/lasso_budget, the
// resulting L2-SVM is solved with dlib's dual coordinate descent trainer, and
// the dual variables are mapped back to the elastic net coefficients.
matrix<double,0,1> basic_elastic_net(
    const matrix<double>& X,
    const matrix<double,0,1>& Y,
    double ridge_lambda,
    double lasso_budget,
    double eps
)
{
    DLIB_CASSERT(X.nc() == Y.nr(),"");

    using sample_type = matrix<double,0,1>;
    using kernel_type = linear_kernel<sample_type>;

    // Configure the L2-SVM solver that the reduction maps onto.  The SVM's C
    // parameter encodes the ridge regularizer, and no bias term is used.
    svm_c_linear_dcd_trainer<kernel_type> solver;
    solver.solve_svm_l2_problem(true);
    const double C = 1/(2*ridge_lambda);
    solver.set_c(C);
    solver.set_epsilon(eps);
    solver.enable_shrinking(true);
    solver.include_bias(false);

    // Build the paired training set: for every row of X, one sample shifted by
    // -Y/lasso_budget labeled +1 and one shifted by +Y/lasso_budget labeled -1.
    const double y_scale = (1/lasso_budget);
    std::vector<sample_type> samples;
    std::vector<double> labels;
    for (long r = 0; r < X.nr(); ++r)
    {
        sample_type xr = trans(rowm(X,r));
        samples.push_back(xr - y_scale*Y);
        labels.push_back(+1);
        samples.push_back(xr + y_scale*Y);
        labels.push_back(-1);
    }

    // Train and keep the optimizer state so we can read out the dual alphas.
    svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
    solver.train(samples, labels, state);
    auto&& alpha = state.get_alpha();

    // Map each (+1,-1) alpha pair back to one elastic net coefficient and
    // normalize by the total dual mass.
    matrix<double,0,1> coeffs(alpha.size()/2);
    for (long j = 0; j < coeffs.size(); ++j)
        coeffs(j) = lasso_budget*(alpha[2*j] - alpha[2*j+1]);
    coeffs /= sum(mat(alpha));
    return coeffs;
}
// ----------------------------------------------------------------------------------------
class test_elastic_net : public tester
{
public:
    test_elastic_net (
    ) :
        tester (
            "test_elastic_net",
            "Run tests on the elastic_net object.",
            0
        )
    {
    }

    // Compares dlib::elastic_net against the slow reference solver
    // (basic_elastic_net) on a synthetic sparse regression problem over a
    // range of lasso budgets.
    void perform_test (
    )
    {
        // Ground-truth sparse weight vector and a noisy linear data set
        // generated from it: Y = trans(X)*w + small uniform noise.
        matrix<double> w = {1,2,0,4, 0,0,0,0,0, 6, 7,8,0, 9, 0};
        matrix<double> X = randm(w.size(),1000);
        matrix<double> Y = trans(X)*w;
        Y += 0.1*(randm(Y.nr(), Y.nc())-0.5);

        const double ridge_lambda = 0.1;
        // Budget equal to sum(abs(w)) makes the lasso constraint just tight
        // enough to recover w; it is scaled by s below.
        const double lasso_budget = sum(abs(w));
        const double eps = 0.0000001;

        dlib::elastic_net solver(X,Y);
        solver.set_epsilon(eps);

        matrix<double,0,1> results;
        matrix<double,0,1> results2;
        for (double s = 1.2; s > 0.10; s *= 0.9)
        {
            print_spinner();
            dlog << LINFO << "s: "<< s;

            // make sure the two solvers agree.
            results = basic_elastic_net(X, Y, ridge_lambda, lasso_budget*s, eps);
            results2 = solver(ridge_lambda, lasso_budget*s);
            dlog << LINFO << "error: "<< max(abs(results - results2));
            // BUG FIX: this previously read max(abs(results - results2) < 1e-3),
            // which applies the < element-wise INSIDE max and therefore passes
            // whenever ANY coefficient is within tolerance.  The intent is to
            // require that the LARGEST coefficient error is below tolerance.
            DLIB_TEST(max(abs(results - results2)) < 1e-3);
        }
    }
} a;
// ----------------------------------------------------------------------------------------
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment