dlib — Commit 47fbaff0
Authored Apr 25, 2016 by Davis King
Parent: 5ec306d2

    Added elastic_net solver.

Showing 4 changed files with 682 additions and 0 deletions:

    dlib/optimization/elastic_net.h           +380  -0
    dlib/optimization/elastic_net_abstract.h  +179  -0
    dlib/test/CMakeLists.txt                    +1  -0
    dlib/test/elastic_net.cpp                 +122  -0
dlib/optimization/elastic_net.h (new file, 0 → 100644)
// Copyright (C) 2016  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_ElASTIC_NET_Hh_
#define DLIB_ElASTIC_NET_Hh_

#include "../matrix.h"
#include "elastic_net_abstract.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    class elastic_net
    {
    public:

        template <typename EXP>
        explicit elastic_net(
            const matrix_exp<EXP>& X_
        ) : eps(1e-5), max_iterations(50000), verbose(false)
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(X_.size() > 0,
                "\t elastic_net::elastic_net(X)"
                << "\n\t X can't be empty"
                << "\n\t this: " << this
            );

            // If the number of columns in X is big and in particular bigger than the number of
            // rows then we can get rid of them by doing some SVD magic.  Doing this doesn't
            // make the final results of anything change but makes all the matrices have
            // dimensions that are X.nr() in size, which can be much smaller.
            matrix<double,0,1> s;
            svd3(trans(X_), u, s, eig_vects);
            X = eig_vects*diagm(s);

            // Later on, we will use the eigenvalues of X*trans(X) to compute the solution
            // without lasso.  So we save them here.
            eig_vals = squared(s);

            samples.resize(X.nr()*2);
            for (size_t i = 0; i < samples.size(); ++i)
                index.push_back(i);
            active_size = index.size();

            // setup the training samples used in the SVM optimizer below
            for (size_t i = 0; i < samples.size(); ++i)
            {
                auto& x = samples[i];
                const long idx = i/2;

                if (i%2 == 0)
                    x.label = +1;
                else
                    x.label = -1;

                x.r = idx%X.nr();
            }
        }

        template <typename EXP1, typename EXP2>
        elastic_net(
            const matrix_exp<EXP1>& X_,
            const matrix_exp<EXP2>& Y_
        ) : elastic_net(X_)
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(X_.size() > 0 && is_col_vector(Y_) && X_.nc() == Y_.size(),
                "\t elastic_net::elastic_net(X,Y)"
                << "\n\t Invalid inputs were given to this function."
                << "\n\t X_.size(): " << X_.size()
                << "\n\t is_col_vector(Y_): " << is_col_vector(Y_)
                << "\n\t X_.nc(): " << X_.nc()
                << "\n\t Y_.size(): " << Y_.size()
                << "\n\t this: " << this
            );

            set_y(Y_);
        }

        long size(
        ) const
        {
            return u.nr();
        }

        template <typename EXP>
        void set_y(
            const matrix_exp<EXP>& Y_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(is_col_vector(Y_) && Y_.size() == size(),
                "\t void elastic_net::set_y(Y)"
                << "\n\t Invalid inputs were given to this function."
                << "\n\t is_col_vector(Y_): " << is_col_vector(Y_)
                << "\n\t size(): " << size()
                << "\n\t Y_.size(): " << Y_.size()
                << "\n\t this: " << this
            );

            ynorm = length_squared(Y_);
            Y = trans(u)*Y_;
            xdoty = X*Y;
            eig_vects_xdoty = trans(eig_vects)*xdoty;
            w.set_size(Y.size());

            // zero out any memory of previous solutions
            alpha.assign(X.nr()*2, 0);
        }

        bool have_target_values(
        ) const
        {
            return Y.size() != 0;
        }

        void set_epsilon(
            double eps_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(eps_ > 0,
                "\t void elastic_net::set_epsilon()"
                << "\n\t eps_ must be greater than 0"
                << "\n\t eps_: " << eps_
                << "\n\t this: " << this
            );
            eps = eps_;
        }

        unsigned long get_max_iterations(
        ) const
        {
            return max_iterations;
        }

        void set_max_iterations(
            unsigned long max_iter
        )
        {
            max_iterations = max_iter;
        }

        void be_verbose(
        )
        {
            verbose = true;
        }

        void be_quiet(
        )
        {
            verbose = false;
        }

        double get_epsilon(
        ) const
        {
            return eps;
        }

        matrix<double,0,1> operator()(
            double ridge_lambda,
            double lasso_budget = std::numeric_limits<double>::infinity()
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(have_target_values() && ridge_lambda > 0 && lasso_budget > 0,
                "\t matrix<double,0,1> elastic_net::operator()()"
                << "\n\t Invalid inputs were given to this function."
                << "\n\t have_target_values(): " << have_target_values()
                << "\n\t ridge_lambda: " << ridge_lambda
                << "\n\t lasso_budget: " << lasso_budget
                << "\n\t this: " << this
            );

            // First check if lasso_budget is so big that it isn't even active.  We do this
            // by doing just ridge regression and checking the result.
            matrix<double,0,1> betas = eig_vects*tmp(inv(diagm(eig_vals+ridge_lambda))*eig_vects_xdoty);
            if (sum(abs(betas)) <= lasso_budget)
                return betas;

            // Set w back to 0.  We will compute the w corresponding to what is currently
            // in alpha later on.  This way w and alpha are always in sync.
            w = 0;
            wy_mult = 0;
            wdoty = 0;

            // return dot(w,x)
            auto dot = [&](const matrix<double,0,1>& w, const en_sample2& x)
            {
                const double xmul = -x.label*(1/lasso_budget);
                // Do the base dot product but don't forget to add in the -(1/t)*y part
                // from the svm reduction paper.
                double val = rowm(X,x.r)*w + xmul*wdoty + wy_mult*xdoty(x.r) + xmul*wy_mult*ynorm;
                return val;
            };

            // perform w += scale*x;
            auto add_to = [&](matrix<double,0,1>& w, double scale, const en_sample2& x)
            {
                const double xmul = -x.label*(1/lasso_budget);
                wy_mult += scale*xmul;
                wdoty += scale*xdoty(x.r);
                w += scale*trans(rowm(X,x.r));
            };

            const double Dii = ridge_lambda;

            // setup the training samples used in the SVM optimizer below
            for (size_t i = 0; i < samples.size(); ++i)
            {
                auto& x = samples[i];
                const double xmul = -x.label*(1/lasso_budget);
                x.xdotx = xmul*xmul*ynorm;
                for (long c = 0; c < X.nc(); ++c)
                    x.xdotx += std::pow(X(x.r,c) + xmul*Y(c), 2.0) - std::pow(xmul*Y(c), 2.0);

                // compute the correct w given whatever might be in alpha.
                if (alpha[i] != 0)
                    add_to(w, x.label*alpha[i], samples[i]);
            }

            // Now run the optimizer
            double PG_max_prev = std::numeric_limits<double>::infinity();
            double PG_min_prev = -std::numeric_limits<double>::infinity();

            unsigned int iter;
            for (iter = 0; iter < max_iterations; ++iter)
            {
                // randomly shuffle the indices
                for (unsigned long i = 0; i < active_size; ++i)
                {
                    // pick a random index >= i
                    const long j = i + rnd.get_random_32bit_number()%(active_size-i);
                    std::swap(index[i], index[j]);
                }

                double PG_max = -std::numeric_limits<double>::infinity();
                double PG_min = std::numeric_limits<double>::infinity();
                for (size_t ii = 0; ii < active_size; ++ii)
                {
                    const auto i = index[ii];
                    const auto& x = samples[i];
                    double G = x.label*dot(w,x) - 1 + Dii*alpha[i];

                    double PG = 0;
                    if (alpha[i] == 0)
                    {
                        if (G > PG_max_prev)
                        {
                            // shrink the active set of training examples
                            --active_size;
                            std::swap(index[ii], index[active_size]);
                            --ii;
                            continue;
                        }

                        if (G < 0)
                            PG = G;
                    }
                    else
                    {
                        PG = G;
                    }

                    if (PG > PG_max)
                        PG_max = PG;
                    if (PG < PG_min)
                        PG_min = PG;

                    // if PG != 0
                    if (std::abs(PG) > 1e-12)
                    {
                        const double alpha_old = alpha[i];
                        alpha[i] = std::max(alpha[i] - G/(x.xdotx+Dii), (double)0.0);
                        const double delta = (alpha[i]-alpha_old)*x.label;
                        add_to(w, delta, x);
                    }
                }

                if (verbose)
                {
                    using namespace std;
                    cout << "gap: " << PG_max - PG_min << endl;
                    cout << "active_size: " << active_size << endl;
                    cout << "iter: " << iter << endl;
                    cout << endl;
                }

                if (PG_max - PG_min <= eps)
                {
                    // stop if we are within eps tolerance and the last iteration
                    // was over all the samples
                    if (active_size == index.size())
                        break;

                    // Turn off shrinking on the next iteration.  We will stop if the
                    // tolerance is still <= eps when shrinking is off.
                    active_size = index.size();
                    PG_max_prev = std::numeric_limits<double>::infinity();
                    PG_min_prev = -std::numeric_limits<double>::infinity();
                }
                else
                {
                    PG_max_prev = PG_max;
                    PG_min_prev = PG_min;
                    if (PG_max_prev <= 0)
                        PG_max_prev = std::numeric_limits<double>::infinity();
                    if (PG_min_prev >= 0)
                        PG_min_prev = -std::numeric_limits<double>::infinity();
                }

                // recalculate wdoty every so often to avoid drift.
                if (iter%100 == 0)
                    wdoty = dlib::dot(Y,w);
            }

            betas.set_size(alpha.size()/2);
            for (long i = 0; i < betas.size(); ++i)
                betas(i) = lasso_budget*(alpha[2*i] - alpha[2*i+1]);
            betas /= sum(mat(alpha));

            return betas;
        }

    private:

        struct en_sample2
        {
            // X location
            long r;

            double label;
            double xdotx;
        };

        std::vector<en_sample2> samples;
        std::vector<double> alpha;

        double ynorm;
        matrix<double> X;
        matrix<double,0,1> Y;
        matrix<double,0,1> xdoty;

        double wdoty;
        double wy_mult;
        // logically, the real w is what is in the w vector + wy_mult*Y
        matrix<double,0,1> w;

        std::vector<long> index;
        unsigned long active_size;

        matrix<double,0,1> eig_vects_xdoty;
        matrix<double,0,1> eig_vals;
        matrix<double> eig_vects;
        matrix<double> u;

        dlib::rand rnd;

        double eps;
        unsigned long max_iterations;
        bool verbose;
    };

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_ElASTIC_NET_Hh_
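The header above implements the elastic-net-to-SVM reduction cited in elastic_net_abstract.h. As a sketch of the mapping (my notation, not the author's: t = lasso_budget, lambda = ridge_lambda, and, following the unit test's convention, observations are the columns of X, so the data term is X^T w - Y):

    \min_w \; \lVert X^{\mathsf T} w - Y \rVert^2 + \lambda \lVert w \rVert^2
    \quad \text{subject to} \quad \sum_i \lvert w_i \rvert \le t

Each coordinate w_i contributes a pair of SVM training points built from the i-th row x_i^T of X,

    z_{2i} = x_i - \tfrac{1}{t} Y \;(\text{label } +1), \qquad
    z_{2i+1} = x_i + \tfrac{1}{t} Y \;(\text{label } -1),

which is the xmul = -x.label*(1/lasso_budget) term folded into the dot and add_to lambdas. An L2-SVM dual is then solved over these points (the Dii = ridge_lambda diagonal term above; equivalently C = 1/(2*lambda), as in the test file below), and the final loop recovers

    w_i = t \, \frac{\alpha_{2i} - \alpha_{2i+1}}{\sum_k \alpha_k},

matching betas(i) = lasso_budget*(alpha[2*i] - alpha[2*i+1]) followed by betas /= sum(mat(alpha)).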
dlib/optimization/elastic_net_abstract.h (new file, 0 → 100644)
// Copyright (C) 2016  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_ElASTIC_NET_ABSTRACT_Hh_
#ifdef DLIB_ElASTIC_NET_ABSTRACT_Hh_

#include "../matrix.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    class elastic_net
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This object is a tool for solving the following optimization problem:

                    min_w: length_squared(X*w - Y) + ridge_lambda*length_squared(w)
                    such that: sum(abs(w)) <= lasso_budget

                That is, it solves the elastic net optimization problem.  This object
                also has the special property that you can quickly obtain different
                solutions for different settings of ridge_lambda, lasso_budget, and
                target Y values.

                This is because a large amount of work is precomputed in the
                constructor.  The solver will also remember the previous solution and
                will use that to warm start subsequent invocations.  Therefore, you can
                efficiently get solutions for a wide range of regularization parameters.

                The particular algorithm used to solve it is described in the paper:
                    Zhou, Quan, et al. "A reduction of the elastic net to support vector
                    machines with an application to GPU computing." arXiv preprint
                    arXiv:1409.1976 (2014).

                And for the SVM solver sub-component we use the algorithm from:
                    Hsieh, Cho-Jui, et al. "A dual coordinate descent method for
                    large-scale linear SVM." Proceedings of the 25th International
                    Conference on Machine Learning. ACM, 2008.
        !*/

    public:

        template <typename EXP>
        explicit elastic_net(
            const matrix_exp<EXP>& X
        );
        /*!
            requires
                - X.size() != 0
            ensures
                - #get_epsilon() == 1e-5
                - #get_max_iterations() == 50000
                - This object will not be verbose unless be_verbose() is called.
                - #size() == X.nc()
                - #have_target_values() == false
        !*/

        template <typename EXP1, typename EXP2>
        elastic_net(
            const matrix_exp<EXP1>& X,
            const matrix_exp<EXP2>& Y
        );
        /*!
            requires
                - X.size() != 0
                - is_col_vector(Y)
                - X.nc() == Y.size()
            ensures
                - constructs this object by calling the elastic_net(X) constructor and
                  then calling this->set_y(Y).
                - #have_target_values() == true
        !*/

        long size(
        ) const;
        /*!
            ensures
                - returns the number of samples loaded into this object.
        !*/

        bool have_target_values(
        ) const;
        /*!
            ensures
                - returns true if set_y() has been called and false otherwise.
        !*/

        template <typename EXP>
        void set_y(
            const matrix_exp<EXP>& Y
        );
        /*!
            requires
                - is_col_vector(Y)
                - Y.size() == size()
            ensures
                - #have_target_values() == true
                - Sets the target values, the Y variable in the objective function, to
                  the given Y.
        !*/

        void set_epsilon(
            double eps
        );
        /*!
            requires
                - eps > 0
            ensures
                - #get_epsilon() == eps
        !*/

        double get_epsilon(
        ) const;
        /*!
            ensures
                - returns the error epsilon that determines when the solver should stop.
                  Smaller values may result in a more accurate solution but take longer
                  to execute.
        !*/

        unsigned long get_max_iterations(
        ) const;
        /*!
            ensures
                - returns the maximum number of iterations the optimizer is allowed to
                  run before it is required to stop and return a result.
        !*/

        void set_max_iterations(
            unsigned long max_iter
        );
        /*!
            ensures
                - #get_max_iterations() == max_iter
        !*/

        void be_verbose(
        );
        /*!
            ensures
                - This object will print status messages to standard out so that a user
                  can observe the progress of the algorithm.
        !*/

        void be_quiet(
        );
        /*!
            ensures
                - This object will not print anything to standard out.
        !*/

        matrix<double,0,1> operator()(
            double ridge_lambda,
            double lasso_budget = std::numeric_limits<double>::infinity()
        );
        /*!
            requires
                - have_target_values() == true
                - ridge_lambda > 0
                - lasso_budget > 0
            ensures
                - Solves the optimization problem described in the WHAT THIS OBJECT
                  REPRESENTS section above and returns the optimal w.
                - if (lasso_budget == infinity) then
                    - The lasso constraint is ignored.
        !*/
    };

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_ElASTIC_NET_ABSTRACT_Hh_
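To make the documented interface concrete, here is a minimal usage sketch. It is not part of this commit; the data is synthetic and the parameter values are illustrative only, but every call appears in the specification above.

    #include <dlib/optimization/elastic_net.h>
    #include <iostream>

    int main()
    {
        using namespace dlib;

        // Hypothetical problem: 15 weights, 1000 observations, one observation
        // per column of X (the convention used by the unit test below).
        matrix<double> X = randm(15, 1000);
        matrix<double,0,1> w_true = randm(15, 1);
        matrix<double,0,1> Y = trans(X)*w_true;

        elastic_net solver(X, Y);   // the expensive precomputation happens here
        solver.set_epsilon(1e-7);

        // Warm starting makes sweeping the regularization parameters cheap.
        const double ridge_lambda = 0.1;
        for (double budget = 10; budget > 0.1; budget *= 0.5)
        {
            matrix<double,0,1> w = solver(ridge_lambda, budget);
            std::cout << "budget: " << budget
                      << "   sum(abs(w)): " << sum(abs(w)) << std::endl;
        }
    }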
dlib/test/CMakeLists.txt
...
@@ -156,6 +156,7 @@ if (COMPILER_CAN_DO_CPP_11)
       dnn.cpp
       cublas.cpp
       find_optimal_parameters.cpp
+      elastic_net.cpp
    )
endif()
...
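With elastic_net.cpp added to this list, the new test is compiled into dlib's test harness. If I am reading the harness conventions correctly, the first argument passed to the tester constructor below ("test_elastic_net") doubles as the command line switch for running this test selectively.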
dlib/test/elastic_net.cpp (new file, 0 → 100644)
// Copyright (C) 2016  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.

#include <dlib/optimization/elastic_net.h>
#include "tester.h"
#include <dlib/svm.h>
#include <dlib/rand.h>
#include <dlib/string.h>
#include <vector>
#include <sstream>
#include <ctime>

namespace
{
    using namespace test;
    using namespace dlib;
    using namespace std;

    dlib::logger dlog("test.elastic_net");

// ----------------------------------------------------------------------------------------

    matrix<double,0,1> basic_elastic_net(
        const matrix<double>& X,
        const matrix<double,0,1>& Y,
        double ridge_lambda,
        double lasso_budget,
        double eps
    )
    {
        DLIB_CASSERT(X.nc() == Y.nr(), "");

        typedef matrix<double,0,1> sample_type;
        typedef linear_kernel<sample_type> kernel_type;

        svm_c_linear_dcd_trainer<kernel_type> trainer;
        trainer.solve_svm_l2_problem(true);
        const double C = 1/(2*ridge_lambda);
        trainer.set_c(C);
        trainer.set_epsilon(eps);
        trainer.enable_shrinking(true);
        trainer.include_bias(false);

        std::vector<sample_type> samples;
        std::vector<double> labels;
        for (long r = 0; r < X.nr(); ++r)
        {
            sample_type temp = trans(rowm(X,r));
            const double xmul = (1/lasso_budget);
            samples.push_back(temp - xmul*Y);
            labels.push_back(+1);
            samples.push_back(temp + xmul*Y);
            labels.push_back(-1);
        }

        svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
        auto df = trainer.train(samples, labels, state);

        auto&& alpha = state.get_alpha();
        matrix<double,0,1> betas(alpha.size()/2);
        for (long i = 0; i < betas.size(); ++i)
            betas(i) = lasso_budget*(alpha[2*i] - alpha[2*i+1]);
        betas /= sum(mat(alpha));

        return betas;
    }

// ----------------------------------------------------------------------------------------

    class test_elastic_net : public tester
    {
    public:
        test_elastic_net(
        ) : tester("test_elastic_net",
                   "Run tests on the elastic_net object.", 0)
        {}

        void perform_test(
        )
        {
            matrix<double> w = {1, 2, 0, 4, 0, 0, 0, 0, 0, 6, 7, 8, 0, 9, 0};

            matrix<double> X = randm(w.size(), 1000);
            matrix<double> Y = trans(X)*w;
            Y += 0.1*(randm(Y.nr(), Y.nc()) - 0.5);

            double ridge_lambda = 0.1;
            double lasso_budget = sum(abs(w));
            double eps = 0.0000001;

            dlib::elastic_net solver(X, Y);
            solver.set_epsilon(eps);

            matrix<double,0,1> results;
            matrix<double,0,1> results2;
            for (double s = 1.2; s > 0.10; s *= 0.9)
            {
                print_spinner();
                dlog << LINFO << "s: " << s;
                // make sure the two solvers agree.
                results = basic_elastic_net(X, Y, ridge_lambda, lasso_budget*s, eps);
                results2 = solver(ridge_lambda, lasso_budget*s);
                dlog << LINFO << "error: " << max(abs(results - results2));
                DLIB_TEST(max(abs(results - results2)) < 1e-3);
            }
        }
    } a;

// ----------------------------------------------------------------------------------------

}
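The test above validates the new solver purely by agreement with an independent reduction through svm_c_linear_dcd_trainer. A further sanity check one could add (a hypothetical helper, not part of this commit) is to evaluate the objective from elastic_net_abstract.h directly, using the same column-observation convention as perform_test():

    // Hypothetical helper, not part of this commit.  X is d x n with one
    // observation per column and Y is n x 1, as in perform_test() above.
    double elastic_net_objective(
        const dlib::matrix<double>& X,
        const dlib::matrix<double,0,1>& Y,
        const dlib::matrix<double,0,1>& w,
        double ridge_lambda
    )
    {
        // length_squared(trans(X)*w - Y) + ridge_lambda*length_squared(w)
        return dlib::length_squared(dlib::trans(X)*w - Y)
             + ridge_lambda*dlib::length_squared(w);
    }

At the optimum one would expect this value for solver()'s output to match basic_elastic_net()'s to within the solver tolerance, with sum(abs(w)) <= lasso_budget holding (up to the same tolerance) whenever the budget constraint is active.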