Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
de39ebdf
Commit
de39ebdf
authored
Feb 22, 2016
by
Davis King
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added the option to use the elastic net regularizer to the OCA solver.
parent
2889e520
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
186 additions
and
42 deletions
+186
-42
optimization_oca.h
dlib/optimization/optimization_oca.h
+63
-14
optimization_oca_abstract.h
dlib/optimization/optimization_oca_abstract.h
+47
-0
optimization_solve_qp_using_smo.h
dlib/optimization/optimization_solve_qp_using_smo.h
+33
-8
optimization_solve_qp_using_smo_abstract.h
dlib/optimization/optimization_solve_qp_using_smo_abstract.h
+16
-6
oca.cpp
dlib/test/oca.cpp
+8
-0
opt_qp_solver.cpp
dlib/test/opt_qp_solver.cpp
+19
-14
No files found.
dlib/optimization/optimization_oca.h
View file @
de39ebdf
...
@@ -117,7 +117,21 @@ namespace dlib
...
@@ -117,7 +117,21 @@ namespace dlib
)
const
)
const
{
{
matrix_type
empty_prior
;
matrix_type
empty_prior
;
return
oca_impl
(
problem
,
w
,
empty_prior
,
false
,
num_nonnegative
,
force_weight_to_1
);
return
oca_impl
(
problem
,
w
,
empty_prior
,
false
,
num_nonnegative
,
force_weight_to_1
,
0
);
}
template
<
typename
matrix_type
>
typename
matrix_type
::
type
solve_with_elastic_net
(
const
oca_problem
<
matrix_type
>&
problem
,
matrix_type
&
w
,
double
lasso_lambda
,
unsigned
long
force_weight_to_1
=
std
::
numeric_limits
<
unsigned
long
>::
max
()
)
const
{
matrix_type
empty_prior
;
return
oca_impl
(
problem
,
w
,
empty_prior
,
false
,
0
,
force_weight_to_1
,
lasso_lambda
);
}
}
template
<
template
<
...
@@ -141,7 +155,7 @@ namespace dlib
...
@@ -141,7 +155,7 @@ namespace dlib
// disable the force weight to 1 option for this mode. We also disable the
// disable the force weight to 1 option for this mode. We also disable the
// non-negative constraints.
// non-negative constraints.
unsigned
long
force_weight_to_1
=
std
::
numeric_limits
<
unsigned
long
>::
max
();
unsigned
long
force_weight_to_1
=
std
::
numeric_limits
<
unsigned
long
>::
max
();
return
oca_impl
(
problem
,
w
,
prior
,
true
,
0
,
force_weight_to_1
);
return
oca_impl
(
problem
,
w
,
prior
,
true
,
0
,
force_weight_to_1
,
0
);
}
}
private
:
private
:
...
@@ -152,24 +166,37 @@ namespace dlib
...
@@ -152,24 +166,37 @@ namespace dlib
typename
matrix_type
::
type
oca_impl
(
typename
matrix_type
::
type
oca_impl
(
const
oca_problem
<
matrix_type
>&
problem
,
const
oca_problem
<
matrix_type
>&
problem
,
matrix_type
&
w
,
matrix_type
&
w
,
const
matrix_type
prior
,
const
matrix_type
&
prior
,
bool
have_prior
,
bool
have_prior
,
unsigned
long
num_nonnegative
,
unsigned
long
num_nonnegative
,
unsigned
long
force_weight_to_1
unsigned
long
force_weight_to_1
,
const
double
lasso_lambda
)
const
)
const
{
{
const
unsigned
long
num_dims
=
problem
.
get_num_dimensions
();
const
unsigned
long
num_dims
=
problem
.
get_num_dimensions
();
// make sure requires clause is not broken
// make sure requires clause is not broken
DLIB_ASSERT
(
problem
.
get_c
()
>
0
&&
DLIB_ASSERT
(
problem
.
get_c
()
>
0
&&
problem
.
get_num_dimensions
()
>
0
,
problem
.
get_num_dimensions
()
>
0
&&
0
<=
lasso_lambda
&&
lasso_lambda
<
1
,
"
\t
scalar_type oca::operator()"
"
\t
scalar_type oca::operator()"
<<
"
\n\t
The oca_problem is invalid"
<<
"
\n\t
The oca_problem is invalid"
<<
"
\n\t
problem.get_c(): "
<<
problem
.
get_c
()
<<
"
\n\t
problem.get_c(): "
<<
problem
.
get_c
()
<<
"
\n\t
problem.get_num_dimensions(): "
<<
num_dims
<<
"
\n\t
problem.get_num_dimensions(): "
<<
num_dims
<<
"
\n\t
lasso_lambda: "
<<
lasso_lambda
<<
"
\n\t
this: "
<<
this
<<
"
\n\t
this: "
<<
this
);
);
if
(
have_prior
)
{
DLIB_ASSERT
(
lasso_lambda
==
0
,
"Solver doesn't support using a prior with lasso."
);
DLIB_ASSERT
(
num_nonnegative
==
0
,
"Solver doesn't support using a prior with non-negative constraints."
);
}
else
if
(
lasso_lambda
!=
0
)
{
DLIB_ASSERT
(
num_nonnegative
==
0
,
"Solver doesn't support using lasso with non-negative constraints."
);
}
const
double
ridge_lambda
=
1
-
lasso_lambda
;
if
(
num_nonnegative
>
num_dims
)
if
(
num_nonnegative
>
num_dims
)
num_nonnegative
=
num_dims
;
num_nonnegative
=
num_dims
;
...
@@ -184,7 +211,7 @@ namespace dlib
...
@@ -184,7 +211,7 @@ namespace dlib
typename
sequence
<
vect_type
>::
kernel_2a
planes
;
typename
sequence
<
vect_type
>::
kernel_2a
planes
;
std
::
vector
<
scalar_type
>
bs
,
miss_count
;
std
::
vector
<
scalar_type
>
bs
,
miss_count
;
vect_type
new_plane
,
alpha
;
vect_type
new_plane
,
alpha
,
btemp
;
w
.
set_size
(
num_dims
,
1
);
w
.
set_size
(
num_dims
,
1
);
w
=
0
;
w
=
0
;
...
@@ -198,6 +225,12 @@ namespace dlib
...
@@ -198,6 +225,12 @@ namespace dlib
scalar_type
cp_obj
=
0
;
scalar_type
cp_obj
=
0
;
matrix
<
scalar_type
,
0
,
0
,
mem_manager_type
,
layout_type
>
K
,
Ktmp
;
matrix
<
scalar_type
,
0
,
0
,
mem_manager_type
,
layout_type
>
K
,
Ktmp
;
matrix
<
scalar_type
,
0
,
1
,
mem_manager_type
,
layout_type
>
lambda
,
d
;
if
(
lasso_lambda
!=
0
)
d
.
set_size
(
num_dims
);
else
d
.
set_size
(
num_nonnegative
);
d
=
lasso_lambda
*
ones_matrix
(
d
);
scalar_type
R_lower_bound
;
scalar_type
R_lower_bound
;
if
(
problem
.
risk_has_lower_bound
(
R_lower_bound
))
if
(
problem
.
risk_has_lower_bound
(
R_lower_bound
))
...
@@ -253,7 +286,7 @@ namespace dlib
...
@@ -253,7 +286,7 @@ namespace dlib
else
else
alpha
=
join_cols
(
alpha
,
zeros_matrix
<
scalar_type
>
(
1
,
1
));
alpha
=
join_cols
(
alpha
,
zeros_matrix
<
scalar_type
>
(
1
,
1
));
const
scalar_type
wnorm
=
0
.
5
*
trans
(
w
)
*
w
;
const
scalar_type
wnorm
=
0
.
5
*
ridge_lambda
*
trans
(
w
)
*
w
+
lasso_lambda
*
sum
(
abs
(
w
))
;
const
double
prior_part
=
have_prior
?
dot
(
w
,
prior
)
:
0
;
const
double
prior_part
=
have_prior
?
dot
(
w
,
prior
)
:
0
;
cur_obj
=
wnorm
+
C
*
cur_risk
+
prior_norm
-
prior_part
;
cur_obj
=
wnorm
+
C
*
cur_risk
+
prior_norm
-
prior_part
;
...
@@ -280,21 +313,36 @@ namespace dlib
...
@@ -280,21 +313,36 @@ namespace dlib
// solve the cutting plane subproblem for the next w. We solve it to an
// solve the cutting plane subproblem for the next w. We solve it to an
// accuracy that is related to how big the error gap is
// accuracy that is related to how big the error gap is. Also, we multiply
scalar_type
eps
=
std
::
min
<
scalar_type
>
(
sub_eps
,
0
.
1
*
(
cur_obj
-
cp_obj
))
;
// by ridge_lambda because the objective function for the QP we solve was
// implicitly scaled by ridge_lambda. That is, we want to ask the QP
// solver to solve the problem until the duality gap is reduced to 0.1 times
// what it is now.  So the factor of ridge_lambda is necessary to make
// this happen.
scalar_type
eps
=
std
::
min
<
scalar_type
>
(
sub_eps
,
0
.
1
*
ridge_lambda
*
(
cur_obj
-
cp_obj
));
// just a sanity check
// just a sanity check
if
(
eps
<
1e-16
)
if
(
eps
<
1e-16
)
eps
=
1e-16
;
eps
=
1e-16
;
// Note that we warm start this optimization by using the alpha from the last
// Note that we warm start this optimization by using the alpha from the last
// iteration as the starting point.
// iteration as the starting point.
if
(
num_nonnegative
!=
0
)
if
(
lasso_lambda
!=
0
)
{
// copy planes into a matrix so we can call solve_qp4_using_smo()
matrix
<
scalar_type
,
0
,
0
,
mem_manager_type
,
layout_type
>
planes_mat
(
num_dims
,
planes
.
size
());
for
(
unsigned
long
i
=
0
;
i
<
planes
.
size
();
++
i
)
set_colm
(
planes_mat
,
i
)
=
planes
[
i
];
btemp
=
ridge_lambda
*
mat
(
bs
)
-
trans
(
planes_mat
)
*
d
;
solve_qp4_using_smo
(
planes_mat
,
K
,
btemp
,
d
,
alpha
,
lambda
,
eps
,
sub_max_iter
,
(
scalar_type
)(
2
*
lasso_lambda
));
}
else
if
(
num_nonnegative
!=
0
)
{
{
// copy planes into a matrix so we can call solve_qp4_using_smo()
// copy planes into a matrix so we can call solve_qp4_using_smo()
matrix
<
scalar_type
,
0
,
0
,
mem_manager_type
,
layout_type
>
planes_mat
(
num_nonnegative
,
planes
.
size
());
matrix
<
scalar_type
,
0
,
0
,
mem_manager_type
,
layout_type
>
planes_mat
(
num_nonnegative
,
planes
.
size
());
for
(
unsigned
long
i
=
0
;
i
<
planes
.
size
();
++
i
)
for
(
unsigned
long
i
=
0
;
i
<
planes
.
size
();
++
i
)
set_colm
(
planes_mat
,
i
)
=
colm
(
planes
[
i
],
0
,
num_nonnegative
);
set_colm
(
planes_mat
,
i
)
=
colm
(
planes
[
i
],
0
,
num_nonnegative
);
solve_qp4_using_smo
(
planes_mat
,
K
,
mat
(
bs
),
alph
a
,
eps
,
sub_max_iter
);
solve_qp4_using_smo
(
planes_mat
,
K
,
mat
(
bs
),
d
,
alpha
,
lambd
a
,
eps
,
sub_max_iter
);
}
}
else
else
{
{
...
@@ -305,8 +353,9 @@ namespace dlib
...
@@ -305,8 +353,9 @@ namespace dlib
w
=
-
alpha
(
0
)
*
planes
[
0
];
w
=
-
alpha
(
0
)
*
planes
[
0
];
for
(
unsigned
long
i
=
1
;
i
<
planes
.
size
();
++
i
)
for
(
unsigned
long
i
=
1
;
i
<
planes
.
size
();
++
i
)
w
-=
alpha
(
i
)
*
planes
[
i
];
w
-=
alpha
(
i
)
*
planes
[
i
];
// threshold the first num_nonnegative w elements if necessary.
if
(
lasso_lambda
!=
0
)
if
(
num_nonnegative
!=
0
)
w
=
(
lambda
-
d
+
w
)
/
ridge_lambda
;
else
if
(
num_nonnegative
!=
0
)
// threshold the first num_nonnegative w elements if necessary.
set_rowm
(
w
,
range
(
0
,
num_nonnegative
-
1
))
=
lowerbound
(
rowm
(
w
,
range
(
0
,
num_nonnegative
-
1
)),
0
);
set_rowm
(
w
,
range
(
0
,
num_nonnegative
-
1
))
=
lowerbound
(
rowm
(
w
,
range
(
0
,
num_nonnegative
-
1
)),
0
);
for
(
long
i
=
0
;
i
<
alpha
.
size
();
++
i
)
for
(
long
i
=
0
;
i
<
alpha
.
size
();
++
i
)
...
@@ -319,7 +368,7 @@ namespace dlib
...
@@ -319,7 +368,7 @@ namespace dlib
// Compute the lower bound on the true objective given to us by the cutting
// Compute the lower bound on the true objective given to us by the cutting
// plane subproblem.
// plane subproblem.
cp_obj
=
-
0
.
5
*
trans
(
w
)
*
w
+
trans
(
alpha
)
*
mat
(
bs
);
cp_obj
=
-
0
.
5
*
ridge_lambda
*
trans
(
w
)
*
w
+
trans
(
alpha
)
*
mat
(
bs
);
if
(
have_prior
)
if
(
have_prior
)
w
+=
prior
;
w
+=
prior
;
...
...
dlib/optimization/optimization_oca_abstract.h
View file @
de39ebdf
...
@@ -31,6 +31,13 @@ namespace dlib
...
@@ -31,6 +31,13 @@ namespace dlib
Where prior is a user supplied vector and R(w) has the same
Where prior is a user supplied vector and R(w) has the same
interpretation as above.
interpretation as above.
Or it can use the elastic net regularizer:
Minimize: f(w) == 0.5*(1-lasso_lambda)*length_squared(w) + lasso_lambda*sum(abs(w)) + C*R(w)
Where lasso_lambda is a number in the range [0, 1) and controls the
trade-off between doing L1 and L2 regularization.  R(w) has the same
interpretation as above.
Note that the stopping condition must be provided by the user
Note that the stopping condition must be provided by the user
in the form of the optimization_status() function.
in the form of the optimization_status() function.
...
@@ -142,6 +149,13 @@ namespace dlib
...
@@ -142,6 +149,13 @@ namespace dlib
Where prior is a user supplied vector and R(w) has the same
Where prior is a user supplied vector and R(w) has the same
interpretation as above.
interpretation as above.
Or it can use the elastic net regularizer:
Minimize: f(w) == 0.5*(1-lasso_lambda)*length_squared(w) + lasso_lambda*sum(abs(w)) + C*R(w)
Where lasso_lambda is a number in the range [0, 1) and controls the
trade-off between doing L1 and L2 regularization.  R(w) has the same
interpretation as above.
For a detailed discussion you should consult the following papers
For a detailed discussion you should consult the following papers
from the Journal of Machine Learning Research:
from the Journal of Machine Learning Research:
...
@@ -221,6 +235,39 @@ namespace dlib
...
@@ -221,6 +235,39 @@ namespace dlib
- returns the objective value at the solution #w
- returns the objective value at the solution #w
!*/
!*/
template
<
typename
matrix_type
>
typename
matrix_type
::
type
solve_with_elastic_net
(
const
oca_problem
<
matrix_type
>&
problem
,
matrix_type
&
w
,
scalar_type
lasso_lambda
,
unsigned
long
force_weight_to_1
=
std
::
numeric_limits
<
unsigned
long
>::
max
()
)
const
;
/*!
requires
- problem.get_c() > 0
- problem.get_num_dimensions() > 0
- 0 <= lasso_lambda < 1
ensures
- Solves the given oca problem and stores the solution in #w, but uses an
elastic net regularizer instead of the normal L2 regularizer. In
particular, this function solves:
Minimize: f(w) == 0.5*(1-lasso_lambda)*length_squared(w) + lasso_lambda*sum(abs(w)) + C*R(w)
- The optimization algorithm runs until problem.optimization_status()
indicates it is time to stop.
- returns the objective value at the solution #w
- if (force_weight_to_1 < problem.get_num_dimensions()) then
- The optimizer enforces the following constraints:
- #w(force_weight_to_1) == 1
- for all i > force_weight_to_1:
- #w(i) == 0
- That is, the element in the weight vector at the index indicated
by force_weight_to_1 will have a value of 1 upon completion of
this function, while all subsequent elements of w will have
values of 0.
!*/
void
set_subproblem_epsilon
(
void
set_subproblem_epsilon
(
double
eps
double
eps
);
);
...
...
dlib/optimization/optimization_solve_qp_using_smo.h
View file @
de39ebdf
...
@@ -215,15 +215,20 @@ namespace dlib
...
@@ -215,15 +215,20 @@ namespace dlib
typename
EXP1
,
typename
EXP1
,
typename
EXP2
,
typename
EXP2
,
typename
EXP3
,
typename
EXP3
,
typename
T
,
long
NR
,
long
NC
,
typename
MM
,
typename
L
typename
EXP4
,
typename
T
,
long
NR
,
long
NC
,
typename
MM
,
typename
L
,
long
NR2
,
long
NC2
>
>
unsigned
long
solve_qp4_using_smo
(
unsigned
long
solve_qp4_using_smo
(
const
matrix_exp
<
EXP1
>&
A
,
const
matrix_exp
<
EXP1
>&
A
,
const
matrix_exp
<
EXP2
>&
Q
,
const
matrix_exp
<
EXP2
>&
Q
,
const
matrix_exp
<
EXP3
>&
b
,
const
matrix_exp
<
EXP3
>&
b
,
const
matrix_exp
<
EXP4
>&
d
,
matrix
<
T
,
NR
,
NC
,
MM
,
L
>&
alpha
,
matrix
<
T
,
NR
,
NC
,
MM
,
L
>&
alpha
,
matrix
<
T
,
NR2
,
NC2
,
MM
,
L
>&
lambda
,
T
eps
,
T
eps
,
unsigned
long
max_iter
unsigned
long
max_iter
,
T
max_lambda
=
std
::
numeric_limits
<
T
>::
infinity
()
)
)
{
{
// make sure requires clause is not broken
// make sure requires clause is not broken
...
@@ -251,6 +256,15 @@ namespace dlib
...
@@ -251,6 +256,15 @@ namespace dlib
<<
"
\n\t
eps: "
<<
eps
<<
"
\n\t
eps: "
<<
eps
<<
"
\n\t
max_iter: "
<<
max_iter
<<
"
\n\t
max_iter: "
<<
max_iter
);
);
DLIB_ASSERT
(
is_col_vector
(
d
)
==
true
&&
max_lambda
>=
0
&&
d
.
size
()
==
A
.
nr
(),
"
\t
void solve_qp4_using_smo()"
<<
"
\n\t
Invalid arguments were given to this function"
<<
"
\n\t
A.nr(): "
<<
A
.
nr
()
<<
"
\n\t
d.size(): "
<<
d
.
size
()
<<
"
\n\t
max_lambda: "
<<
max_lambda
);
const
T
C
=
sum
(
alpha
);
const
T
C
=
sum
(
alpha
);
...
@@ -263,9 +277,14 @@ namespace dlib
...
@@ -263,9 +277,14 @@ namespace dlib
solve_qp_using_smo() routine.
solve_qp_using_smo() routine.
*/
*/
const
bool
d_is_zero
=
d
==
zeros_matrix
(
d
);
// compute optimal lambda for current alpha
// compute optimal lambda for current alpha
matrix
<
T
,
NR
,
1
,
MM
,
L
>
lambda
=
A
*
alpha
;
if
(
d_is_zero
)
lambda
=
lowerbound
(
lambda
,
0
);
lambda
=
A
*
alpha
;
else
lambda
=
A
*
alpha
+
d
;
lambda
=
clamp
(
lambda
,
0
,
max_lambda
);
// Compute f'(alpha) (i.e. the gradient of f(alpha) with respect to alpha) for the current alpha.
// Compute f'(alpha) (i.e. the gradient of f(alpha) with respect to alpha) for the current alpha.
matrix
<
T
,
NR
,
NC
,
MM
,
L
>
df
=
Q
*
alpha
-
b
-
trans
(
A
)
*
lambda
;
matrix
<
T
,
NR
,
NC
,
MM
,
L
>
df
=
Q
*
alpha
-
b
-
trans
(
A
)
*
lambda
;
...
@@ -308,8 +327,11 @@ namespace dlib
...
@@ -308,8 +327,11 @@ namespace dlib
{
{
// compute optimal lambda and recheck the duality gap to make
// compute optimal lambda and recheck the duality gap to make
// sure we have really converged.
// sure we have really converged.
lambda
=
A
*
alpha
;
if
(
d_is_zero
)
lambda
=
lowerbound
(
lambda
,
0
);
lambda
=
A
*
alpha
;
else
lambda
=
A
*
alpha
+
d
;
lambda
=
clamp
(
lambda
,
0
,
max_lambda
);
df
=
Q
*
alpha
-
b
-
trans
(
A
)
*
lambda
;
df
=
Q
*
alpha
-
b
-
trans
(
A
)
*
lambda
;
if
(
trans
(
alpha
)
*
df
-
C
*
min
(
df
)
<
eps
)
if
(
trans
(
alpha
)
*
df
-
C
*
min
(
df
)
<
eps
)
...
@@ -347,8 +369,11 @@ namespace dlib
...
@@ -347,8 +369,11 @@ namespace dlib
if
((
iter
%
300
)
==
299
)
if
((
iter
%
300
)
==
299
)
{
{
// compute the optimal lambda for the current alpha
// compute the optimal lambda for the current alpha
lambda
=
A
*
alpha
;
if
(
d_is_zero
)
lambda
=
lowerbound
(
lambda
,
0
);
lambda
=
A
*
alpha
;
else
lambda
=
A
*
alpha
+
d
;
lambda
=
clamp
(
lambda
,
0
,
max_lambda
);
// Perform this form of the update every so often because doing so can help
// Perform this form of the update every so often because doing so can help
// avoid the buildup of numerical errors you get with the alternate update
// avoid the buildup of numerical errors you get with the alternate update
...
...
dlib/optimization/optimization_solve_qp_using_smo_abstract.h
View file @
de39ebdf
...
@@ -58,46 +58,56 @@ namespace dlib
...
@@ -58,46 +58,56 @@ namespace dlib
typename
EXP1
,
typename
EXP1
,
typename
EXP2
,
typename
EXP2
,
typename
EXP3
,
typename
EXP3
,
typename
T
,
long
NR
,
long
NC
,
typename
MM
,
typename
L
typename
T
,
long
NR
,
long
NC
,
typename
MM
,
typename
L
,
long
NR2
,
long
NC2
>
>
unsigned
long
solve_qp4_using_smo
(
unsigned
long
solve_qp4_using_smo
(
const
matrix_exp
<
EXP1
>&
A
,
const
matrix_exp
<
EXP1
>&
A
,
const
matrix_exp
<
EXP2
>&
Q
,
const
matrix_exp
<
EXP2
>&
Q
,
const
matrix_exp
<
EXP3
>&
b
,
const
matrix_exp
<
EXP3
>&
b
,
const
matrix_exp
<
EXP4
>&
d
,
matrix
<
T
,
NR
,
NC
,
MM
,
L
>&
alpha
,
matrix
<
T
,
NR
,
NC
,
MM
,
L
>&
alpha
,
matrix
<
T
,
NR2
,
NC2
,
MM
,
L
>&
lambda
,
T
eps
,
T
eps
,
unsigned
long
max_iter
unsigned
long
max_iter
,
T
max_lambda
=
std
::
numeric_limits
<
T
>::
infinity
()
);
);
/*!
/*!
requires
requires
- A.nc() == alpha.size()
- A.nc() == alpha.size()
- Q.nr() == Q.nc()
- Q.nr() == Q.nc()
- is_col_vector(b) == true
- is_col_vector(b) == true
- is_col_vector(d) == true
- is_col_vector(alpha) == true
- is_col_vector(alpha) == true
- b.size() == alpha.size() == Q.nr()
- b.size() == alpha.size() == Q.nr()
- d.size() == A.nr()
- alpha.size() > 0
- alpha.size() > 0
- min(alpha) >= 0
- min(alpha) >= 0
- eps > 0
- eps > 0
- max_iter > 0
- max_iter > 0
- max_lambda >= 0
ensures
ensures
- Let C == sum(alpha) (i.e. C is the sum of the alpha values you
- Let C == sum(alpha) (i.e. C is the sum of the alpha values you
supply to this function)
supply to this function)
- This function solves the following quadratic program:
- This function solves the following quadratic program:
Minimize: f(alpha,lambda) == 0.5*trans(alpha)*Q*alpha - trans(alpha)*b +
Minimize: f(alpha,lambda) == 0.5*trans(alpha)*Q*alpha - trans(alpha)*b +
0.5*trans(lambda)*lambda - trans(lambda)*A*alpha
0.5*trans(lambda)*lambda - trans(lambda)*A*alpha
- trans(lambda)*d
subject to the following constraints:
subject to the following constraints:
- sum(alpha) == C (i.e. the sum of alpha values doesn't change)
- sum(alpha) == C (i.e. the sum of alpha values doesn't change)
- min(alpha) >= 0 (i.e. all alpha values are nonnegative)
- min(alpha) >= 0 (i.e. all alpha values are nonnegative)
- min(lambda) >= 0 (i.e. all lambda values are nonnegative)
- min(lambda) >= 0 (i.e. all lambda values are nonnegative)
- max(lambda) <= max_lambda (i.e. all lambda values are less than or equal to max_lambda)
Where f is convex. This means that Q should be positive-semidefinite.
Where f is convex. This means that Q should be positive-semidefinite.
-
The solution to the above QP will be stored in #alpha. The optimal
-
If you don't want an upper limit on lambda then max_lambda can be set to
lambda is not output since its value is given by the following expression:
infinity.
lowerbound(A*alpha,0)
- The solution to the above QP will be stored in #alpha and #lambda.
- This function uses a simple implementation of the sequential minimal
- This function uses a simple implementation of the sequential minimal
optimization algorithm. It starts the algorithm with the given alpha
optimization algorithm. It starts the algorithm with the given alpha
and it works on the problem until the duality gap (i.e. how far away
and it works on the problem until the duality gap (i.e. how far away
we are from the optimum solution) is less than eps. So eps controls
we are from the optimum solution) is less than eps. So eps controls
how accurate the solution is and smaller values result in better solutions.
how accurate the solution is and smaller values result in better solutions.
The initial value of lambda is ignored since the optimal lambda can be
obtained via a simple closed form expression given alpha.
- At most max_iter iterations of optimization will be performed.
- At most max_iter iterations of optimization will be performed.
- returns the number of iterations performed. If this method fails to
- returns the number of iterations performed. If this method fails to
converge to eps accuracy then the number returned will be max_iter+1.
converge to eps accuracy then the number returned will be max_iter+1.
...
...
dlib/test/oca.cpp
View file @
de39ebdf
...
@@ -72,6 +72,14 @@ namespace
...
@@ -72,6 +72,14 @@ namespace
dlog
<<
LINFO
<<
"error: "
<<
max
(
abs
(
w
-
true_w
));
dlog
<<
LINFO
<<
"error: "
<<
max
(
abs
(
w
-
true_w
));
DLIB_TEST
(
max
(
abs
(
w
-
true_w
))
<
1e-10
);
DLIB_TEST
(
max
(
abs
(
w
-
true_w
))
<
1e-10
);
solver
.
solve_with_elastic_net
(
make_oca_problem_c_svm
<
w_type
>
(
2.0
,
3.0
,
mat
(
x
),
mat
(
y
),
false
,
1e-12
,
40
,
max_index_plus_one
(
x
)),
w
,
0.5
);
dlog
<<
LINFO
<<
trans
(
w
);
true_w
=
-
0.5
,
0.5
,
0
;
dlog
<<
LINFO
<<
"error: "
<<
max
(
abs
(
w
-
true_w
));
DLIB_TEST
(
max
(
abs
(
w
-
true_w
))
<
1e-10
);
print_spinner
();
w_type
prior
=
true_w
;
w_type
prior
=
true_w
;
solver
(
make_oca_problem_c_svm
<
w_type
>
(
20.0
,
30.0
,
mat
(
x
),
mat
(
y
),
false
,
1e-12
,
40
,
max_index_plus_one
(
x
)),
w
,
prior
);
solver
(
make_oca_problem_c_svm
<
w_type
>
(
20.0
,
30.0
,
mat
(
x
),
mat
(
y
),
false
,
1e-12
,
40
,
max_index_plus_one
(
x
)),
w
,
prior
);
dlog
<<
LINFO
<<
trans
(
w
);
dlog
<<
LINFO
<<
trans
(
w
);
...
...
dlib/test/opt_qp_solver.cpp
View file @
de39ebdf
...
@@ -99,10 +99,11 @@ namespace
...
@@ -99,10 +99,11 @@ namespace
const
double
C
=
2
;
const
double
C
=
2
;
matrix
<
double
,
0
,
1
>
alpha
(
2
),
true_alpha
(
2
);
matrix
<
double
,
0
,
1
>
alpha
(
2
),
true_alpha
(
2
)
,
d
(
3
),
lambda
;
alpha
=
C
/
2
,
C
/
2
;
alpha
=
C
/
2
,
C
/
2
;
d
=
0
;
solve_qp4_using_smo
(
A
,
tmp
(
trans
(
A
)
*
A
),
b
,
alph
a
,
1e-9
,
800
);
solve_qp4_using_smo
(
A
,
tmp
(
trans
(
A
)
*
A
),
b
,
d
,
alpha
,
lambd
a
,
1e-9
,
800
);
matrix
<
double
,
0
,
1
>
w
=
lowerbound
(
-
A
*
alpha
,
0
);
matrix
<
double
,
0
,
1
>
w
=
lowerbound
(
-
A
*
alpha
,
0
);
dlog
<<
LINFO
<<
"*******************************************************"
;
dlog
<<
LINFO
<<
"*******************************************************"
;
...
@@ -136,10 +137,11 @@ namespace
...
@@ -136,10 +137,11 @@ namespace
const
double
C
=
2
;
const
double
C
=
2
;
matrix
<
double
,
0
,
1
>
alpha
(
2
),
true_alpha
(
2
);
matrix
<
double
,
0
,
1
>
alpha
(
2
),
true_alpha
(
2
)
,
d
(
3
),
lambda
;
alpha
=
C
/
2
,
C
/
2
;
alpha
=
C
/
2
,
C
/
2
;
d
=
0
;
solve_qp4_using_smo
(
A
,
tmp
(
trans
(
A
)
*
A
),
b
,
alph
a
,
1e-9
,
800
);
solve_qp4_using_smo
(
A
,
tmp
(
trans
(
A
)
*
A
),
b
,
d
,
alpha
,
lambd
a
,
1e-9
,
800
);
matrix
<
double
,
0
,
1
>
w
=
lowerbound
(
-
A
*
alpha
,
0
);
matrix
<
double
,
0
,
1
>
w
=
lowerbound
(
-
A
*
alpha
,
0
);
dlog
<<
LINFO
<<
"*******************************************************"
;
dlog
<<
LINFO
<<
"*******************************************************"
;
...
@@ -173,10 +175,11 @@ namespace
...
@@ -173,10 +175,11 @@ namespace
const
double
C
=
2
;
const
double
C
=
2
;
matrix
<
double
,
0
,
1
>
alpha
(
2
),
true_alpha
(
2
);
matrix
<
double
,
0
,
1
>
alpha
(
2
),
true_alpha
(
2
)
,
d
(
3
),
lambda
;
alpha
=
C
/
2
,
C
/
2
;
alpha
=
C
/
2
,
C
/
2
;
d
=
0
;
solve_qp4_using_smo
(
A
,
tmp
(
trans
(
A
)
*
A
),
b
,
alph
a
,
1e-9
,
800
);
solve_qp4_using_smo
(
A
,
tmp
(
trans
(
A
)
*
A
),
b
,
d
,
alpha
,
lambd
a
,
1e-9
,
800
);
matrix
<
double
,
0
,
1
>
w
=
lowerbound
(
-
A
*
alpha
,
0
);
matrix
<
double
,
0
,
1
>
w
=
lowerbound
(
-
A
*
alpha
,
0
);
dlog
<<
LINFO
<<
"*******************************************************"
;
dlog
<<
LINFO
<<
"*******************************************************"
;
...
@@ -211,10 +214,11 @@ namespace
...
@@ -211,10 +214,11 @@ namespace
const
double
C
=
2
;
const
double
C
=
2
;
matrix
<
double
,
0
,
1
>
alpha
(
3
),
true_alpha
(
3
);
matrix
<
double
,
0
,
1
>
alpha
(
3
),
true_alpha
(
3
)
,
d
(
3
),
lambda
;
alpha
=
C
/
2
,
C
/
2
,
0
;
alpha
=
C
/
2
,
C
/
2
,
0
;
d
=
0
;
solve_qp4_using_smo
(
A
,
tmp
(
trans
(
A
)
*
A
),
b
,
alph
a
,
1e-9
,
800
);
solve_qp4_using_smo
(
A
,
tmp
(
trans
(
A
)
*
A
),
b
,
d
,
alpha
,
lambd
a
,
1e-9
,
800
);
matrix
<
double
,
0
,
1
>
w
=
lowerbound
(
-
A
*
alpha
,
0
);
matrix
<
double
,
0
,
1
>
w
=
lowerbound
(
-
A
*
alpha
,
0
);
...
@@ -249,10 +253,10 @@ namespace
...
@@ -249,10 +253,10 @@ namespace
const
double
C
=
2
;
const
double
C
=
2
;
matrix
<
double
,
0
,
1
>
alpha
(
2
),
true_alpha
(
2
);
matrix
<
double
,
0
,
1
>
alpha
(
2
),
true_alpha
(
2
)
,
d
(
3
),
lambda
;
alpha
=
C
/
2
,
C
/
2
;
alpha
=
C
/
2
,
C
/
2
;
solve_qp4_using_smo
(
A
,
tmp
(
trans
(
A
)
*
A
),
b
,
alph
a
,
1e-9
,
800
);
solve_qp4_using_smo
(
A
,
tmp
(
trans
(
A
)
*
A
),
b
,
d
,
alpha
,
lambd
a
,
1e-9
,
800
);
matrix
<
double
,
0
,
1
>
w
=
lowerbound
(
-
A
*
alpha
,
0
);
matrix
<
double
,
0
,
1
>
w
=
lowerbound
(
-
A
*
alpha
,
0
);
dlog
<<
LINFO
<<
"*******************************************************"
;
dlog
<<
LINFO
<<
"*******************************************************"
;
...
@@ -285,10 +289,10 @@ namespace
...
@@ -285,10 +289,10 @@ namespace
const
double
C
=
2
;
const
double
C
=
2
;
matrix
<
double
,
0
,
1
>
alpha
(
3
),
true_alpha
(
3
);
matrix
<
double
,
0
,
1
>
alpha
(
3
),
true_alpha
(
3
)
,
d
(
3
),
lambda
;
alpha
=
C
/
2
,
C
/
2
,
0
;
alpha
=
C
/
2
,
C
/
2
,
0
;
solve_qp4_using_smo
(
A
,
tmp
(
trans
(
A
)
*
A
),
b
,
alph
a
,
1e-9
,
800
);
solve_qp4_using_smo
(
A
,
tmp
(
trans
(
A
)
*
A
),
b
,
d
,
alpha
,
lambd
a
,
1e-9
,
800
);
matrix
<
double
,
0
,
1
>
w
=
lowerbound
(
-
A
*
alpha
,
0
);
matrix
<
double
,
0
,
1
>
w
=
lowerbound
(
-
A
*
alpha
,
0
);
dlog
<<
LINFO
<<
"*******************************************************"
;
dlog
<<
LINFO
<<
"*******************************************************"
;
...
@@ -327,10 +331,11 @@ namespace
...
@@ -327,10 +331,11 @@ namespace
const
double
C
=
2
;
const
double
C
=
2
;
matrix
<
double
,
0
,
1
>
alpha
(
3
),
true_alpha
(
3
);
matrix
<
double
,
0
,
1
>
alpha
(
3
),
true_alpha
(
3
)
,
d
(
3
),
lambda
;
alpha
=
C
/
2
,
C
/
2
,
0
;
alpha
=
C
/
2
,
C
/
2
,
0
;
d
=
0
;
solve_qp4_using_smo
(
A
,
Q
,
b
,
alph
a
,
1e-9
,
800
);
solve_qp4_using_smo
(
A
,
Q
,
b
,
d
,
alpha
,
lambd
a
,
1e-9
,
800
);
dlog
<<
LINFO
<<
"*******************************************************"
;
dlog
<<
LINFO
<<
"*******************************************************"
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment