Commit 50fe106d
authored Dec 16, 2012 by Davis King
    Added initial implementation of the svm_c_linear_dcd_trainer.

parent afdb28f0
Showing 2 changed files with 479 additions and 0 deletions:

    dlib/svm/svm_c_linear_dcd_trainer.h           +479  -0
    dlib/svm/svm_c_linear_dcd_trainer_abstract.h  +0    -0
dlib/svm/svm_c_linear_dcd_trainer.h  (new file, 0 → 100644)

// Copyright (C) 2012  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_SVm_C_LINEAR_DCD_TRAINER_H__
#define DLIB_SVm_C_LINEAR_DCD_TRAINER_H__

#include "svm_c_linear_dcd_trainer_abstract.h"
#include <cmath>
#include <limits>
#include "../matrix.h"
#include "../algs.h"
#include "../rand.h"
#include "function.h"
#include "kernel.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <
        typename K
        >
    class svm_c_linear_dcd_trainer
    {
    public:
        typedef K kernel_type;
        typedef typename kernel_type::scalar_type scalar_type;
        typedef typename kernel_type::sample_type sample_type;
        typedef typename kernel_type::mem_manager_type mem_manager_type;
        typedef decision_function<kernel_type> trained_function_type;
        typedef typename decision_function<K>::sample_vector_type sample_vector_type;
        typedef typename decision_function<K>::scalar_vector_type scalar_vector_type;

        // You are getting a compiler error on this line because you supplied a non-linear
        // kernel to the svm_c_linear_dcd_trainer object.  You have to use one of the
        // linear kernels with this trainer.
        COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value ||
                             is_same_type<K, sparse_linear_kernel<sample_type> >::value ));
        svm_c_linear_dcd_trainer (
        ) :
            Cpos(1),
            Cneg(1),
            eps(0.1),
            max_iterations(10000),
            verbose(false),
            have_bias(true),
            last_weight_1(false)
        {
        }

        explicit svm_c_linear_dcd_trainer (
            const scalar_type& C_
        ) :
            Cpos(C_),
            Cneg(C_),
            eps(0.1),
            max_iterations(10000),
            verbose(false),
            have_bias(true),
            last_weight_1(false)
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(0 < C_,
                "\t svm_c_linear_dcd_trainer::svm_c_linear_dcd_trainer(C_)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t C_: " << C_
                );
        }
        bool includes_bias (
        ) const
        {
            return have_bias;
        }

        void include_bias (
            bool should_have_bias
        )
        {
            have_bias = should_have_bias;
        }

        bool forces_last_weight_to_1 (
        ) const
        {
            return last_weight_1;
        }

        void force_last_weight_to_1 (
            bool should_last_weight_be_1
        )
        {
            last_weight_1 = should_last_weight_be_1;
        }

        void be_verbose (
        )
        {
            verbose = true;
        }

        void be_quiet (
        )
        {
            verbose = false;
        }
        void set_epsilon (
            scalar_type eps_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(eps_ > 0,
                "\t void svm_c_linear_dcd_trainer::set_epsilon(eps_)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t eps_: " << eps_
                );
            eps = eps_;
        }

        const scalar_type get_epsilon (
        ) const
        {
            return eps;
        }
        const kernel_type get_kernel (
        ) const
        {
            return kernel_type();
        }
        unsigned long get_max_iterations (
        ) const
        {
            return max_iterations;
        }

        void set_max_iterations (
            unsigned long max_iter
        )
        {
            max_iterations = max_iter;
        }

        void set_c (
            scalar_type C
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(C > 0,
                "\t void svm_c_linear_dcd_trainer::set_c()"
                << "\n\t C must be greater than 0"
                << "\n\t C:    " << C
                << "\n\t this: " << this
                );
            Cpos = C;
            Cneg = C;
        }

        const scalar_type get_c_class1 (
        ) const
        {
            return Cpos;
        }

        const scalar_type get_c_class2 (
        ) const
        {
            return Cneg;
        }
        void set_c_class1 (
            scalar_type C
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(C > 0,
                "\t void svm_c_linear_dcd_trainer::set_c_class1()"
                << "\n\t C must be greater than 0"
                << "\n\t C:    " << C
                << "\n\t this: " << this
                );
            Cpos = C;
        }

        void set_c_class2 (
            scalar_type C
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(C > 0,
                "\t void svm_c_linear_dcd_trainer::set_c_class2()"
                << "\n\t C must be greater than 0"
                << "\n\t C:    " << C
                << "\n\t this: " << this
                );
            Cneg = C;
        }
        template <
            typename in_sample_vector_type,
            typename in_scalar_vector_type
            >
        const decision_function<kernel_type> train (
            const in_sample_vector_type& x,
            const in_scalar_vector_type& y
        ) const
        {
            scalar_vector_type alpha(x.size());
            alpha = 0;
            return do_train(vector_to_matrix(x), vector_to_matrix(y), alpha);
        }
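
        // The overload of train() below also takes an alpha vector.  Since alpha is
        // passed by non-const reference and updated in place by do_train(), the
        // solution alphas from one call can be fed back in to warm-start the next
        // call, e.g. after appending new samples to x (the vector is zero-padded
        // below to match the new x.size()).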
        template <
            typename in_sample_vector_type,
            typename in_scalar_vector_type
            >
        const decision_function<kernel_type> train (
            const in_sample_vector_type& x,
            const in_scalar_vector_type& y,
            scalar_vector_type& alpha
        ) const
        {
            DLIB_CASSERT(static_cast<long>(x.size()) >= alpha.size(),
                "\t decision_function svm_c_linear_dcd_trainer::train(x,y,alpha)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t x.size():     " << x.size()
                << "\n\t alpha.size(): " << alpha.size()
                );

            if (static_cast<long>(x.size()) > alpha.size())
            {
                // Make sure alpha has the same length as x by padding it with extra
                // zeros if necessary.
                alpha = join_cols(alpha, zeros_matrix<scalar_type>(x.size()-alpha.size(),1));
            }

            return do_train(vector_to_matrix(x), vector_to_matrix(y), alpha);
        }

    private:

    // ------------------------------------------------------------------------------------
        template <
            typename in_sample_vector_type,
            typename in_scalar_vector_type
            >
        const decision_function<kernel_type> do_train (
            const in_sample_vector_type& x,
            const in_scalar_vector_type& y,
            scalar_vector_type& alpha
        ) const
        {
            // TODO: also require that the labels are all +1 or -1, although we don't
            // have to see both types.

            // make sure requires clause is not broken
            DLIB_ASSERT(is_learning_problem(x,y) == true,
                "\t decision_function svm_c_linear_dcd_trainer::train(x,y)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t x.size(): " << x.size()
                << "\n\t y.size(): " << y.size()
                << "\n\t is_learning_problem(x,y): " << is_learning_problem(x,y)
                );

            const long dims = max_index_plus_one(x);

            // TODO: return an opaque object instead of alpha.  The object also needs
            // to verify that the trainer has the same settings from one call to the
            // next.
            std::vector<long> index(x.size());
            scalar_vector_type Q(x.size());
            scalar_vector_type w;

            if (have_bias)
                w.set_size(dims+1);
            else
                w.set_size(dims);

            w = 0;
            if (last_weight_1)
                w(dims-1) = 1;

            long ii = 0;
            for (long i = 0; i < alpha.size(); ++i)
            {
                Q(i) = dlib::dot(x(i), x(i));
                if (have_bias)
                    Q(i) += 1;

                // Ignore any x elements that have zero norm.  We do this because they
                // are impossible to classify and skipping them also avoids a division
                // by zero later on in the code.
                if (Q(i) != 0)
                {
                    index[ii] = i;
                    ++ii;
                }
            }
            const long max_possible_active = ii;
            dlib::rand rnd;

            long active_size = max_possible_active;

            scalar_type PG_max_prev = std::numeric_limits<scalar_type>::infinity();
            scalar_type PG_min_prev = -std::numeric_limits<scalar_type>::infinity();
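
            // The loop below implements the dual coordinate descent method for
            // linear SVMs from Hsieh et al., "A Dual Coordinate Descent Method for
            // Large-scale Linear SVM" (ICML 2008), including its random sweep
            // ordering and active set shrinking.  Each step applies the
            // one-variable update
            //     alpha(i) <- min(max(alpha(i) - G/Q(i), 0), C)
            // where G = y(i)*dot(w,x(i)) - 1 is the gradient of the dual objective
            // with respect to alpha(i), and w is kept equal to
            // sum_i alpha(i)*y(i)*x(i) so each update only costs one pass over the
            // nonzero elements of x(i).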
            // main loop
            for (unsigned long iter = 0; iter < max_iterations; ++iter)
            {
                scalar_type PG_max = -std::numeric_limits<scalar_type>::infinity();
                scalar_type PG_min = std::numeric_limits<scalar_type>::infinity();

                // randomly shuffle the indices
                for (long i = 0; i < active_size; ++i)
                {
                    // pick a random index >= i
                    const long j = i + rnd.get_random_32bit_number()%(active_size-i);
                    std::swap(index[i], index[j]);
                }
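
                // Shrinking heuristic: a dual variable at a bound (0 or C) whose
                // gradient points even further outside the feasible box than any
                // projected gradient seen on the previous pass is unlikely to move
                // soon, so it is swapped out of the active set.  The convergence
                // check at the bottom of this loop re-activates everything for a
                // final full pass before stopping.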
                // for all the active training samples
                for (long ii = 0; ii < active_size; ++ii)
                {
                    const long i = index[ii];

                    const scalar_type G = y(i)*dot(w, x(i)) - 1;
                    const scalar_type C = (y(i) > 0) ? Cpos : Cneg;

                    scalar_type PG = 0;
                    if (alpha(i) == 0)
                    {
                        if (G > PG_max_prev)
                        {
                            // shrink the active set of training examples
                            --active_size;
                            std::swap(index[ii], index[active_size]);
                            --ii;
                            continue;
                        }

                        if (G < 0)
                            PG = G;
                    }
                    else if (alpha(i) == C)
                    {
                        if (G < PG_min_prev)
                        {
                            // shrink the active set of training examples
                            --active_size;
                            std::swap(index[ii], index[active_size]);
                            --ii;
                            continue;
                        }

                        if (G > 0)
                            PG = G;
                    }
                    else
                    {
                        PG = G;
                    }

                    if (PG > PG_max)
                        PG_max = PG;
                    if (PG < PG_min)
                        PG_min = PG;

                    // if PG != 0
                    if (std::abs(PG) > 1e-12)
                    {
                        const scalar_type alpha_old = alpha(i);
                        alpha(i) = std::min(std::max(alpha(i) - G/Q(i), (scalar_type)0.0), C);
                        const scalar_type delta = (alpha(i) - alpha_old)*y(i);
                        add_to(w, x(i), delta);
                        if (have_bias)
                            w(w.size()-1) -= delta;

                        if (last_weight_1)
                            w(dims-1) = 1;
                    }
                }
                if (verbose)
                {
                    using namespace std;
                    cout << "gap:         " << PG_max - PG_min << endl;
                    cout << "active_size: " << active_size << endl;
                    cout << "iter:        " << iter << endl;
                    cout << endl;
                }
                if (PG_max - PG_min <= eps)
                {
                    // stop if we are within eps tolerance and the last iteration
                    // was over all the samples
                    if (active_size == max_possible_active)
                        break;

                    // Turn off shrinking on the next iteration.  We will stop if the
                    // tolerance is still <= eps when shrinking is off.
                    active_size = max_possible_active;
                    PG_max_prev = std::numeric_limits<scalar_type>::infinity();
                    PG_min_prev = -std::numeric_limits<scalar_type>::infinity();
                }
                else
                {
                    PG_max_prev = PG_max;
                    PG_min_prev = PG_min;
                    if (PG_max_prev <= 0)
                        PG_max_prev = std::numeric_limits<scalar_type>::infinity();
                    if (PG_min_prev >= 0)
                        PG_min_prev = -std::numeric_limits<scalar_type>::infinity();
                }
            }
            // put the solution into a decision function and then return it
            decision_function<kernel_type> df;
            if (have_bias)
                df.b = w(w.size()-1);
            else
                df.b = 0;

            df.basis_vectors.set_size(1);
            // Copy the plane normal into the output basis vector.  The output vector
            // might be a sparse vector container so we need to use this special kind
            // of copy to handle that case.  As an aside, the reason for using
            // max_index_plus_one() and not just w.size()-1 is that doing it this way
            // avoids an inane warning from gcc that can occur in some cases.
            assign(df.basis_vectors(0), colm(w, 0, dims));
            df.alpha.set_size(1);
            df.alpha(0) = 1;

            return df;
        }

        scalar_type dot (
            const scalar_vector_type& w,
            const sample_type& sample
        ) const
        {
            if (have_bias)
            {
                const long w_size_m1 = w.size()-1;
                return dlib::dot(colm(w,0,w_size_m1), sample) - w(w_size_m1);
            }
            else
            {
                return dlib::dot(w, sample);
            }
        }
    // ------------------------------------------------------------------------------------

        scalar_type Cpos;
        scalar_type Cneg;
        scalar_type eps;
        unsigned long max_iterations;
        bool verbose;
        bool have_bias;  // Having a bias means we pretend all x vectors have an
                         // extra element which is always -1.
        bool last_weight_1;

    }; // end of class svm_c_linear_dcd_trainer

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_SVm_C_LINEAR_DCD_TRAINER_H__
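
A note on what do_train() is optimizing: judging from the gradient G = y(i)*dot(w,x(i)) - 1 and the clipping of each alpha(i) to [0, C], this is the usual C-SVM dual (this reading follows Hsieh et al. 2008, which the implementation appears to track):

    \min_{\alpha}\;\; \frac{1}{2}\,\alpha^{T}\bar{Q}\,\alpha \;-\; \sum_{i}\alpha_{i}
    \qquad \text{subject to}\qquad 0 \le \alpha_{i} \le C_{y_{i}} \;\text{ for all } i,

where \bar{Q}_{ij} = y_i y_j\, x_i^{T} x_j (with the constant -1 bias feature folded into each x when a bias is used), and the primal normal w = \sum_i y_i \alpha_i x_i is exactly the vector the code maintains incrementally via add_to().

And a minimal usage sketch, assuming dlib/svm.h pulls in the new header (at this commit it may need to be included directly as dlib/svm/svm_c_linear_dcd_trainer.h); the toy data, C value, and epsilon below are made up for illustration:

    #include <iostream>
    #include <vector>

    #include "dlib/matrix.h"
    #include "dlib/svm.h"

    int main()
    {
        using namespace dlib;

        typedef matrix<double,2,1> sample_type;
        typedef linear_kernel<sample_type> kernel_type;

        // A tiny, linearly separable toy problem: points above the diagonal are
        // labeled +1 and points below it are labeled -1.
        std::vector<sample_type> samples;
        std::vector<double> labels;
        for (int i = 0; i < 20; ++i)
        {
            sample_type s;
            s = i, i+1;               // above the diagonal
            samples.push_back(s);
            labels.push_back(+1);

            s = i+1, i;               // below the diagonal
            samples.push_back(s);
            labels.push_back(-1);
        }

        svm_c_linear_dcd_trainer<kernel_type> trainer(10);  // C = 10
        trainer.set_epsilon(0.01);

        decision_function<kernel_type> df = trainer.train(samples, labels);

        sample_type test;
        test = 1, 5;
        std::cout << "decision value: " << df(test) << std::endl;  // expected > 0
        return 0;
    }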
dlib/svm/svm_c_linear_dcd_trainer_abstract.h  (new file, 0 → 100644)