Commit 52e35c31 authored Dec 17, 2012 by Davis King
Made this object properly warm-startable
parent 34a9e4f6
Showing 1 changed file with 160 additions and 78 deletions

dlib/svm/svm_c_linear_dcd_trainer.h  +160  -78
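The change replaces the caller-managed alpha vector that train() previously took with an opaque optimizer_state object, so passing the same state to successive train() calls resumes the dual coordinate descent solver from its previous solution instead of starting from zero. Below is a minimal usage sketch, not part of the commit: it assumes only the train(x, y, state) overload added here plus standard dlib types, and the sample values are made up.

    #include <dlib/svm.h>
    #include <vector>

    int main()
    {
        using namespace dlib;

        typedef matrix<double,0,1> sample_type;
        typedef linear_kernel<sample_type> kernel_type;

        std::vector<sample_type> samples;
        std::vector<double> labels;

        // two made-up training points, one per class
        sample_type s(2);
        s = 1, 2;    samples.push_back(s); labels.push_back(+1);
        s = -1, -2;  samples.push_back(s); labels.push_back(-1);

        svm_c_linear_dcd_trainer<kernel_type> trainer;

        // The state object introduced by this commit.  Keeping it around
        // between calls is what makes the trainer warm-startable.
        svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;

        decision_function<kernel_type> df = trainer.train(samples, labels, state);

        // Append more data and call train() again with the same state: the
        // optimizer resumes from the previous alphas and weight vector
        // instead of solving the whole problem from scratch.
        s = 2, 1;    samples.push_back(s); labels.push_back(+1);
        df = trainer.train(samples, labels, state);
    }

Between calls the state keeps the alphas, the weight vector w, and the cached Q values; the optimizer_state class added in the diff below is exactly that container.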
dlib/svm/svm_c_linear_dcd_trainer.h @ 52e35c31
@@ -207,6 +207,128 @@ namespace dlib
             Cneg = C;
         }
 
+        class optimizer_state
+        {
+            friend class svm_c_linear_dcd_trainer;
+
+        public:
+            optimizer_state() : did_init(false) {}
+
+        private:
+
+            template <typename in_sample_vector_type>
+            void init (
+                const in_sample_vector_type& x,
+                bool have_bias_,
+                bool last_weight_1_
+            )
+            {
+                const long new_dims = max_index_plus_one(x);
+                long new_idx = 0;
+
+                if (did_init)
+                {
+                    DLIB_CASSERT(have_bias_ == have_bias &&
+                                 last_weight_1_ == last_weight_1, "");
+                    DLIB_CASSERT(new_dims >= dims, "");
+                    DLIB_CASSERT(x.size() >= static_cast<long>(alpha.size()), "");
+
+                    // make sure we amortize the cost of growing the alpha vector.
+                    if (alpha.capacity() < static_cast<unsigned long>(x.size()))
+                        alpha.reserve(x.size()*2);
+
+                    new_idx = alpha.size();
+
+                    // Make sure alpha has the same length as x. So pad with extra zeros if
+                    // necessary to make this happen.
+                    alpha.resize(x.size(), 0);
+
+                    if (new_dims != dims)
+                    {
+                        // The only valid way the dimensions can be different here is if
+                        // you are using a sparse vector type. This is because we might
+                        // have had training samples which just happened to not include all
+                        // the features previously. Therefore, max_index_plus_one() would
+                        // have given too low of a result. But for dense vectors it is
+                        // definitely a user error if the dimensions don't match.
+                        DLIB_CASSERT(is_matrix<sample_type>::value == false, "");
+
+                        // extend w by the right number of elements
+                        if (have_bias)
+                        {
+                            // Splice some zeros into the w vector so it will have the
+                            // right length. Here we are being careful to move the bias
+                            // weight to the end of the resulting vector.
+                            w = join_cols(join_cols(
+                                colm(w,0,dims),
+                                zeros_matrix<scalar_type>(1, new_dims-dims)),
+                                uniform_matrix<scalar_type>(1,1,w(dims)));
+                        }
+                        else
+                        {
+                            // Just concatenate the right number of zeros.
+                            w = join_cols(w, zeros_matrix<scalar_type>(1, new_dims-dims));
+                        }
+                        dims = new_dims;
+                    }
+                }
+                else
+                {
+                    did_init = true;
+                    have_bias = have_bias_;
+                    last_weight_1 = last_weight_1_;
+                    dims = new_dims;
+
+                    alpha.resize(x.size());
+                    index.reserve(x.size());
+                    Q.reserve(x.size());
+
+                    if (have_bias)
+                        w.set_size(dims+1);
+                    else
+                        w.set_size(dims);
+                    w = 0;
+                }
+
+                for (long i = new_idx; i < x.size(); ++i)
+                {
+                    Q.push_back(dlib::dot(x(i), x(i)));
+                    if (have_bias)
+                    {
+                        index.push_back(i);
+                        Q.back() += 1;
+                    }
+                    else if (Q.back() != 0)
+                    {
+                        index.push_back(i);
+                    }
+                }
+
+                if (last_weight_1)
+                    w(dims-1) = 1;
+            }
+
+            bool did_init;
+            bool have_bias;
+            bool last_weight_1;
+            std::vector<scalar_type> alpha;
+            scalar_vector_type w;
+            std::vector<scalar_type> Q;
+            std::vector<long> index;
+            long dims;
+            dlib::rand rnd;
+        };
+
         template <
             typename in_sample_vector_type,
             typename in_scalar_vector_type
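As the comment in init() says, a warm start is also allowed to grow the feature space when sparse sample types are used: a later batch of samples may mention feature indices that earlier batches never contained, in which case max_index_plus_one() increases and w is padded with zeros, with the bias weight kept at the end. A small sketch of that scenario, assuming dlib's std::map based sparse samples and sparse_linear_kernel; the indices and values are invented.

    #include <dlib/svm.h>
    #include <map>
    #include <vector>

    int main()
    {
        using namespace dlib;

        // dlib's sparse sample convention: sorted (index, value) pairs
        typedef std::map<unsigned long,double> sample_type;
        typedef sparse_linear_kernel<sample_type> kernel_type;

        std::vector<sample_type> samples;
        std::vector<double> labels;

        // the first batch only ever uses feature indices 0 and 1
        sample_type s;
        s[0] = 1.0; s[1] = 0.5;   samples.push_back(s); labels.push_back(+1);
        s.clear(); s[1] = -1.0;   samples.push_back(s); labels.push_back(-1);

        svm_c_linear_dcd_trainer<kernel_type> trainer;
        svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
        decision_function<kernel_type> df = trainer.train(samples, labels, state);

        // the second batch mentions feature index 2 for the first time, so the
        // next call sees new_dims > dims inside init() and extends w before
        // resuming from the previous solution
        s.clear(); s[0] = 0.2; s[2] = 2.0;
        samples.push_back(s); labels.push_back(+1);
        df = trainer.train(samples, labels, state);
    }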
@@ -216,9 +338,8 @@ namespace dlib
             const in_scalar_vector_type& y
         ) const
         {
-            scalar_vector_type alpha(x.size());
-            alpha = 0;
-            return do_train(vector_to_matrix(x), vector_to_matrix(y), alpha);
+            optimizer_state state;
+            return do_train(vector_to_matrix(x), vector_to_matrix(y), state);
         }
 
         template <
@@ -228,24 +349,10 @@ namespace dlib
         const decision_function<kernel_type> train (
             const in_sample_vector_type& x,
             const in_scalar_vector_type& y,
-            scalar_vector_type& alpha
+            optimizer_state& state
         ) const
         {
-            DLIB_CASSERT(static_cast<long>(x.size()) >= alpha.size(),
-                "\t decision_function svm_c_linear_dcd_trainer::train(x,y,alpha)"
-                << "\n\t invalid inputs were given to this function"
-                << "\n\t x.size(): " << x.size()
-                << "\n\t alpha.size(): " << alpha.size()
-                );
-
-            if (static_cast<long>(x.size()) > alpha.size())
-            {
-                // Make sure alpha has the same length as x. So pad with extra zeros if
-                // necessary to make this happen.
-                alpha = join_cols(alpha, zeros_matrix<scalar_type>(1, x.size()-alpha.size()));
-            }
-
-            return do_train(vector_to_matrix(x), vector_to_matrix(y), alpha);
+            return do_train(vector_to_matrix(x), vector_to_matrix(y), state);
         }
 
     private:
@@ -259,12 +366,9 @@ namespace dlib
         const decision_function<kernel_type> do_train (
             const in_sample_vector_type& x,
             const in_scalar_vector_type& y,
-            scalar_vector_type& alpha
+            optimizer_state& state
        ) const
        {
-            // TODO, requires labels are all +1 or -1. But we don't have to see both
-            // types.
-
            // make sure requires clause is not broken
            DLIB_ASSERT(is_learning_problem(x,y) == true,
                "\t decision_function svm_c_linear_dcd_trainer::train(x,y)"
@@ -273,50 +377,25 @@ namespace dlib
                 << "\n\t y.size(): " << y.size()
                 << "\n\t is_learning_problem(x,y): " << is_learning_problem(x,y)
                 );
 
-            const long dims = max_index_plus_one(x);
-
-            // TODO, return an opaque object instead of alpha. Also, the object
-            // needs to verify that the trainer has the same settings from one
-            // call to the next.
-
-            std::vector<long> index(x.size());
-            scalar_vector_type Q(x.size());
-
-            scalar_vector_type w;
-            if (have_bias)
-                w.set_size(dims+1);
-            else
-                w.set_size(dims);
-            w = 0;
-
-            if (last_weight_1)
-                w(dims-1) = 1;
-
-            long ii = 0;
-            for (long i = 0; i < alpha.size(); ++i)
+#if ENABLE_ASSERTS
+            for (long i = 0; i < x.size(); ++i)
             {
-                index[ii] = i;
-                Q(ii) = dlib::dot(x(i), x(i));
-                if (have_bias)
-                {
-                    Q(ii) += 1;
-                    ++ii;
-                }
-                else if (Q(ii) != 0)
-                {
-                    ++ii;
-                }
+                DLIB_ASSERT(y(i) == +1 || y(i) == -1,
+                    "\t decision_function svm_c_linear_dcd_trainer::train(x,y)"
+                    << "\n\t invalid inputs were given to this function"
+                    << "\n\t y(" << i << "): " << y(i)
+                    );
             }
+#endif
 
-            // What we are doing here is ignoring x elements that have 0 norm. We
-            // Do this because they are impossible to classify and this also avoids
-            // a division by zero problem later on in the code.
-            const long max_possible_active = ii;
-
-            dlib::rand rnd;
-            long active_size = max_possible_active;
+            state.init(x, have_bias, last_weight_1);
+
+            std::vector<scalar_type>& alpha = state.alpha;
+            scalar_vector_type& w = state.w;
+            std::vector<long>& index = state.index;
+            const long dims = state.dims;
+
+            unsigned long active_size = index.size();
 
             scalar_type PG_max_prev = std::numeric_limits<scalar_type>::infinity();
             scalar_type PG_min_prev = -std::numeric_limits<scalar_type>::infinity();
@@ -328,15 +407,15 @@ namespace dlib
                 scalar_type PG_min = std::numeric_limits<scalar_type>::infinity();
 
                 // randomly shuffle the indices
-                for (long i = 0; i < active_size; ++i)
+                for (unsigned long i = 0; i < active_size; ++i)
                 {
                     // pick a random index >= i
-                    const long j = i + rnd.get_random_32bit_number()%(active_size-i);
+                    const long j = i + state.rnd.get_random_32bit_number()%(active_size-i);
                     std::swap(index[i], index[j]);
                 }
 
                 // for all the active training samples
-                for (long ii = 0; ii < active_size; ++ii)
+                for (unsigned long ii = 0; ii < active_size; ++ii)
                 {
                     const long i = index[ii];
@@ -344,7 +423,7 @@ namespace dlib
                     const scalar_type C = (y(i) > 0) ? Cpos : Cneg;
 
                     scalar_type PG = 0;
-                    if (alpha(i) == 0)
+                    if (alpha[i] == 0)
                     {
                         if (G > PG_max_prev)
                         {
@@ -358,7 +437,7 @@ namespace dlib
                         if (G < 0)
                             PG = G;
                     }
-                    else if (alpha(i) == C)
+                    else if (alpha[i] == C)
                     {
                         if (G < PG_min_prev)
                         {
@@ -385,9 +464,9 @@ namespace dlib
                     // if PG != 0
                     if (std::abs(PG) > 1e-12)
                     {
-                        const scalar_type alpha_old = alpha(i);
-                        alpha(i) = std::min(std::max(alpha(i) - G/Q(i), (scalar_type)0.0), C);
-                        const scalar_type delta = (alpha(i)-alpha_old)*y(i);
+                        const scalar_type alpha_old = alpha[i];
+                        alpha[i] = std::min(std::max(alpha[i] - G/state.Q[i], (scalar_type)0.0), C);
+                        const scalar_type delta = (alpha[i]-alpha_old)*y(i);
                         add_to(w, x(i), delta);
                         if (have_bias)
                             w(w.size()-1) -= delta;
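Aside from switching alpha from matrix indexing to std::vector indexing and reading Q out of the state, the update itself is unchanged. For reference, restated from the code above (G is the dual gradient for coordinate i, computed in the elided context), the step is the usual clipped coordinate update of the dual coordinate descent method:

    \alpha_i \leftarrow \min\bigl( \max( \alpha_i - G / Q_{ii},\; 0 ),\; C \bigr),
    \qquad
    w \leftarrow w + (\alpha_i - \alpha_i^{\mathrm{old}}) \, y_i \, x_i

The bias is handled as an implicit constant feature of -1, which is consistent with Q getting the extra +1 in init() and with the last element of w receiving -delta here rather than +delta.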
@@ -411,12 +490,12 @@ namespace dlib
                     {
                         // stop if we are within eps tolerance and the last iteration
                         // was over all the samples
-                        if (active_size == max_possible_active)
+                        if (active_size == index.size())
                             break;
 
                         // Turn of shrinking on the next iteration. We will stop if the
                         // tolerance is still <= eps when shrinking is off.
-                        active_size = max_possible_active;
+                        active_size = index.size();
                         PG_max_prev = std::numeric_limits<scalar_type>::infinity();
                         PG_min_prev = -std::numeric_limits<scalar_type>::infinity();
                     }
@@ -429,7 +508,11 @@ namespace dlib
                         if (PG_min_prev >= 0)
                             PG_min_prev = -std::numeric_limits<scalar_type>::infinity();
                     }
                 }
             }
             // end of main optimization loop
 
             // put the solution into a decision function and then return it
             decision_function<kernel_type> df;
@@ -439,10 +522,9 @@ namespace dlib
             df.b = 0;
             df.basis_vectors.set_size(1);
-            // Copy the plane normal into the output basis vector. The output vector might be a
-            // sparse vector container so we need to use this special kind of copy to handle that case.
-            // As an aside, the reason for using max_index_plus_one() and not just w.size()-1 is because
-            // doing it this way avoids an inane warning from gcc that can occur in some cases.
+            // Copy the plane normal into the output basis vector. The output vector might
+            // be a sparse vector container so we need to use this special kind of copy to
+            // handle that case.
             assign(df.basis_vectors(0), colm(w, 0, dims));
             df.alpha.set_size(1);
             df.alpha(0) = 1;