Commit 168574bd authored Apr 19, 2016 by Davis King
Added visit_layer_parameter_gradients() and also fixed a silly synchronization
error in the multi-gpu training code.
parent d31723ff
Showing 3 changed files with 138 additions and 5 deletions:

    dlib/dnn/core.h             +67  -0
    dlib/dnn/core_abstract.h    +34  -0
    dlib/dnn/trainer.h          +37  -5
dlib/dnn/core.h

@@ -3049,6 +3049,73 @@ namespace dlib
         impl::vlp_loop<0, net_type::num_layers>::visit(comp_i, net, v);
     }
 
+// ----------------------------------------------------------------------------------------
+
+    namespace impl
+    {
+        template <size_t i, size_t num>
+        struct vlpg_loop
+        {
+            template <typename T, typename U>
+            static typename std::enable_if<!is_add_layer<U>::value>::type
+            invoke_functor(T&&, size_t&, U&&)
+            {
+                // intentionally left empty
+            }
+
+            template <typename T, typename U>
+            static typename std::enable_if<is_add_layer<U>::value>::type
+            invoke_functor(T&& v, size_t& comp_i, U&& l)
+            {
+                v(comp_i, l.get_parameter_gradient());
+                ++comp_i;
+            }
+
+            template <typename net_type, typename visitor>
+            static void visit(
+                size_t comp_i,
+                net_type& net,
+                visitor&& v
+            )
+            {
+                invoke_functor(v, comp_i, layer<i>(net));
+                vlpg_loop<i+1, num>::visit(comp_i, net, v);
+            }
+        };
+
+        template <size_t num>
+        struct vlpg_loop<num, num>
+        {
+            template <typename net_type, typename visitor>
+            static void visit(size_t, net_type&, visitor&&)
+            {
+                // Base case of recursion.  Don't do anything.
+            }
+        };
+    }
+
+    template <typename net_type, typename visitor>
+    void visit_layer_parameter_gradients(
+        net_type& net,
+        visitor v
+    )
+    {
+        size_t comp_i = 0;
+        impl::vlpg_loop<0, net_type::num_layers>::visit(comp_i, net, v);
+    }
+
+// ----------------------------------------------------------------------------------------
 // ----------------------------------------------------------------------------------------
 
 }
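The vlpg_loop helper above is a standard compile-time recursion: the primary template handles layer i and then recurses on i+1, the <num, num> partial specialization terminates the recursion, and the two enable_if overloads of invoke_functor make the visitor fire only for add_layer objects (the computational layers) while silently skipping everything else. As a minimal self-contained sketch of that pattern (not dlib code; tuple_loop, call_if_arithmetic, and the tuple contents are made up for illustration), the same idea applied to a std::tuple looks like this:

    #include <iostream>
    #include <string>
    #include <tuple>
    #include <type_traits>

    template <std::size_t i, std::size_t num>
    struct tuple_loop
    {
        // SFINAE dispatch: only call the visitor for arithmetic elements,
        // mirroring how invoke_functor only fires for add_layer objects.
        template <typename Visitor, typename T>
        static typename std::enable_if<std::is_arithmetic<T>::value>::type
        call_if_arithmetic(Visitor&& v, T& item) { v(item); }

        template <typename Visitor, typename T>
        static typename std::enable_if<!std::is_arithmetic<T>::value>::type
        call_if_arithmetic(Visitor&&, T&) { /* skip elements of other types */ }

        template <typename Tuple, typename Visitor>
        static void visit(Tuple& t, Visitor&& v)
        {
            call_if_arithmetic(v, std::get<i>(t));  // handle element i (or skip it)
            tuple_loop<i + 1, num>::visit(t, v);    // recurse on the next element
        }
    };

    // Terminating partial specialization: past the last element, do nothing.
    template <std::size_t num>
    struct tuple_loop<num, num>
    {
        template <typename Tuple, typename Visitor>
        static void visit(Tuple&, Visitor&&) {}
    };

    int main()
    {
        std::tuple<int, std::string, double> t{1, "skipped", 2.5};
        tuple_loop<0, std::tuple_size<decltype(t)>::value>::visit(
            t, [](double x) { std::cout << x << "\n"; });  // prints 1 then 2.5
    }

The recursion depth equals the number of elements, so the whole walk is resolved at compile time; only the visitor calls survive into the generated code.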
dlib/dnn/core_abstract.h

@@ -1348,6 +1348,40 @@ namespace dlib
             - When v() is called, the first argument is always < net_type::num_computational_layers.
     !*/
 
+// ----------------------------------------------------------------------------------------
+
+    template <typename net_type, typename visitor>
+    void visit_layer_parameter_gradients(
+        net_type& net,
+        visitor v
+    );
+    /*!
+        requires
+            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
+              add_tag_layer.
+            - v is a function object with a signature equivalent to:
+                v(size_t idx, tensor& t)
+        ensures
+            - Loops over all the computational layers (i.e. layers with parameters, as
+              opposed to loss, tag, or input layers) in net and passes their parameter
+              gradients to v().  To be specific, this function essentially performs the
+              following:
+
+                size_t computational_layer_idx = 0;
+                for (size_t i = 0; i < net_type::num_layers; ++i)
+                {
+                    if (layer<i>(net) is a computational layer)
+                    {
+                        v(computational_layer_idx, layer<i>(net).get_parameter_gradient());
+                        ++computational_layer_idx;
+                    }
+                }
+            - When v() is called, the first argument is always < net_type::num_computational_layers.
+    !*/
+
 // ----------------------------------------------------------------------------------------
 
     struct layer_test_results
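As a usage sketch of the documented contract (the parameter_gradient_norm helper below is hypothetical and not part of this commit), the visitor makes it easy to, for example, measure the overall gradient magnitude after a backward pass:

    #include <cmath>
    #include <cstddef>
    #include <dlib/dnn.h>

    // Hypothetical helper: walk every computational layer's parameter gradient
    // and accumulate its squared L2 norm.  Assumes the gradients have already
    // been populated (e.g. by dnn_trainer or compute_parameter_gradients()).
    template <typename net_type>
    double parameter_gradient_norm(net_type& net)
    {
        double sum_sq = 0;
        dlib::visit_layer_parameter_gradients(net,
            [&sum_sq](std::size_t /*idx*/, dlib::tensor& t)
        {
            // Layers whose gradients haven't been allocated show up as empty tensors.
            if (t.size() != 0)
                sum_sq += dlib::sum(dlib::squared(dlib::mat(t)));
        });
        return std::sqrt(sum_sq);
    }

The idx argument is the computational-layer index described above; this sketch ignores it, but a per-layer report (or per-layer gradient clipping) would key off it.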
dlib/dnn/trainer.h

@@ -501,9 +501,12 @@ namespace dlib
                 std::vector<std::future<double>> losses(devices.size());
                 std::vector<std::future<void>> update_futs(devices.size());
 
                 std::vector<matrix<float>> param_buffer(net_type::num_computational_layers);
+                std::vector<matrix<float>> param_grad_buffer(net_type::num_computational_layers);
 
+                size_t iteration = 0;
                 while(job_pipe.dequeue(next_job))
                 {
+                    ++iteration;
                     // Call compute_parameter_gradients() and update_parameters() but pick the
                     // right version for unsupervised or supervised training based on the type
                     // of label_type.
@@ -517,28 +520,57 @@ namespace dlib
                     // gradient updates between devices.  So we do that now.
                     if (devices.size() > 1)
                     {
-                        for (auto&& p : param_buffer)
+                        for (auto&& p : param_grad_buffer)
                             p = 0;
                         // now average all the parameter gradients
                         for (size_t i = 0; i < devices.size(); ++i)
                         {
-                            visit_layer_parameters(devices[i]->net, [&param_buffer](size_t j, tensor& t)
+                            visit_layer_parameter_gradients(devices[i]->net, [&param_grad_buffer](size_t j, tensor& t)
                             {
                                 if (t.size() != 0)
-                                    param_buffer[j] += mat(t);
+                                    param_grad_buffer[j] += mat(t);
                             });
                         }
 
                         // and then assign the parameter gradients back to all the networks
                         const float scale = 1.0f/devices.size();
                         for (size_t i = 0; i < devices.size(); ++i)
                         {
-                            visit_layer_parameters(devices[i]->net, [scale, &param_buffer](size_t j, tensor& t)
+                            visit_layer_parameter_gradients(devices[i]->net, [scale, &param_grad_buffer](size_t j, tensor& t)
                             {
                                 if (t.size() != 0)
                                 {
-                                    t = param_buffer[j]*scale;
+                                    t = param_grad_buffer[j]*scale;
                                     t.async_copy_to_device();
                                 }
                             });
                         }
+
+                        // Every now and then force all the parameters to be the same just to
+                        // make sure they aren't drifting apart due to any non-deterministic
+                        // behavior on the GPU.
+                        if (iteration%5000 == 1)
+                        {
+                            for (auto&& p : param_buffer)
+                                p = 0;
+                            // now average all the parameters
+                            for (size_t i = 0; i < devices.size(); ++i)
+                            {
+                                visit_layer_parameters(devices[i]->net, [&param_buffer](size_t j, tensor& t)
+                                {
+                                    if (t.size() != 0)
+                                        param_buffer[j] += mat(t);
+                                });
+                            }
+
+                            // and then assign the parameters back to all the networks.
+                            const float scale = 1.0f/devices.size();
+                            for (size_t i = 0; i < devices.size(); ++i)
+                            {
+                                visit_layer_parameters(devices[i]->net, [scale, &param_buffer](size_t j, tensor& t)
+                                {
+                                    if (t.size() != 0)
+                                    {
+                                        t = param_buffer[j]*scale;
+                                        t.async_copy_to_device();
+                                    }
+                                });
+                            }
+                        }
                     }
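The synchronization error the commit message mentions is visible in the first part of this hunk: under comments that talk about averaging parameter gradients, the old code walked the layer parameters (visit_layer_parameters with param_buffer), whereas the fix walks the gradients (visit_layer_parameter_gradients with the new param_grad_buffer) and only forces the parameters themselves back into agreement every 5000 iterations. The averaging scheme is easier to follow in isolation. Below is a simplified, self-contained sketch of it; the Device struct and the plain std::vector<float> buffers are hypothetical stand-ins for the trainer's device objects and dlib tensors, and all devices are assumed to hold identically shaped buffers:

    #include <cstddef>
    #include <vector>

    struct Device
    {
        // one flat gradient buffer per computational layer (stand-in for tensors)
        std::vector<std::vector<float>> layer_grads;
    };

    void average_gradients(std::vector<Device>& devices)
    {
        if (devices.size() <= 1)
            return;

        // Zero a host-side accumulator shaped like one device's buffers.
        const std::size_t num_layers = devices[0].layer_grads.size();
        std::vector<std::vector<float>> accum(num_layers);
        for (std::size_t j = 0; j < num_layers; ++j)
            accum[j].assign(devices[0].layer_grads[j].size(), 0.0f);

        // Sum each device's gradients into the accumulator
        // (the role of param_grad_buffer[j] += mat(t) above).
        for (auto& dev : devices)
            for (std::size_t j = 0; j < num_layers; ++j)
                for (std::size_t k = 0; k < accum[j].size(); ++k)
                    accum[j][k] += dev.layer_grads[j][k];

        // Scale by 1/num_devices and assign the average back to every device
        // (the role of t = param_grad_buffer[j]*scale above).
        const float scale = 1.0f / devices.size();
        for (auto& dev : devices)
            for (std::size_t j = 0; j < num_layers; ++j)
                for (std::size_t k = 0; k < accum[j].size(); ++k)
                    dev.layer_grads[j][k] = accum[j][k] * scale;
    }

The new block guarded by iteration % 5000 == 1 applies the same accumulate, scale, and assign pattern to the parameters themselves, so the per-device replicas cannot slowly drift apart through non-deterministic GPU arithmetic.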