Commit c1433b3d authored Nov 13, 2015 by Davis King
Upgrade the layer interface so that you can implement layers that operate
in-place.
parent 69490292
Showing 4 changed files with 506 additions and 64 deletions

dlib/dnn/core.h             +411  -41
dlib/dnn/core_abstract.h      +1   -0
dlib/dnn/layers.h            +13  -11
dlib/dnn/layers_abstract.h   +81  -12
dlib/dnn/core.h
...
...
@@ -61,7 +61,171 @@ namespace dlib
            return std::make_tuple(std::get<indices>(item)...);
        }
    }

    template <typename T> struct alwaysbool { typedef bool type; };

    resizable_tensor& rt();

    // The significance of a layer's backward method requiring forward's outputs is
    // that such a layer can't have an in-place layer stacked on top of it because
    // in-place layers overwrite the output of the layer they sit on top of.
    template <typename layer_type, typename SUBNET>
    constexpr auto backward_requires_forward_output(
        layer_type& layer,
        SUBNET& sub
    ) -> typename alwaysbool<decltype(layer.backward(rt(),rt(),sub,rt()))>::type
    {
        return true;
    }

    template <typename layer_type, typename SUBNET>
    constexpr auto backward_requires_forward_output(
        layer_type& layer,
        SUBNET& sub
    ) -> typename alwaysbool<decltype(layer.backward(rt(),sub,rt()))>::type
    {
        return false;
    }

    template <typename layer_type, typename SUBNET>
    constexpr auto backward_requires_forward_output(
        layer_type& layer,
        SUBNET& sub
    ) -> typename alwaysbool<decltype(layer.backward_inplace(rt(),rt(),sub.get_gradient_input(),rt()))>::type
    {
        return true;
    }

    template <typename layer_type, typename SUBNET>
    constexpr auto has_inplace_backward(
        layer_type& layer,
        SUBNET& sub
    ) -> typename alwaysbool<decltype(layer.backward(rt(),rt(),sub,rt()))>::type
    {
        return false;
    }

    template <typename layer_type, typename SUBNET>
    constexpr auto has_inplace_backward(
        layer_type& layer,
        SUBNET& sub
    ) -> typename alwaysbool<decltype(layer.backward(rt(),sub,rt()))>::type
    {
        return false;
    }

    template <typename layer_type, typename SUBNET>
    constexpr auto has_inplace_backward(
        layer_type& layer,
        SUBNET& sub
    ) -> typename alwaysbool<decltype(layer.backward_inplace(rt(),rt(),sub.get_gradient_input(),rt()))>::type
    {
        return true;
    }

    template <typename layer_type, typename SUBNET>
    constexpr auto is_inplace_layer(
        layer_type& layer,
        const SUBNET& sub
    ) -> typename alwaysbool<decltype(layer.forward(sub,rt()))>::type
    {
        return false;
    }

    template <typename layer_type, typename SUBNET>
    constexpr auto is_inplace_layer(
        layer_type& layer,
        const SUBNET& sub
    ) -> typename alwaysbool<decltype(layer.forward_inplace(sub.get_output(),rt()))>::type
    {
        return true;
    }
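These detectors are plain expression SFINAE: each overload's trailing return type names the decltype of one specific backward()/forward() call, so for a given layer type only the overload whose call expression is well-formed survives overload resolution, and its hard-coded true/false answer becomes a compile-time property of that layer. The following standalone sketch shows the same idiom outside of dlib; the layer types and the detector name below are illustrative toys, not dlib code:

// Minimal, self-contained illustration of the detection idiom used above.
// Everything here is a toy example, not part of dlib.
#include <iostream>

template <typename T> struct alwaysbool { typedef bool type; };

struct out_of_place_layer { void backward(int computed_output, int gradient_input) {} };
struct in_place_layer     { void backward(int gradient_input) {} };

// Chosen when L::backward(computed_output, gradient_input) is well-formed.
template <typename L>
constexpr auto needs_forward_output(L& l)
    -> typename alwaysbool<decltype(l.backward(0,0))>::type { return true; }

// Chosen when only L::backward(gradient_input) exists.
template <typename L>
constexpr auto needs_forward_output(L& l)
    -> typename alwaysbool<decltype(l.backward(0))>::type { return false; }

int main()
{
    out_of_place_layer a;
    in_place_layer b;
    std::cout << needs_forward_output(a) << '\n';  // prints 1
    std::cout << needs_forward_output(b) << '\n';  // prints 0
}

The call_layer_forward()/call_layer_backward() helpers that follow use the same trailing-return-type trick, but instead of returning a constant they forward the call to whichever forward()/backward() signature the layer actually provides.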
    template <typename layer_type, typename SUBNET>
    auto call_layer_backward(
        layer_type& layer,
        const tensor& computed_output,
        const tensor& gradient_input,
        SUBNET& sub,
        tensor& params_grad
    ) -> decltype(layer.backward(computed_output,gradient_input,sub,params_grad))
    {
        layer.backward(computed_output,gradient_input,sub,params_grad);
    }

    template <typename layer_type, typename SUBNET>
    auto call_layer_backward(
        layer_type& layer,
        const tensor& ,
        const tensor& gradient_input,
        SUBNET& sub,
        tensor& params_grad
    ) -> decltype(layer.backward(gradient_input,sub,params_grad))
    {
        layer.backward(gradient_input,sub,params_grad);
    }

    template <typename layer_type, typename SUBNET>
    auto call_layer_backward(
        layer_type& layer,
        const tensor& computed_output,
        const tensor& gradient_input,
        SUBNET& sub,
        tensor& params_grad
    ) -> decltype(layer.backward_inplace(computed_output,gradient_input,sub.get_gradient_input(),params_grad))
    {
        layer.backward_inplace(computed_output,gradient_input,sub.get_gradient_input(),params_grad);
    }

    template <typename layer_type, typename SUBNET>
    auto call_layer_forward(
        layer_type& layer,
        const SUBNET& sub,
        tensor& data_output
    ) -> decltype(layer.forward(sub,rt()))
    {
        // This overload of call_layer_forward() is here because this template
        // naturally gets instantiated but only on code paths that never get executed.
        // So rather than writing a bunch of hard to read template magic around call
        // sites we just have this overload that doesn't do anything (and an assert to
        // make sure that's the case).
        DLIB_CASSERT(false, "This should never happen");
    }

    template <typename layer_type, typename SUBNET>
    auto call_layer_forward(
        layer_type& layer,
        const SUBNET& sub,
        resizable_tensor& data_output
    ) -> decltype(layer.forward(sub,data_output))
    {
        layer.forward(sub,data_output);
    }

    template <typename layer_type, typename SUBNET>
    auto call_layer_forward(
        layer_type& layer,
        const SUBNET& sub,
        tensor& data_output
    ) -> decltype(layer.forward_inplace(sub.get_output(),data_output))
    {
        layer.forward_inplace(sub.get_output(),data_output);
    }

    template <typename layer_type, typename SUBNET>
    auto call_layer_forward(
        layer_type& layer,
        const SUBNET& sub,
        resizable_tensor& data_output
    ) -> decltype(layer.forward_inplace(sub.get_output(),data_output))
    {
        if (!have_same_dimensions(data_output, sub.get_output()))
            data_output.copy_size(sub.get_output());
        layer.forward_inplace(sub.get_output(),data_output);
    }
    } // end namespace impl

    template <typename Head, typename... Tail>
    std::tuple<Tail...> tuple_tail(
...
...
@@ -162,7 +326,7 @@ namespace dlib
    namespace dimpl
    {
        template <typename T, typename enabled=void>
        template <typename T, bool is_first = true, typename enabled=void>
        class subnet_wrapper
        {
            /*!
...
...
@@ -173,6 +337,13 @@ namespace dlib
objects to the layer callbacks those callbacks won't be able to
interact with the subnetworks in a way other than specified
by the SUBNET interface spec.
We also allow the top layer of a subnet_wrapper stack to call the
private_get_output() and private_get_gradient_input() functions. This
way, layers that have had their output/gradient overwritten by in-place
layers can only be accessed from the in-place layers that sit directly
on top of them since those in-place layers are the only layers that
know how to interact with them properly.
!*/
        public:
...
...
@@ -185,7 +356,31 @@ namespace dlib
        };

        template <typename T>
        class subnet_wrapper<T, typename std::enable_if<is_nonloss_layer_type<T>::value>::type>
        class subnet_wrapper<T, true, typename std::enable_if<is_nonloss_layer_type<T>::value>::type>
        {
        public:
            subnet_wrapper(const subnet_wrapper&) = delete;
            subnet_wrapper& operator=(const subnet_wrapper&) = delete;

            typedef T wrapped_type;
            const static size_t num_layers = T::num_layers;

            subnet_wrapper(T& l_) : l(l_), subnetwork(l.subnet()) {}

            const tensor& get_output() const { return l.private_get_output(); }
            tensor& get_gradient_input() { return l.private_get_gradient_input(); }
            const subnet_wrapper<typename T::subnet_type>& subnet() const { return subnetwork; }
            subnet_wrapper<typename T::subnet_type>& subnet() { return subnetwork; }
        private:
            T& l;
            subnet_wrapper<typename T::subnet_type,false> subnetwork;
        };

        template <typename T>
        class subnet_wrapper<T, false, typename std::enable_if<is_nonloss_layer_type<T>::value>::type>
        {
        public:
...
...
@@ -231,8 +426,11 @@ namespace dlib
        add_layer(
        ):
            this_layer_setup_called(false),
            gradient_input_is_stale(true)
            gradient_input_is_stale(true),
            get_output_and_gradient_input_disabled(false)
        {
            if (this_layer_operates_inplace())
                subnetwork.disable_output_and_gradient_getters();
        }

        add_layer(const add_layer&) = default;
...
...
@@ -242,6 +440,8 @@ namespace dlib
        template <typename T, typename U, typename E>
        friend class add_layer;
        template <typename T, bool is_first, typename E>
        friend class dimpl::subnet_wrapper;
// Allow copying networks from one to another as long as their corresponding
// layers can be constructed from each other.
...
...
@@ -253,9 +453,12 @@ namespace dlib
            details(item.layer_details()),
            this_layer_setup_called(item.this_layer_setup_called),
            gradient_input_is_stale(item.gradient_input_is_stale),
            get_output_and_gradient_input_disabled(item.get_output_and_gradient_input_disabled),
            x_grad(item.x_grad),
            cached_output(item.cached_output)
        {
            if (this_layer_operates_inplace())
                subnetwork.disable_output_and_gradient_getters();
        }

        template <typename ...T>
...
...
@@ -266,8 +469,11 @@ namespace dlib
            details(layer_det),
            subnetwork(std::forward<T>(args)...),
            this_layer_setup_called(false),
            gradient_input_is_stale(true)
            gradient_input_is_stale(true),
            get_output_and_gradient_input_disabled(false)
        {
            if (this_layer_operates_inplace())
                subnetwork.disable_output_and_gradient_getters();
        }

        template <typename ...T>
...
...
@@ -278,8 +484,11 @@ namespace dlib
            details(std::move(layer_det)),
            subnetwork(std::forward<T>(args)...),
            this_layer_setup_called(false),
            gradient_input_is_stale(true)
            gradient_input_is_stale(true),
            get_output_and_gradient_input_disabled(false)
        {
            if (this_layer_operates_inplace())
                subnetwork.disable_output_and_gradient_getters();
        }

        template <typename ...T, typename ...U>
...
@@ -290,8 +499,11 @@ namespace dlib
details
(
std
::
get
<
0
>
(
layer_det
)),
subnetwork
(
tuple_tail
(
layer_det
),
std
::
forward
<
T
>
(
args
)...),
this_layer_setup_called
(
false
),
gradient_input_is_stale
(
true
)
gradient_input_is_stale
(
true
),
get_output_and_gradient_input_disabled
(
false
)
{
if
(
this_layer_operates_inplace
())
subnetwork
.
disable_output_and_gradient_getters
();
}
template
<
typename
...
T
,
typename
...
U
>
...
...
@@ -343,21 +555,54 @@ namespace dlib
                details.setup(wsub);
                this_layer_setup_called = true;
            }
            details.forward(wsub, cached_output);
            if (this_layer_operates_inplace())
                impl::call_layer_forward(details, wsub, private_get_output());
            else
                impl::call_layer_forward(details, wsub, cached_output);

            gradient_input_is_stale = true;
            return get_output();
            return private_get_output();
        }

        const tensor& get_output() const { return cached_output; }
        tensor& get_gradient_input()
    private:
        tensor& private_get_output() const
        {
            if (gradient_input_is_stale)
            if (const_cast<add_layer&>(*this).this_layer_operates_inplace())
                return subnetwork.private_get_output();
            else
                return const_cast<resizable_tensor&>(cached_output);
        }
        tensor& private_get_gradient_input()
        {
            if (this_layer_operates_inplace())
            {
                gradient_input_is_stale = false;
                x_grad.copy_size(get_output());
                x_grad = 0;
                return subnetwork.private_get_gradient_input();
            }
            return x_grad;
            else
            {
                if (gradient_input_is_stale)
                {
                    gradient_input_is_stale = false;
                    x_grad.copy_size(private_get_output());
                    x_grad = 0;
                }
                return x_grad;
            }
        }
        void disable_output_and_gradient_getters(
        ) { get_output_and_gradient_input_disabled = true; }

    public:
        const tensor& get_output() const
        {
            if (get_output_and_gradient_input_disabled)
                throw dlib::error("Accessing this layer's get_output() is disabled because an in-place layer has been stacked on top of it.");
            return private_get_output();
        }
        tensor& get_gradient_input()
        {
            if (get_output_and_gradient_input_disabled)
                throw dlib::error("Accessing this layer's get_gradient_input() is disabled because an in-place layer has been stacked on top of it.");
            return private_get_gradient_input();
        }
        template <typename solver_type>
...
...
@@ -365,11 +610,13 @@ namespace dlib
        {
            dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
            params_grad.copy_size(details.get_layer_params());
            details.backward(get_output(), get_gradient_input(), wsub, static_cast<tensor&>(params_grad));
            impl::call_layer_backward(details, private_get_output(), private_get_gradient_input(), wsub, static_cast<tensor&>(params_grad));
            // Don't try to adjust the parameters if this layer doesn't have any.
            if (params_grad.size() != 0)
                solvers.top()(details, static_cast<const tensor&>(params_grad));
            subnetwork.update(x, solvers.pop());
            gradient_input_is_stale = true;
        }

        const subnet_type& subnet() const { return subnetwork; }
...
...
@@ -396,6 +643,7 @@ namespace dlib
            serialize(item.details, out);
            serialize(item.this_layer_setup_called, out);
            serialize(item.gradient_input_is_stale, out);
            serialize(item.get_output_and_gradient_input_disabled, out);
            serialize(item.x_grad, out);
            serialize(item.cached_output, out);
        }
...
...
@@ -410,18 +658,34 @@ namespace dlib
            deserialize(item.details, in);
            deserialize(item.this_layer_setup_called, in);
            deserialize(item.gradient_input_is_stale, in);
            deserialize(item.get_output_and_gradient_input_disabled, in);
            deserialize(item.x_grad, in);
            deserialize(item.cached_output, in);
        }

    private:

        bool this_layer_operates_inplace(
        )
        {
            // This layer can run in-place if it's an in-place capable layer and also if
            // the layer it's on top of doesn't need its own output tensor (since in-place
            // layers overwrite that tensor)
            return impl::is_inplace_layer(details, subnetwork) && !subnetwork.this_layer_requires_forward_output();
        }
        bool this_layer_requires_forward_output(
        )
        {
            return impl::backward_requires_forward_output(details, subnetwork);
        }

        void swap(add_layer& item)
        {
            std::swap(subnetwork, item.subnetwork);
            std::swap(details, item.details);
            std::swap(this_layer_setup_called, item.this_layer_setup_called);
            std::swap(gradient_input_is_stale, item.gradient_input_is_stale);
            std::swap(get_output_and_gradient_input_disabled, item.get_output_and_gradient_input_disabled);
            std::swap(x_grad, item.x_grad);
            std::swap(cached_output, item.cached_output);
        }
...
...
@@ -431,6 +695,10 @@ namespace dlib
        LAYER_DETAILS details;
        bool this_layer_setup_called;
        bool gradient_input_is_stale;
        bool get_output_and_gradient_input_disabled;
        // Note that if this_layer_operates_inplace()==true then x_grad and cached_output
        // are not used at all.  Instead, this layer uses these variables from the lower
        // layer.
        resizable_tensor x_grad;
        resizable_tensor cached_output;
...
...
@@ -461,7 +729,8 @@ namespace dlib
        add_layer(
        ):
            this_layer_setup_called(false),
            gradient_input_is_stale(true)
            gradient_input_is_stale(true),
            get_output_and_gradient_input_disabled(false)
        {}

        add_layer(const add_layer&) = default;
...
...
@@ -471,6 +740,8 @@ namespace dlib
        template <typename T, typename U, typename E>
        friend class add_layer;
        template <typename T, bool is_first, typename E>
        friend class dimpl::subnet_wrapper;
// Allow copying networks from one to another as long as their corresponding
// layers can be constructed from each other.
...
...
@@ -482,6 +753,7 @@ namespace dlib
            details(item.layer_details()),
            this_layer_setup_called(item.this_layer_setup_called),
            gradient_input_is_stale(item.gradient_input_is_stale),
            get_output_and_gradient_input_disabled(false),
            x_grad(item.x_grad),
            cached_output(item.cached_output)
        {
...
...
@@ -492,7 +764,8 @@ namespace dlib
        ):
            details(layer_det),
            this_layer_setup_called(false),
            gradient_input_is_stale(true)
            gradient_input_is_stale(true),
            get_output_and_gradient_input_disabled(false)
        {}

        add_layer(
...
@@ -500,7 +773,8 @@ namespace dlib
)
:
details
(
std
::
move
(
layer_det
)),
this_layer_setup_called
(
false
),
gradient_input_is_stale
(
true
)
gradient_input_is_stale
(
true
),
get_output_and_gradient_input_disabled
(
false
)
{}
add_layer
(
...
...
@@ -510,7 +784,8 @@ namespace dlib
details
(
std
::
move
(
layer_det
)),
input_layer
(
std
::
move
(
il
)),
this_layer_setup_called
(
false
),
gradient_input_is_stale
(
true
)
gradient_input_is_stale
(
true
),
get_output_and_gradient_input_disabled
(
false
)
{}
add_layer
(
...
...
@@ -577,33 +852,50 @@ namespace dlib
details
.
setup
(
wsub
);
this_layer_setup_called
=
true
;
}
details
.
forward
(
wsub
,
cached_output
);
impl
::
call_layer_forward
(
details
,
wsub
,
cached_output
);
gradient_input_is_stale
=
true
;
return
get_output
();
return
private_
get_output
();
}
const
tensor
&
get_output
()
const
{
return
cached_output
;
}
tensor
&
get_gradient_input
()
private
:
tensor
&
private_get_output
()
const
{
return
const_cast
<
resizable_tensor
&>
(
cached_output
);
}
tensor
&
private_get_gradient_input
()
{
if
(
gradient_input_is_stale
)
{
gradient_input_is_stale
=
false
;
x_grad
.
copy_size
(
get_output
());
x_grad
.
copy_size
(
private_
get_output
());
x_grad
=
0
;
}
return
x_grad
;
}
void
disable_output_and_gradient_getters
(
)
{
get_output_and_gradient_input_disabled
=
true
;
}
public
:
const
tensor
&
get_output
()
const
{
if
(
get_output_and_gradient_input_disabled
)
throw
dlib
::
error
(
"Accessing this layer's get_output() is disabled because an in-place layer has been stacked on top of it."
);
return
private_get_output
();
}
tensor
&
get_gradient_input
()
{
if
(
get_output_and_gradient_input_disabled
)
throw
dlib
::
error
(
"Accessing this layer's get_gradient_input() is disabled because an in-place layer has been stacked on top of it."
);
return
private_get_gradient_input
();
}
template
<
typename
solver_type
>
void
update
(
const
tensor
&
x
,
sstack
<
solver_type
,
num_layers
>&
solvers
)
{
subnet_wrapper
wsub
(
x
,
grad_final_ignored
);
params_grad
.
copy_size
(
details
.
get_layer_params
());
details
.
backward
(
get_output
(),
get_gradient_input
(),
wsub
,
static_cast
<
tensor
&>
(
params_grad
));
impl
::
call_layer_backward
(
details
,
private_get_output
(),
private_get_gradient_input
(),
wsub
,
static_cast
<
tensor
&>
(
params_grad
));
// Don't try to adjust the parameters if this layer doesn't have any.
if
(
params_grad
.
size
()
!=
0
)
solvers
.
top
()(
details
,
static_cast
<
const
tensor
&>
(
params_grad
));
gradient_input_is_stale
=
true
;
}
const
subnet_type
&
subnet
()
const
{
return
input_layer
;
}
...
...
@@ -630,6 +922,7 @@ namespace dlib
            serialize(item.details, out);
            serialize(item.this_layer_setup_called, out);
            serialize(item.gradient_input_is_stale, out);
            serialize(item.get_output_and_gradient_input_disabled, out);
            serialize(item.x_grad, out);
            serialize(item.cached_output, out);
        }
...
...
@@ -644,12 +937,20 @@ namespace dlib
            deserialize(item.details, in);
            deserialize(item.this_layer_setup_called, in);
            deserialize(item.gradient_input_is_stale, in);
            deserialize(item.get_output_and_gradient_input_disabled, in);
            deserialize(item.x_grad, in);
            deserialize(item.cached_output, in);
        }

    private:

        bool this_layer_requires_forward_output(
        )
        {
            subnet_wrapper wsub(grad_final_ignored, grad_final_ignored);
            return impl::backward_requires_forward_output(details, wsub);
        }

        class subnet_wrapper
        {
        public:
...
...
@@ -685,6 +986,7 @@ namespace dlib
            std::swap(details, item.details);
            std::swap(this_layer_setup_called, item.this_layer_setup_called);
            std::swap(gradient_input_is_stale, item.gradient_input_is_stale);
            std::swap(get_output_and_gradient_input_disabled, item.get_output_and_gradient_input_disabled);
            std::swap(x_grad, item.x_grad);
            std::swap(cached_output, item.cached_output);
        }
...
...
@@ -693,6 +995,7 @@ namespace dlib
        LAYER_DETAILS details;
        bool this_layer_setup_called;
        bool gradient_input_is_stale;
        bool get_output_and_gradient_input_disabled;
        resizable_tensor x_grad;
        resizable_tensor cached_output;
...
...
@@ -1493,10 +1796,13 @@ namespace dlib
        }

        tensor& get_mutable_output() { return output; }
        const tensor& get_output() const { return output; }
        const tensor& private_get_output() const { return get_output(); }
        const test_layer_subnet& subnet() const { init_sub(); return *subnetwork; }

        tensor& get_gradient_input() { return gradient_input; }
        tensor& private_get_gradient_input() { return get_gradient_input(); }
        test_layer_subnet& subnet() { init_sub(); return *subnetwork; }
...
...
@@ -1578,7 +1884,7 @@ namespace dlib
// (since we do a lazy layer creation thing based on calls to subnet() inside
// test_layer_subnet).
    l.setup(subnetwork);
    l.forward(subnetwork, output);
    impl::call_layer_forward(l, subnetwork, output);

    resizable_tensor input_grad;
    input_grad.copy_size(output);
...
...
@@ -1605,11 +1911,71 @@ namespace dlib
    // comparing them to a central differences approximation.
    resizable_tensor params_grad;
    params_grad.copy_size(l.get_layer_params());
    // Set the params grad to something crazy so that it's very obvious if it doesn't
    // get fully assigned.
    // But first, set the params grad to something crazy so that it's very obvious if
    // it doesn't get fully assigned.
    params_grad = std::numeric_limits<float>::infinity();
    l.backward(output, input_grad, subnetwork, params_grad);
    impl::call_layer_backward(l, output, input_grad, subnetwork, params_grad);

    static_assert(impl::is_inplace_layer(l, subnetwork) == impl::has_inplace_backward(l, subnetwork),
        "Layer not defined correctly.  forward and backward methods must either both be in-place or both out-of-place. ");

    // Make sure the outputs of forward() and backward() are the same when they are run
    // in in-place mode.
    if (impl::is_inplace_layer(l, subnetwork))
    {
        test_layer_subnet subnetwork2(rnd);
        layer_details_type ll(l);
        ll.setup(subnetwork2);
        resizable_tensor ip_out;
        impl::call_layer_forward(ll, subnetwork2, ip_out);
        impl::call_layer_forward(ll, subnetwork2, subnetwork2.get_mutable_output());
        const auto forward_error = max(abs(mat(ip_out) - mat(subnetwork2.get_output())));
        if (forward_error > 0.00001)
        {
            using namespace std;
            sout << "This layer is supposed to support in-place computations but the output of forward_inplace()\n";
            sout << "changes when invoked in-place vs. out-of-place.  The error was: " << forward_error << endl;
            return layer_test_results(sout.str());
        }

        resizable_tensor params_grad;
        params_grad.copy_size(ll.get_layer_params());
        params_grad = std::numeric_limits<float>::infinity();

        resizable_tensor input_grad;
        input_grad.copy_size(ip_out);
        fill_with_gassuan_random_numbers(input_grad, rnd);
        resizable_tensor params_grad1, params_grad2, data_grad1, data_grad2;
        params_grad1 = params_grad;
        params_grad2 = params_grad;
        // Now call backward() and make sure it works as well.
        subnetwork2.get_gradient_input() = 9999;
        impl::call_layer_backward(ll, ip_out, input_grad, subnetwork2, params_grad1);
        data_grad1 = subnetwork2.get_gradient_input();

        subnetwork2.get_gradient_input() = mat(input_grad);
        impl::call_layer_backward(ll, ip_out, subnetwork2.get_gradient_input(), subnetwork2, params_grad2);
        data_grad2 = subnetwork2.get_gradient_input();
        if (params_grad.size() != 0)
        {
            const auto backward_param_error = max(abs(mat(params_grad1) - mat(params_grad2)));
            if (backward_param_error > 0.00001)
            {
                using namespace std;
                sout << "This layer is supposed to support in-place computations but the output of backward_inplace()\n";
                sout << "changes when invoked in-place vs. out-of-place.  The error was: " << backward_param_error << endl;
                return layer_test_results(sout.str());
            }
        }
        const auto backward_data_error = max(abs(mat(data_grad1) - mat(data_grad2)));
        if (backward_data_error > 0.00001)
        {
            using namespace std;
            sout << "This layer is supposed to support in-place computations but the output of backward_inplace()\n";
            sout << "changes when invoked in-place vs. out-of-place.  The error was: " << backward_data_error << endl;
            return layer_test_results(sout.str());
        }
    }
// ==================================================================
// first validate the way the parameter gradients are computed
...
...
@@ -1622,9 +1988,10 @@ namespace dlib
            eps = base_eps;
        const float oldval = l1.get_layer_params().host()[i];
        l1.get_layer_params().host()[i] = oldval+eps;
        l1.forward(subnetwork, out2);
        impl::call_layer_forward(l1, subnetwork, out2);
        l1.get_layer_params().host()[i] = oldval-eps;
        l1.forward(subnetwork, out3);
        impl::call_layer_forward(l1, subnetwork, out3);
        l1.get_layer_params().host()[i] = oldval;
// Compute a reference derivative via a central differences approximation and
// compare it to the one output by the layer and make sure they match.
...
...
@@ -1635,8 +2002,8 @@ namespace dlib
        {
            using namespace std;
            sout << "Gradient error in parameter #" << i << ".  Relative error: " << relative_error << endl;
            sout << "expected derivative: " << reference_derivative << endl;
            sout << "output derivative:   " << output_derivative << endl;
            sout << "expected derivative: " << reference_derivative << endl;
            sout << "output derivative:   " << output_derivative << endl;
            return layer_test_results(sout.str());
        }
...
...
@@ -1651,21 +2018,24 @@ namespace dlib
        if (eps == 0)
            eps = base_eps;
        subnetwork.get_output_element(i) = oldval+eps;
        l.forward(subnetwork, out2);
        impl::call_layer_forward(l, subnetwork, out2);
        subnetwork.get_output_element(i) = oldval-eps;
        l.forward(subnetwork, out3);
        impl::call_layer_forward(l, subnetwork, out3);
        subnetwork.get_output_element(i) = oldval;

        // Compute a reference derivative via a central differences approximation and
        // compare it to the one output by the layer and make sure they match.
        double reference_derivative = (dot(out2,input_grad)-dot(out3,input_grad))/(2*eps);
        double output_derivative = subnetwork.get_gradient_input_element(i)-initial_gradient_input[i];
        double output_derivative = subnetwork.get_gradient_input_element(i);
        if (!impl::is_inplace_layer(l, subnetwork))
            output_derivative -= initial_gradient_input[i];
        double relative_error = (reference_derivative - output_derivative)/(reference_derivative + 1e-100);
        if (std::abs(relative_error) > 0.01)
        {
            using namespace std;
            sout << "Gradient error in data variable #" << i << ".  Relative error: " << relative_error << endl;
            sout << "expected derivative: " << reference_derivative << endl;
            sout << "output derivative:   " << output_derivative << endl;
            sout << "expected derivative: " << reference_derivative << endl;
            sout << "output derivative:   " << output_derivative << endl;
            return layer_test_results(sout.str());
        }
    }
...
...
dlib/dnn/core_abstract.h
...
...
@@ -389,6 +389,7 @@ namespace dlib
ensures
- Back propagates the error gradient, get_gradient_input(), through this
network and uses the provided solvers to update the network parameters.
- All elements of #get_gradient_input() are set to 0.
!*/
        void clean(
...
...
dlib/dnn/layers.h
...
...
@@ -36,7 +36,7 @@ namespace dlib
        }

        template <typename SUBNET>
        void backward(const tensor& computed_output, const tensor& gradient_input, SUBNET& sub, tensor& params_grad)
        void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad)
        {
            // TODO
        }
...
...
@@ -89,7 +89,7 @@ namespace dlib
        }

        template <typename SUBNET>
        void backward(const tensor&, const tensor& gradient_input, SUBNET& sub, tensor& params_grad)
        void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad)
        {
            // compute the gradient of the parameters.
            params_grad = trans(mat(sub.get_output()))*mat(gradient_input);
...
...
@@ -145,20 +145,22 @@ namespace dlib
        {
        }

        template <typename SUBNET>
        void forward(const SUBNET& sub, resizable_tensor& output)
        void forward_inplace(const tensor& input, tensor& output)
        {
            output.copy_size(sub.get_output());
            output = lowerbound(mat(sub.get_output()), 0);
            output = lowerbound(mat(input), 0);
        }

        template <typename SUBNET>
        void backward(const tensor&, const tensor& gradient_input, SUBNET& sub, tensor& params_grad)
        void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad)
        {
            const float* grad = gradient_input.host();
            const float* in = sub.get_output().host();
            float* out = sub.get_gradient_input().host();
            for (unsigned long i = 0; i < sub.get_output().size(); ++i)
            const float* in = computed_output.host();
            float* out = data_grad.host();
            for (unsigned long i = 0; i < computed_output.size(); ++i)
            {
                if (in[i] > 0)
                    out[i] = grad[i];
...
...
dlib/dnn/layers_abstract.h
...
...
@@ -91,12 +91,28 @@ namespace dlib
            produces an output tensor.  You create an entire deep network by composing
            these functions.  Importantly, you are able to use a wide range of
            different functions to accommodate the task you are trying to accomplish.
            Dlib includes a number of common layer types but if you want to define your
            own then you simply implement a class with the same interface as
            EXAMPLE_LAYER_.
            Therefore, dlib includes a number of common layer types but if you want to
            define your own then you simply implement a class with the same interface
            as EXAMPLE_LAYER_.

            Note that there is no dlib::EXAMPLE_LAYER_ type.  It is shown here purely
            to document the interface that a layer object must implement.

            The central work of defining a layer is implementing the forward and backward
            methods.  When you do this you have three options:
                - Implement the forward() and backward() methods according to the
                  specification shown below.  Do not implement forward_inplace() and
                  backward_inplace().
                - Implement the forward() and backward() methods according to the
                  specification shown below, except exclude the computed_output
                  parameter from backward().  Doing this will allow dlib to make some
                  layers execute in-place and therefore run a little faster and use
                  less memory.  Do not implement forward_inplace() and
                  backward_inplace().
                - Implement the forward_inplace() and backward_inplace() methods
                  according to the specification shown below.  Do not implement
                  forward() and backward().  These in-place methods allow some types of
                  layers to be implemented more efficiently.
        !*/
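As a concrete illustration of the third option, a ReLU-style in-place layer could look like the sketch below. It is modeled on the relu_ changes in dlib/dnn/layers.h from this commit; the class name, the reduced parameter handling, and the omission of serialization boilerplate are illustrative simplifications, not dlib code:

    // A minimal sketch of an in-place capable layer (illustrative, not part of dlib).
    class example_inplace_relu_
    {
    public:
        template <typename SUBNET>
        void setup (const SUBNET& /*sub*/) {}

        // Option 3: provide forward_inplace()/backward_inplace() instead of
        // forward()/backward().  dlib may pass the same tensor as input and output.
        void forward_inplace(const tensor& input, tensor& output)
        {
            output = lowerbound(mat(input), 0);
        }

        void backward_inplace(
            const tensor& computed_output,
            const tensor& gradient_input,
            tensor& data_grad,
            tensor& /*params_grad*/
        )
        {
            const float* grad = gradient_input.host();
            const float* in   = computed_output.host();
            float* out        = data_grad.host();
            for (unsigned long i = 0; i < computed_output.size(); ++i)
                out[i] = (in[i] > 0) ? grad[i] : 0;
        }

        const tensor& get_layer_params() const { return params; }
        tensor& get_layer_params() { return params; }

    private:
        resizable_tensor params; // this layer has no learnable parameters
    };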
    public:
...
...
@@ -152,7 +168,7 @@ namespace dlib
        template <typename SUBNET>
        void forward(
            const SUBNET& sub,
            resizable_tensor& output
            resizable_tensor& data_output
        );
/*!
requires
...
...
@@ -160,14 +176,14 @@ namespace dlib
                - setup() has been called.
            ensures
                - Runs the output of the subnetwork through this layer and stores the
                  output into #output.  In particular, forward() can use any of the outputs
                  in sub (e.g. sub.get_output(), sub.subnet().get_output(), etc.) to
                  compute whatever it wants.
                  results into #data_output.  In particular, forward() can use any of the
                  outputs in sub (e.g. sub.get_output(), sub.subnet().get_output(), etc.)
                  to compute whatever it wants.
        !*/
        template <typename SUBNET>
        void backward(
            const tensor& computed_output,
            const tensor& computed_output, // this parameter is optional
            const tensor& gradient_input,
            SUBNET& sub,
            tensor& params_grad
...
...
@@ -189,7 +205,7 @@ namespace dlib
                  These gradients are stored into #sub and #params_grad, respectively.  To be
                  precise, the gradients are taken of a function f(sub,get_layer_params())
                  which is defined thusly:
                    - Recalling that computed_output is a function of sub and get_layer_params()
                    - Recalling that computed_output is a function of both sub and get_layer_params(),
                      since it is the result of calling forward(sub,computed_output):
                      let f(sub,get_layer_params()) == dot(computed_output, gradient_input)
                      Then we define the following gradient vectors:
...
...
@@ -207,6 +223,59 @@ namespace dlib
                        - layer<I>(sub).get_gradient_input() += DATA_GRADIENT_I
        !*/

        void forward_inplace(
            const tensor& data_input,
            tensor& data_output
        );
        /*!
            requires
                - have_same_dimensions(data_input,data_output) == true
                - setup() has been called.
            ensures
                - Runs the data_input tensor through this layer and stores the output into
                  #data_output.
                - This function supports in-place operation, i.e. having
                  is_same_object(data_input, data_output)==true
        !*/

        void backward_inplace(
            const tensor& computed_output,
            const tensor& gradient_input,
            tensor& data_grad,
            tensor& params_grad
        );
        /*!
            requires
                - setup() has been called.
                - computed_output is the tensor resulting from the most recent call to
                  forward_inplace().  This means that backward_inplace() is allowed to
                  cache intermediate results computed during forward_inplace() and use them
                  for the backward computation.
                - have_same_dimensions(gradient_input, data_grad) == true
                - have_same_dimensions(gradient_input, computed_output) == true
                - have_same_dimensions(params_grad, get_layer_params()) == true
            ensures
                - This function supports in-place operation, i.e. having
                  is_same_object(gradient_input, data_grad)==true
                - This function outputs the gradients of this layer with respect to the
                  input data from a sublayer and also with respect to this layer's parameters.
                  These gradients are stored into #data_grad and #params_grad, respectively.  To be
                  precise, the gradients are taken of a function f(data_input,get_layer_params())
                  which is defined thusly:
                    - Recalling that computed_output is a function of both the input to
                      forward_inplace() and get_layer_params(), since it is the result of
                      calling forward_inplace(data_input,computed_output):
                      let f(data_input,get_layer_params()) == dot(computed_output, gradient_input)
                      Then we define the following gradient vectors:
                        - PARAMETER_GRADIENT == gradient of f(data_input,get_layer_params()) with
                          respect to get_layer_params().
                        - DATA_GRADIENT == gradient of f(data_input,get_layer_params()) with respect
                          to data_input.
                      Finally, backward_inplace() outputs these gradients by performing:
                        - params_grad = PARAMETER_GRADIENT
                        - data_grad = DATA_GRADIENT
        !*/
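As a worked instance of these definitions (not part of the spec itself), take the relu_ layer updated in dlib/dnn/layers.h by this commit. Its forward_inplace() computes computed_output elementwise as max(0, x_i) from data_input x, so with gradient_input g:

    f(\text{data\_input}, \text{get\_layer\_params()}) = \sum_i \max(0, x_i)\, g_i,
    \qquad
    \text{DATA\_GRADIENT}_i = \frac{\partial f}{\partial x_i} =
    \begin{cases}
        g_i & \text{if } x_i > 0 \\
        0   & \text{otherwise}
    \end{cases}

and PARAMETER_GRADIENT is empty because relu_ has no parameters. This matches the elementwise loop in relu_::backward_inplace() shown earlier, which copies grad[i] into data_grad wherever the computed output is positive.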
        const tensor& get_layer_params(
        ) const;
/*!
...
...
@@ -277,7 +346,7 @@ namespace dlib
        template <typename SUBNET> void setup (const SUBNET& sub);
        template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
        template <typename SUBNET> void backward(const tensor& computed_output, const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
        template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
        const tensor& get_layer_params() const;
        tensor& get_layer_params();
/*!
...
...
@@ -313,8 +382,8 @@ namespace dlib
        );

        template <typename SUBNET> void setup (const SUBNET& sub);
        template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
        template <typename SUBNET> void backward(const tensor& computed_output, const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
        void forward_inplace(const tensor& input, tensor& output);
        void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
        const tensor& get_layer_params() const;
        tensor& get_layer_params();
/*!
...
...