Commit e7d713cf, authored Nov 17, 2017 by Davis King
Added softmax_all layer.
parent 11145541
Showing 9 changed files with 401 additions and 42 deletions:
dlib/dnn/cpu_dlib.cpp (+87, -42)
dlib/dnn/cpu_dlib.h (+13, -0)
dlib/dnn/cudnn_dlibapi.cpp (+54, -0)
dlib/dnn/cudnn_dlibapi.h (+13, -0)
dlib/dnn/layers.h (+64, -0)
dlib/dnn/layers_abstract.h (+36, -0)
dlib/dnn/tensor_tools.cpp (+27, -0)
dlib/dnn/tensor_tools.h (+38, -0)
dlib/test/dnn.cpp (+69, -0)
dlib/dnn/cpu_dlib.cpp
@@ -1223,32 +1223,36 @@ namespace dlib

    // ----------------------------------------------------------------------------------------
    // ----------------------------------------------------------------------------------------

    namespace ttimpl
    {
    void softmax (
        const long num_locations,
        const long num_channels,
        tensor& dest,
        const tensor& src
    )
    {
        DLIB_ASSERT(num_channels*num_locations == src.nr()*src.nc()*src.k());
        DLIB_CASSERT(have_same_dimensions(dest,src));
        const auto d = dest.host();
        const auto s = src.host();
-       const long num = src.nr()*src.nc();

        // Note that we subtract out the max values in each channel before applying
        // exp() to avoid numeric overflow in the subsequent computations.  Doing this
        // doesn't change the resulting output, it just makes it more numerically
        // stable.
        for (long n = 0; n < src.num_samples(); ++n)
        {
-           auto ss = s + num*src.k()*n;
-           auto dd = d + num*src.k()*n;
-           for (long i = 0; i < num; ++i)
+           auto ss = s + num_locations*num_channels*n;
+           auto dd = d + num_locations*num_channels*n;
+           for (long i = 0; i < num_locations; ++i)
            {
                float max_val = -std::numeric_limits<float>::infinity();
-               for (long k = 0; k < src.k(); ++k)
-                   max_val = std::max(max_val, ss[k*num]);
+               for (long k = 0; k < num_channels; ++k)
+                   max_val = std::max(max_val, ss[k*num_locations]);

-               for (long k = 0; k < src.k(); ++k)
-                   dd[k*num] = std::exp(ss[k*num]-max_val);
+               for (long k = 0; k < num_channels; ++k)
+                   dd[k*num_locations] = std::exp(ss[k*num_locations]-max_val);

                ++ss;
                ++dd;
@@ -1258,67 +1262,108 @@ namespace dlib

            // Now normalize each channel so they sum to 1.
            for (long n = 0; n < src.num_samples(); ++n)
            {
-               const auto dd = d + num*src.k()*n;
-               for (long r = 0; r < src.nr(); ++r)
+               const auto dd = d + num_locations*num_channels*n;
+               for (long i = 0; i < num_locations; ++i)
                {
-                   for (long c = 0; c < src.nc(); ++c)
-                   {
-                       const auto ddd = dd + r*src.nc() + c;
-                       float temp = 0;
-                       for (long k = 0; k < src.k(); ++k)
-                           temp += ddd[k*num];
-                       for (long k = 0; k < src.k(); ++k)
-                           ddd[k*num] /= temp;
-                   }
+                   const auto ddd = dd + i;
+                   float temp = 0;
+                   for (long k = 0; k < num_channels; ++k)
+                       temp += ddd[k*num_locations];
+                   for (long k = 0; k < num_channels; ++k)
+                       ddd[k*num_locations] /= temp;
                }
            }
        }

        void softmax_gradient (
+           const long num_locations,
+           const long num_channels,
            tensor& grad,
            const tensor& dest,
            const tensor& gradient_input
        )
        {
+           DLIB_ASSERT(num_channels*num_locations == grad.nr()*grad.nc()*grad.k());
            DLIB_CASSERT(have_same_dimensions(grad,dest));
            DLIB_CASSERT(have_same_dimensions(grad,gradient_input));
            const auto d = dest.host();
            const auto g = grad.host();
            const auto in = gradient_input.host();
-           const long num = grad.nr()*grad.nc();
            for (long n = 0; n < grad.num_samples(); ++n)
            {
-               const auto d2 = d + num*grad.k()*n;
-               const auto g2 = g + num*grad.k()*n;
-               const auto in2 = in + num*grad.k()*n;
-               for (long r = 0; r < grad.nr(); ++r)
+               const auto d2 = d + num_locations*num_channels*n;
+               const auto g2 = g + num_locations*num_channels*n;
+               const auto in2 = in + num_locations*num_channels*n;
+               for (long i = 0; i < num_locations; ++i)
                {
-                   for (long c = 0; c < grad.nc(); ++c)
-                   {
-                       const auto d3 = d2 + r*grad.nc() + c;
-                       const auto g3 = g2 + r*grad.nc() + c;
-                       const auto in3 = in2 + r*grad.nc() + c;
-                       float temp = 0;
-                       for (long k = 0; k < grad.k(); ++k)
-                           temp += -d3[k*num]*in3[k*num];
-                       if (is_same_object(gradient_input, grad))
-                       {
-                           for (long k = 0; k < grad.k(); ++k)
-                               g3[k*num] = d3[k*num]*(temp+in3[k*num]);
-                       }
-                       else
-                       {
-                           for (long k = 0; k < grad.k(); ++k)
-                               g3[k*num] += d3[k*num]*(temp+in3[k*num]);
-                       }
-                   }
+                   const auto d3 = d2 + i;
+                   const auto g3 = g2 + i;
+                   const auto in3 = in2 + i;
+                   float temp = 0;
+                   for (long k = 0; k < num_channels; ++k)
+                       temp += -d3[k*num_locations]*in3[k*num_locations];
+                   if (is_same_object(gradient_input, grad))
+                   {
+                       for (long k = 0; k < num_channels; ++k)
+                           g3[k*num_locations] = d3[k*num_locations]*(temp+in3[k*num_locations]);
+                   }
+                   else
+                   {
+                       for (long k = 0; k < num_channels; ++k)
+                           g3[k*num_locations] += d3[k*num_locations]*(temp+in3[k*num_locations]);
+                   }
                }
            }
        }
        }
    // ----------------------------------------------------------------------------------------

        void softmax (
            tensor& dest,
            const tensor& src
        )
        {
            DLIB_CASSERT(have_same_dimensions(dest,src));
            ttimpl::softmax(src.nr()*src.nc(), src.k(), dest, src);
        }

        void softmax_gradient (
            tensor& grad,
            const tensor& dest,
            const tensor& gradient_input
        )
        {
            DLIB_CASSERT(have_same_dimensions(grad,dest));
            DLIB_CASSERT(have_same_dimensions(grad,gradient_input));
            ttimpl::softmax_gradient(grad.nr()*grad.nc(), grad.k(), grad, dest, gradient_input);
        }

    // ------------------------------------------------------------------------------------

        void softmax_all (
            tensor& dest,
            const tensor& src
        )
        {
            DLIB_CASSERT(have_same_dimensions(dest,src));
            ttimpl::softmax(1, src.nr()*src.nc()*src.k(), dest, src);
        }

        void softmax_all_gradient (
            tensor& grad,
            const tensor& dest,
            const tensor& gradient_input
        )
        {
            DLIB_CASSERT(have_same_dimensions(grad,dest));
            DLIB_CASSERT(have_same_dimensions(grad,gradient_input));
            ttimpl::softmax_gradient(1, grad.nr()*grad.nc()*grad.k(), grad, dest, gradient_input);
        }

    // ------------------------------------------------------------------------------------
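In the reworked CPU path above, both public entry points share one implementation: softmax() calls the ttimpl helpers with num_locations = nr()*nc() and num_channels = k(), while the new softmax_all() simply passes (1, nr()*nc()*k()) so each sample is normalized as one long vector. The max-subtraction mentioned in the comment is the standard trick for keeping exp() in range; the following is a minimal standalone sketch of the same idea (illustration only, not code from this commit):

    #include <algorithm>
    #include <cmath>
    #include <vector>

    // Softmax of a plain vector using the same max-subtraction trick as
    // ttimpl::softmax.  Because exp(x[i]-m)/sum_j exp(x[j]-m) equals
    // exp(x[i])/sum_j exp(x[j]) for any constant m, choosing m = max(x)
    // keeps every exponent <= 0 and avoids overflow without changing the result.
    std::vector<float> stable_softmax(std::vector<float> x)
    {
        if (x.empty())
            return x;
        const float m = *std::max_element(x.begin(), x.end());
        float total = 0;
        for (auto& v : x) { v = std::exp(v - m); total += v; }
        for (auto& v : x) v /= total;
        return x;
    }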
dlib/dnn/cpu_dlib.h
@@ -248,6 +248,19 @@ namespace dlib

            const tensor& gradient_input
        );

    // ------------------------------------------------------------------------------------

        void softmax_all (
            tensor& dest,
            const tensor& src
        );

        void softmax_all_gradient (
            tensor& grad,
            const tensor& dest,
            const tensor& gradient_input
        );

    // ------------------------------------------------------------------------------------

        void sigmoid (
dlib/dnn/cudnn_dlibapi.cpp
@@ -1385,6 +1385,60 @@ namespace dlib

                                          grad.device()));
        }

    // ------------------------------------------------------------------------------------
    // ------------------------------------------------------------------------------------

        void softmax_all (
            tensor& dest,
            const tensor& src
        )
        {
            DLIB_CASSERT(have_same_dimensions(dest,src));
            if (src.size() == 0)
                return;

            const float alpha = 1;
            const float beta = 0;

            CHECK_CUDNN(cudnnSoftmaxForward(context(),
                                      CUDNN_SOFTMAX_ACCURATE,
                                      CUDNN_SOFTMAX_MODE_INSTANCE,
                                      &alpha,
                                      descriptor(src),
                                      src.device(),
                                      &beta,
                                      descriptor(dest),
                                      dest.device()));
        }

        void softmax_all_gradient (
            tensor& grad,
            const tensor& dest,
            const tensor& gradient_input
        )
        {
            DLIB_CASSERT(
                  have_same_dimensions(dest,gradient_input) == true &&
                  have_same_dimensions(dest,grad) == true );
            if (dest.size() == 0)
                return;

            const float alpha = 1;
            const float beta = is_same_object(grad,gradient_input) ? 0 : 1;

            CHECK_CUDNN(cudnnSoftmaxBackward(context(),
                                      CUDNN_SOFTMAX_ACCURATE,
                                      CUDNN_SOFTMAX_MODE_INSTANCE,
                                      &alpha,
                                      descriptor(dest),
                                      dest.device(),
                                      descriptor(gradient_input),
                                      gradient_input.device(),
                                      &beta,
                                      descriptor(grad),
                                      grad.device()));
        }

    // ------------------------------------------------------------------------------------
    // ------------------------------------------------------------------------------------
dlib/dnn/cudnn_dlibapi.h
@@ -387,6 +387,19 @@ namespace dlib

              is_same_object(grad, gradient_input)==true
        !*/

    // ------------------------------------------------------------------------------------

        void softmax_all (
            tensor& dest,
            const tensor& src
        );

        void softmax_all_gradient (
            tensor& grad,
            const tensor& dest,
            const tensor& gradient_input
        );

    // ------------------------------------------------------------------------------------

        void sigmoid (
dlib/dnn/layers.h
@@ -2610,6 +2610,70 @@ namespace dlib

    using softmax = add_layer<softmax_, SUBNET>;

    // ----------------------------------------------------------------------------------------

    class softmax_all_
    {
    public:
        softmax_all_()
        {
        }

        template <typename SUBNET>
        void setup (const SUBNET& /*sub*/)
        {
        }

        void forward_inplace(const tensor& input, tensor& output)
        {
            tt::softmax_all(output, input);
        }

        void backward_inplace(
            const tensor& computed_output,
            const tensor& gradient_input,
            tensor& data_grad,
            tensor&
        )
        {
            tt::softmax_all_gradient(data_grad, computed_output, gradient_input);
        }

        const tensor& get_layer_params() const { return params; }
        tensor& get_layer_params() { return params; }

        friend void serialize(const softmax_all_& , std::ostream& out)
        {
            serialize("softmax_all_", out);
        }

        friend void deserialize(softmax_all_& , std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "softmax_all_")
                throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::softmax_all_.");
        }

        friend std::ostream& operator<<(std::ostream& out, const softmax_all_& )
        {
            out << "softmax_all";
            return out;
        }

        friend void to_xml(const softmax_all_& /*item*/, std::ostream& out)
        {
            out << "<softmax_all/>\n";
        }

    private:
        resizable_tensor params;
    };

    template <typename SUBNET>
    using softmax_all = add_layer<softmax_all_, SUBNET>;

    // ----------------------------------------------------------------------------------------

    namespace impl
    {
        template <template<typename> class TAG_TYPE, template<typename> class... TAG_TYPES>
dlib/dnn/layers_abstract.h
@@ -2116,6 +2116,42 @@ namespace dlib

    template <typename SUBNET>
    using softmax = add_layer<softmax_, SUBNET>;

    // ----------------------------------------------------------------------------------------

    class softmax_all_
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
                defined above.  In particular, it defines a softmax layer.  To be precise,
                we define the softmax function s(x) as:
                    s(x) == exp(x)/sum(exp(x))
                where x is a vector.  Then this layer treats its input tensor as a
                collection of tensor::num_samples() vectors and applies s() to each vector
                in the tensor.  Therefore, there are logically tensor::num_samples()
                invocations of s().
        !*/

    public:

        softmax_all_(
        );

        template <typename SUBNET> void setup (const SUBNET& sub);
        void forward_inplace(const tensor& input, tensor& output);
        void backward_inplace(
            const tensor& computed_output,
            const tensor& gradient_input,
            tensor& data_grad,
            tensor& params_grad
        );
        const tensor& get_layer_params() const;
        tensor& get_layer_params();
        /*!
            These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_
            interface.  Note that this layer doesn't have any parameters, so the tensor
            returned by get_layer_params() is always empty.
        !*/
    };

    template <typename SUBNET>
    using softmax_all = add_layer<softmax_all_, SUBNET>;

    // ----------------------------------------------------------------------------------------

    template <
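A usage sketch for the new layer (not part of the commit; the network, layer sizes, and input shape here are made up for illustration): the softmax_all<> alias documented above drops into a network definition like any other dlib layer, and after a forward pass each sample's outputs form one probability vector.

    #include <dlib/dnn.h>
    #include <iostream>

    int main()
    {
        using namespace dlib;

        // Hypothetical toy network: an 8-output fully connected layer followed
        // by the new softmax_all layer, fed 4x4 float matrices.
        using net_type = softmax_all<fc<8, input<matrix<float>>>>;
        net_type net;

        std::vector<matrix<float>> samples(3);
        for (auto& s : samples)
            s = ones_matrix<float>(4, 4);

        resizable_tensor x;
        net.to_tensor(samples.begin(), samples.end(), x);
        const tensor& out = net.forward(x);

        // mat(out) has one row per sample; each row sums to about 1.
        for (long n = 0; n < out.num_samples(); ++n)
            std::cout << "sample " << n << " sums to " << sum(rowm(mat(out), n)) << "\n";
        return 0;
    }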
dlib/dnn/tensor_tools.cpp
@@ -741,6 +741,33 @@ namespace dlib { namespace tt

#endif
    }

    // ----------------------------------------------------------------------------------------

    void softmax_all (
        tensor& dest,
        const tensor& src
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::softmax_all(dest,src);
#else
        cpu::softmax_all(dest,src);
#endif
    }

    void softmax_all_gradient (
        tensor& grad,
        const tensor& dest,
        const tensor& gradient_input
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::softmax_all_gradient(grad, dest, gradient_input);
#else
        cpu::softmax_all_gradient(grad, dest, gradient_input);
#endif
    }

    // ----------------------------------------------------------------------------------------

    void sigmoid (
dlib/dnn/tensor_tools.h
@@ -1216,6 +1216,44 @@ namespace dlib { namespace tt

              is_same_object(grad, gradient_input)==true
        !*/

    // ----------------------------------------------------------------------------------------

    void softmax_all (
        tensor& dest,
        const tensor& src
    );
    /*!
        requires
            - have_same_dimensions(dest, src) == true
        ensures
            - Note that the softmax function is a vector valued function:
                s(x) == exp(x)/sum(exp(x))
            - Computes the softmax function on src and writes the results to dest.  The
              softmax is computed over the entire tensor with one invocation of s().  So
              unlike softmax() which computes many s() evaluations, one for each spatial
              location, softmax_all() calls s() once for the entire tensor.
            - This function supports in-place operation, i.e. having
              is_same_object(dest, src)==true
    !*/
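The distinction drawn above is easiest to see side by side; a small sketch (illustration only, with a made-up tensor shape):

    #include <dlib/dnn.h>
    #include <iostream>

    int main()
    {
        using namespace dlib;

        resizable_tensor src(2, 3, 4, 4);   // 2 samples, k=3 channels, 4x4 spatial grid
        tt::tensor_rand rnd;
        rnd.fill_uniform(src);

        resizable_tensor per_location, per_sample;
        per_location.copy_size(src);
        per_sample.copy_size(src);

        tt::softmax(per_location, src);     // at each of the 16 locations, the 3 channel values sum to 1
        tt::softmax_all(per_sample, src);   // all 3*4*4 = 48 values of each sample sum to 1

        // mat() views each sample as one row, so a row of per_sample sums to about 1.
        std::cout << sum(rowm(mat(per_sample), 0)) << "\n";
        return 0;
    }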
    void softmax_all_gradient (
        tensor& grad,
        const tensor& dest,
        const tensor& gradient_input
    );
    /*!
        requires
            - have_same_dimensions(dest,gradient_input) == true
            - have_same_dimensions(dest,grad) == true
            - is_same_object(grad, dest)==false
        ensures
            - We interpret dest as the output of softmax_all(dest,SRC) for some SRC tensor.
              Then let f(SRC) == dot(gradient_input,dest).  Then this function computes the
              gradient of f() with respect to SRC and assigns it to grad.
            - This function supports in-place operation, i.e. having
              is_same_object(grad, gradient_input)==true
    !*/
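For reference, the gradient this contract describes follows from standard softmax calculus (a derivation, not text from the commit): writing y == s(SRC) for one sample and f(SRC) == dot(gradient_input, y), the chain rule gives

    df/dSRC[k] == y[k]*(gradient_input[k] - dot(gradient_input, y))

which is the form the CPU implementation above evaluates: temp accumulates -dot(dest, gradient_input) for the sample and each gradient element is dest[k]*(gradient_input[k] + temp).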
    // ----------------------------------------------------------------------------------------

    void sigmoid (
dlib/test/dnn.cpp
@@ -153,6 +153,68 @@ namespace

        auto grad_error = compare_gradients(src_grad, grad_src);
        dlog << LINFO << "src error: " << grad_error;
        DLIB_TEST(grad_error < 0.001);

#ifdef DLIB_USE_CUDA
        resizable_tensor src1 = src;
        resizable_tensor src2 = src;
        resizable_tensor dest1, dest2;
        dest1.copy_size(src);
        dest2.copy_size(src);
        cuda::softmax_all(dest1, src1);
        cpu::softmax_all(dest2, src2);
        DLIB_TEST_MSG(max(abs(mat(dest1)-mat(dest2))) < 1e-5, max(abs(mat(dest1)-mat(dest2))));
#endif
    }

    void test_softmax_all()
    {
        using namespace dlib::tt;
        print_spinner();
        const long nr = 3;
        const long nc = 3;
        resizable_tensor src(5,5,nr,nr), dest(5,5,nr,nc), gradient_input(5,5,nr,nc);
        tt::tensor_rand rnd;
        rnd.fill_uniform(src);
        rnd.fill_uniform(dest);
        // fill like this as a test of the assignment operator.
        gradient_input = matrix_cast<float>(gaussian_randm(5, 5*nr*nc, 2));

        auto grad_src = [&](long idx) {
            auto f = [&](float eps) {
                const float old = src.host()[idx];
                src.host()[idx] += eps;
                tt::softmax_all(dest, src);
                float result = dot(gradient_input, dest);
                src.host()[idx] = old;
                return result;
            };
            const float eps = 0.01;
            return (f(+eps)-f(-eps))/(2*eps);
        };

        resizable_tensor src_grad;
        src_grad.copy_size(src);
        src_grad = 0;

        tt::softmax_all(dest, src);
        softmax_all_gradient(src_grad, dest, gradient_input);

        auto grad_error = compare_gradients(src_grad, grad_src);
        dlog << LINFO << "src error: " << grad_error;
        DLIB_TEST(grad_error < 0.001);

#ifdef DLIB_USE_CUDA
        resizable_tensor src1 = src;
        resizable_tensor src2 = src;
        resizable_tensor dest1, dest2;
        dest1.copy_size(src);
        dest2.copy_size(src);
        cuda::softmax_all(dest1, src1);
        cpu::softmax_all(dest2, src2);
        DLIB_TEST_MSG(max(abs(mat(dest1)-mat(dest2))) < 1e-5, max(abs(mat(dest1)-mat(dest2))));
#endif
    }

    void test_batch_normalize()
@@ -1701,6 +1763,12 @@ namespace

            auto res = test_layer(l);
            DLIB_TEST_MSG(res, res);
        }
        {
            print_spinner();
            softmax_all_ l;
            auto res = test_layer(l);
            DLIB_TEST_MSG(res, res);
        }
    }

    // ----------------------------------------------------------------------------------------
@@ -2988,6 +3056,7 @@ namespace

        test_avg_pool(4,5,40,50,0,1);
        test_tanh();
        test_softmax();
        test_softmax_all();
        test_sigmoid();
        test_batch_normalize();
        test_batch_normalize_conv();