Commit 7f77ec65
authored May 22, 2016 by Davis King
Made the batch normalization epsilon user settable rather than being hard coded.
parent b92b226c
Showing 8 changed files with 146 additions and 57 deletions (+146 −57)

    dlib/dnn/cpu_dlib.cpp        +28 −12
    dlib/dnn/cpu_dlib.h          +6  −0
    dlib/dnn/cudnn_dlibapi.cpp   +30 −14
    dlib/dnn/cudnn_dlibapi.h     +6  −0
    dlib/dnn/layers.h            +23 −10
    dlib/dnn/layers_abstract.h   +17 −1
    dlib/dnn/tensor_tools.cpp    +18 −12
    dlib/dnn/tensor_tools.h      +18 −8
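In practical terms, the change surfaces as an optional constructor argument on the bn_ layer and an explicit leading eps parameter on the tt::batch_normalize* functions. A minimal usage sketch follows (hypothetical example code, not part of the commit; assumes <dlib/dnn.h> and the new API shown in the diffs below):

    using namespace dlib;

    // eps defaults to DEFAULT_BATCH_NORM_EPS (0.00001), matching the old hard-coded value.
    bn_<CONV_MODE> bn_default(1000);

    // A user-chosen epsilon can now be passed alongside the running-stats window size.
    bn_<CONV_MODE> bn_custom(1000, 1e-3);
    double e = bn_custom.get_eps();   // returns 1e-3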
dlib/dnn/cpu_dlib.cpp

@@ -531,6 +531,7 @@ namespace dlib
     // -----------------------------------------------------------------------------------

         void batch_normalize_inference (
+            const double eps,
             resizable_tensor& dest,
             const tensor& src,
             const tensor& gamma,
@@ -546,7 +547,8 @@ namespace dlib
                 gamma.k() == src.k() &&
                 have_same_dimensions(gamma, beta) &&
                 have_same_dimensions(gamma, running_means) &&
-                have_same_dimensions(gamma, running_variances),
+                have_same_dimensions(gamma, running_variances) &&
+                eps > 0,
                 "\ngamma.num_samples(): " << gamma.num_samples() <<
                 "\ngamma.k():  " << gamma.k() <<
                 "\ngamma.nr(): " << gamma.nr() <<
@@ -565,7 +567,8 @@ namespace dlib
                 "\nrunning_variances.nc(): " << running_variances.nc() <<
                 "\nsrc.k():   " << src.k() <<
                 "\nsrc.nr():  " << src.nr() <<
-                "\nsrc.nc():  " << src.nc()
+                "\nsrc.nc():  " << src.nc() <<
+                "\neps:  " << eps
             );
             dest.copy_size(src);
@@ -581,7 +584,7 @@ namespace dlib
             {
                 for (long k = 0; k < num; ++k)
                 {
-                    *d = g[k]*(*s - m[k])/std::sqrt(v[k] + dlib::tt::BATCH_NORM_EPS) + b[k];
+                    *d = g[k]*(*s - m[k])/std::sqrt(v[k] + eps) + b[k];
                     ++d;
                     ++s;
                 }
@@ -589,6 +592,7 @@ namespace dlib
         }

         void batch_normalize (
+            const double eps,
             resizable_tensor& dest,
             resizable_tensor& means,
             resizable_tensor& invstds,
@@ -609,7 +613,8 @@ namespace dlib
                 beta.num_samples() == 1 &&
                 gamma.nr() == beta.nr() && beta.nr() == src.nr() &&
                 gamma.nc() == beta.nc() && beta.nc() == src.nc() &&
-                gamma.k() == beta.k() && beta.k() == src.k(),
+                gamma.k() == beta.k() && beta.k() == src.k() &&
+                eps > 0,
                 "\ngamma.num_samples(): " << gamma.num_samples() <<
                 "\ngamma.k():  " << gamma.k() <<
                 "\ngamma.nr(): " << gamma.nr() <<
@@ -620,7 +625,8 @@ namespace dlib
                 "\nbeta.nc():  " << beta.nc() <<
                 "\nsrc.k():   " << src.k() <<
                 "\nsrc.nr():  " << src.nr() <<
-                "\nsrc.nc():  " << src.nc()
+                "\nsrc.nc():  " << src.nc() <<
+                "\neps:  " << eps
             );
             dest.copy_size(src);
@@ -662,7 +668,7 @@ namespace dlib
                 else
                     rvar[i] = (1-averaging_factor)*rvar[i] + scale*averaging_factor*actual_var;

-                p_invstds[i] = 1.0f/std::sqrt(actual_var + dlib::tt::BATCH_NORM_EPS);
+                p_invstds[i] = 1.0f/std::sqrt(actual_var + eps);
             }
             p_src = src.host();
@@ -689,6 +695,7 @@ namespace dlib
         }

         void batch_normalize_gradient (
+            const double eps,
             const tensor& gradient_input,
             const tensor& means,
             const tensor& invstds,
@@ -709,6 +716,7 @@ namespace dlib
             DLIB_CASSERT(num == beta_grad.size(), "");
             DLIB_CASSERT(have_same_dimensions(gradient_input, src), "");
             DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad), "");
+            DLIB_CASSERT(eps > 0, "");
             beta_grad = 0;
             gamma_grad = 0;
@@ -784,6 +792,7 @@ namespace dlib
     // ----------------------------------------------------------------------------------------

         void batch_normalize_conv_inference (
+            const double eps,
             resizable_tensor& dest,
             const tensor& src,
             const tensor& gamma,
@@ -799,7 +808,8 @@ namespace dlib
                 gamma.k() == src.k() &&
                 have_same_dimensions(gamma, beta) &&
                 have_same_dimensions(gamma, running_means) &&
-                have_same_dimensions(gamma, running_variances),
+                have_same_dimensions(gamma, running_variances) &&
+                eps > 0,
                 "\ngamma.num_samples(): " << gamma.num_samples() <<
                 "\ngamma.k():  " << gamma.k() <<
                 "\ngamma.nr(): " << gamma.nr() <<
@@ -818,7 +828,8 @@ namespace dlib
                 "\nrunning_variances.nc(): " << running_variances.nc() <<
                 "\nsrc.k():   " << src.k() <<
                 "\nsrc.nr():  " << src.nr() <<
-                "\nsrc.nc():  " << src.nc()
+                "\nsrc.nc():  " << src.nc() <<
+                "\neps:  " << eps
             );
             dest.copy_size(src);
@@ -834,7 +845,7 @@ namespace dlib
             {
                 for (long k = 0; k < src.k(); ++k)
                 {
-                    const float invstd = 1.0f/std::sqrt(v[k] + dlib::tt::BATCH_NORM_EPS);
+                    const float invstd = 1.0f/std::sqrt(v[k] + eps);
                     for (long j = 0; j < num; ++j)
                     {
                         *d = g[k]*(*s - m[k])*invstd + b[k];
@@ -846,6 +857,7 @@ namespace dlib
         }

         void batch_normalize_conv (
+            const double eps,
             resizable_tensor& dest,
             resizable_tensor& means,
             resizable_tensor& invstds,
@@ -868,7 +880,8 @@ namespace dlib
                 beta.nr() == 1 &&
                 gamma.nc() == 1 &&
                 beta.nc() == 1 &&
-                gamma.k() == beta.k() && beta.k() == src.k(),
+                gamma.k() == beta.k() && beta.k() == src.k() &&
+                eps > 0,
                 "\ngamma.num_samples(): " << gamma.num_samples() <<
                 "\ngamma.k():  " << gamma.k() <<
                 "\ngamma.nr(): " << gamma.nr() <<
@@ -879,7 +892,8 @@ namespace dlib
                 "\nbeta.nc():  " << beta.nc() <<
                 "\nsrc.k():   " << src.k() <<
                 "\nsrc.nr():  " << src.nr() <<
-                "\nsrc.nc():  " << src.nc()
+                "\nsrc.nc():  " << src.nc() <<
+                "\neps:  " << eps
             );
             dest.copy_size(src);
@@ -927,7 +941,7 @@ namespace dlib
                 else
                     rvar[k] = (1-averaging_factor)*rvar[k] + scale*averaging_factor*actual_var;

-                p_invstds[k] = 1.0f/std::sqrt(actual_var + dlib::tt::BATCH_NORM_EPS);
+                p_invstds[k] = 1.0f/std::sqrt(actual_var + eps);
             }
             p_src = src.host();
@@ -955,6 +969,7 @@ namespace dlib
         }

         void batch_normalize_conv_gradient (
+            const double eps,
             const tensor& gradient_input,
             const tensor& means,
             const tensor& invstds,
@@ -975,6 +990,7 @@ namespace dlib
             DLIB_CASSERT(src.k() == beta_grad.size(), "");
             DLIB_CASSERT(have_same_dimensions(gradient_input, src), "");
             DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad), "");
+            DLIB_CASSERT(eps > 0, "");
             beta_grad = 0;
             gamma_grad = 0;
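The CPU path above only threads eps through an existing formula: per element, the inference branch still computes gamma*(src - running_mean)/sqrt(running_variance + eps) + beta, with eps now supplied by the caller instead of the old dlib::tt::BATCH_NORM_EPS constant. A standalone sketch of that per-element computation, for illustration only (not dlib code):

    #include <cmath>
    #include <vector>

    // Per-element batch-norm inference with a caller-supplied epsilon, mirroring
    // what cpu::batch_normalize_inference() computes for a single channel.
    std::vector<float> bn_inference(const std::vector<float>& src,
                                    float gamma, float beta,
                                    float running_mean, float running_variance,
                                    double eps)  // user-settable, must be > 0
    {
        std::vector<float> dest(src.size());
        for (std::size_t i = 0; i < src.size(); ++i)
            dest[i] = static_cast<float>(
                gamma*(src[i] - running_mean)/std::sqrt(running_variance + eps) + beta);
        return dest;
    }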
dlib/dnn/cpu_dlib.h

@@ -131,6 +131,7 @@ namespace dlib
     // -----------------------------------------------------------------------------------

         void batch_normalize_inference (
+            const double eps,
             resizable_tensor& dest,
             const tensor& src,
             const tensor& gamma,
@@ -140,6 +141,7 @@ namespace dlib
         );

         void batch_normalize (
+            const double eps,
             resizable_tensor& dest,
             resizable_tensor& means,
             resizable_tensor& invstds,
@@ -152,6 +154,7 @@ namespace dlib
         );

         void batch_normalize_gradient (
+            const double eps,
             const tensor& gradient_input,
             const tensor& means,
             const tensor& invstds,
@@ -163,6 +166,7 @@ namespace dlib
         );

         void batch_normalize_conv_inference (
+            const double eps,
             resizable_tensor& dest,
             const tensor& src,
             const tensor& gamma,
@@ -172,6 +176,7 @@ namespace dlib
         );

         void batch_normalize_conv (
+            const double eps,
             resizable_tensor& dest,
             resizable_tensor& means,
             resizable_tensor& invstds,
@@ -184,6 +189,7 @@ namespace dlib
         );

         void batch_normalize_conv_gradient (
+            const double eps,
             const tensor& gradient_input,
             const tensor& means,
             const tensor& invstds,
dlib/dnn/cudnn_dlibapi.cpp

@@ -338,6 +338,7 @@ namespace dlib
     // ------------------------------------------------------------------------------------

         void batch_normalize_inference (
+            const double eps,
             resizable_tensor& dest,
             const tensor& src,
             const tensor& gamma,
@@ -353,7 +354,8 @@ namespace dlib
                 gamma.k() == src.k() &&
                 have_same_dimensions(gamma, beta) &&
                 have_same_dimensions(gamma, running_means) &&
-                have_same_dimensions(gamma, running_variances),
+                have_same_dimensions(gamma, running_variances) &&
+                eps > 0,
                 "\ngamma.num_samples(): " << gamma.num_samples() <<
                 "\ngamma.k():  " << gamma.k() <<
                 "\ngamma.nr(): " << gamma.nr() <<
@@ -372,7 +374,8 @@ namespace dlib
                 "\nrunning_variances.nc(): " << running_variances.nc() <<
                 "\nsrc.k():   " << src.k() <<
                 "\nsrc.nr():  " << src.nr() <<
-                "\nsrc.nc():  " << src.nc()
+                "\nsrc.nc():  " << src.nc() <<
+                "\neps:  " << eps
             );
             const float in_scale = 1;
             const float out_scale = 0;
@@ -393,10 +396,11 @@ namespace dlib
                 beta.device(),
                 running_means.device(),
                 running_variances.device(),
-                dlib::tt::BATCH_NORM_EPS));
+                eps));
         }

         void batch_normalize (
+            const double eps,
             resizable_tensor& dest,
             resizable_tensor& means,
             resizable_tensor& invstds,
@@ -417,7 +421,8 @@ namespace dlib
                 beta.num_samples() == 1 &&
                 gamma.nr() == beta.nr() && beta.nr() == src.nr() &&
                 gamma.nc() == beta.nc() && beta.nc() == src.nc() &&
-                gamma.k() == beta.k() && beta.k() == src.k(),
+                gamma.k() == beta.k() && beta.k() == src.k() &&
+                eps > 0,
                 "\ngamma.num_samples(): " << gamma.num_samples() <<
                 "\ngamma.k():  " << gamma.k() <<
                 "\ngamma.nr(): " << gamma.nr() <<
@@ -428,7 +433,8 @@ namespace dlib
                 "\nbeta.nc():  " << beta.nc() <<
                 "\nsrc.k():   " << src.k() <<
                 "\nsrc.nr():  " << src.nr() <<
-                "\nsrc.nc():  " << src.nc()
+                "\nsrc.nc():  " << src.nc() <<
+                "\neps:  " << eps
             );
             const float in_scale = 1;
@@ -455,12 +461,13 @@ namespace dlib
                 averaging_factor,
                 running_means.device(),
                 running_variances.device(),
-                dlib::tt::BATCH_NORM_EPS,
+                eps,
                 means.device(),
                 invstds.device()));
         }

         void batch_normalize_gradient (
+            const double eps,
             const tensor& gradient_input,
             const tensor& means,
             const tensor& invstds,
@@ -480,6 +487,7 @@ namespace dlib
             DLIB_CASSERT(num == beta_grad.size(), "");
             DLIB_CASSERT(have_same_dimensions(gradient_input, src), "");
             DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad), "");
+            DLIB_CASSERT(eps > 0, "");
             const float in_scale = 1;
             const float out_scale = 1;
@@ -503,7 +511,7 @@ namespace dlib
                 gamma.device(),
                 gamma_grad.device(),
                 beta_grad.device(),
-                dlib::tt::BATCH_NORM_EPS,
+                eps,
                 means.device(),
                 invstds.device()));
         }
@@ -511,6 +519,7 @@ namespace dlib
     // ------------------------------------------------------------------------------------

         void batch_normalize_conv_inference (
+            const double eps,
             resizable_tensor& dest,
             const tensor& src,
             const tensor& gamma,
@@ -526,7 +535,8 @@ namespace dlib
                 gamma.k() == src.k() &&
                 have_same_dimensions(gamma, beta) &&
                 have_same_dimensions(gamma, running_means) &&
-                have_same_dimensions(gamma, running_variances),
+                have_same_dimensions(gamma, running_variances) &&
+                eps > 0,
                 "\ngamma.num_samples(): " << gamma.num_samples() <<
                 "\ngamma.k():  " << gamma.k() <<
                 "\ngamma.nr(): " << gamma.nr() <<
@@ -545,7 +555,8 @@ namespace dlib
                 "\nrunning_variances.nc(): " << running_variances.nc() <<
                 "\nsrc.k():   " << src.k() <<
                 "\nsrc.nr():  " << src.nr() <<
-                "\nsrc.nc():  " << src.nc()
+                "\nsrc.nc():  " << src.nc() <<
+                "\neps:  " << eps
             );
             const float in_scale = 1;
             const float out_scale = 0;
@@ -566,10 +577,11 @@ namespace dlib
                 beta.device(),
                 running_means.device(),
                 running_variances.device(),
-                dlib::tt::BATCH_NORM_EPS));
+                eps));
         }

         void batch_normalize_conv (
+            const double eps,
             resizable_tensor& dest,
             resizable_tensor& means,
             resizable_tensor& invstds,
@@ -592,7 +604,8 @@ namespace dlib
                 beta.nr() == 1 &&
                 gamma.nc() == 1 &&
                 beta.nc() == 1 &&
-                gamma.k() == beta.k() && beta.k() == src.k(),
+                gamma.k() == beta.k() && beta.k() == src.k() &&
+                eps > 0,
                 "\ngamma.num_samples(): " << gamma.num_samples() <<
                 "\ngamma.k():  " << gamma.k() <<
                 "\ngamma.nr(): " << gamma.nr() <<
@@ -603,7 +616,8 @@ namespace dlib
                 "\nbeta.nc():  " << beta.nc() <<
                 "\nsrc.k():   " << src.k() <<
                 "\nsrc.nr():  " << src.nr() <<
-                "\nsrc.nc():  " << src.nc()
+                "\nsrc.nc():  " << src.nc() <<
+                "\neps:  " << eps
             );
             const float in_scale = 1;
             const float out_scale = 0;
@@ -629,12 +643,13 @@ namespace dlib
                 averaging_factor,
                 running_means.device(),
                 running_variances.device(),
-                dlib::tt::BATCH_NORM_EPS,
+                eps,
                 means.device(),
                 invstds.device()));
         }

         void batch_normalize_conv_gradient (
+            const double eps,
             const tensor& gradient_input,
             const tensor& means,
             const tensor& invstds,
@@ -653,6 +668,7 @@ namespace dlib
             DLIB_CASSERT(src.k() == beta_grad.size(), "");
             DLIB_CASSERT(have_same_dimensions(gradient_input, src), "");
             DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad), "");
+            DLIB_CASSERT(eps > 0, "");
             const float in_scale = 1;
             const float out_scale = 1;
@@ -676,7 +692,7 @@ namespace dlib
                 gamma.device(),
                 gamma_grad.device(),
                 beta_grad.device(),
-                dlib::tt::BATCH_NORM_EPS,
+                eps,
                 means.device(),
                 invstds.device()));
         }
dlib/dnn/cudnn_dlibapi.h

@@ -135,6 +135,7 @@ namespace dlib
     // ------------------------------------------------------------------------------------

         void batch_normalize_inference (
+            const double eps,
             resizable_tensor& dest,
             const tensor& src,
             const tensor& gamma,
@@ -144,6 +145,7 @@ namespace dlib
         );

         void batch_normalize (
+            const double eps,
             resizable_tensor& dest,
             resizable_tensor& means,
             resizable_tensor& invstds,
@@ -156,6 +158,7 @@ namespace dlib
         );

         void batch_normalize_gradient (
+            const double eps,
             const tensor& gradient_input,
             const tensor& means,
             const tensor& invstds,
@@ -169,6 +172,7 @@ namespace dlib
     // ------------------------------------------------------------------------------------

         void batch_normalize_conv_inference (
+            const double eps,
             resizable_tensor& dest,
             const tensor& src,
             const tensor& gamma,
@@ -178,6 +182,7 @@ namespace dlib
         );

         void batch_normalize_conv (
+            const double eps,
             resizable_tensor& dest,
             resizable_tensor& means,
             resizable_tensor& invstds,
@@ -190,6 +195,7 @@ namespace dlib
         );

         void batch_normalize_conv_gradient (
+            const double eps,
             const tensor& gradient_input,
             const tensor& means,
             const tensor& invstds,
dlib/dnn/layers.h

@@ -650,23 +650,30 @@ namespace dlib
         FC_MODE = 1
     };

+    const double DEFAULT_BATCH_NORM_EPS = 0.00001;

     template <layer_mode mode>
     class bn_
     {
     public:
-        explicit bn_(unsigned long window_size) :
+        explicit bn_(unsigned long window_size, double eps_ = DEFAULT_BATCH_NORM_EPS) :
             num_updates(0),
             running_stats_window_size(window_size),
             learning_rate_multiplier(1),
-            weight_decay_multiplier(0)
+            weight_decay_multiplier(0),
+            eps(eps_)
         {}

         bn_() : bn_(1000) {}

         layer_mode get_mode() const { return mode; }
         unsigned long get_running_stats_window_size () const { return running_stats_window_size; }
+        double get_eps() const { return eps; }
         double get_learning_rate_multiplier () const { return learning_rate_multiplier; }
         double get_weight_decay_multiplier () const { return weight_decay_multiplier; }
@@ -713,16 +720,16 @@ namespace dlib
                 if (num_updates < running_stats_window_size)
                     ++num_updates;
                 if (mode == FC_MODE)
-                    tt::batch_normalize(output, means, invstds, decay, running_means, running_variances, sub.get_output(), g, b);
+                    tt::batch_normalize(eps, output, means, invstds, decay, running_means, running_variances, sub.get_output(), g, b);
                 else
-                    tt::batch_normalize_conv(output, means, invstds, decay, running_means, running_variances, sub.get_output(), g, b);
+                    tt::batch_normalize_conv(eps, output, means, invstds, decay, running_means, running_variances, sub.get_output(), g, b);
             }
             else // we are running in testing mode so we just linearly scale the input tensor.
             {
                 if (mode == FC_MODE)
-                    tt::batch_normalize_inference(output, sub.get_output(), g, b, running_means, running_variances);
+                    tt::batch_normalize_inference(eps, output, sub.get_output(), g, b, running_means, running_variances);
                 else
-                    tt::batch_normalize_conv_inference(output, sub.get_output(), g, b, running_means, running_variances);
+                    tt::batch_normalize_conv_inference(eps, output, sub.get_output(), g, b, running_means, running_variances);
             }
         }
@@ -733,9 +740,9 @@ namespace dlib
             auto g_grad = gamma(params_grad, 0);
             auto b_grad = beta(params_grad, gamma.size());
             if (mode == FC_MODE)
-                tt::batch_normalize_gradient(gradient_input, means, invstds, sub.get_output(), g, sub.get_gradient_input(), g_grad, b_grad);
+                tt::batch_normalize_gradient(eps, gradient_input, means, invstds, sub.get_output(), g, sub.get_gradient_input(), g_grad, b_grad);
             else
-                tt::batch_normalize_conv_gradient(gradient_input, means, invstds, sub.get_output(), g, sub.get_gradient_input(), g_grad, b_grad);
+                tt::batch_normalize_conv_gradient(eps, gradient_input, means, invstds, sub.get_output(), g, sub.get_gradient_input(), g_grad, b_grad);
         }

         const tensor& get_layer_params() const { return params; }
@@ -758,6 +765,7 @@ namespace dlib
             serialize(item.running_stats_window_size, out);
             serialize(item.learning_rate_multiplier, out);
             serialize(item.weight_decay_multiplier, out);
+            serialize(item.eps, out);
         }

         friend void deserialize(bn_& item, std::istream& in)
@@ -798,12 +806,13 @@ namespace dlib
                 // We also need to flip the running_variances around since the previous
                 // format saved the inverse standard deviations instead of variances.
-                item.running_variances = 1.0f/squared(mat(item.running_variances)) - tt::BATCH_NORM_EPS;
+                item.running_variances = 1.0f/squared(mat(item.running_variances)) - DEFAULT_BATCH_NORM_EPS;
             }
             else if (version == "bn_con2" || version == "bn_fc2")
             {
                 deserialize(item.learning_rate_multiplier, in);
                 deserialize(item.weight_decay_multiplier, in);
+                deserialize(item.eps, in);
             }
             else
             {
@@ -811,6 +820,8 @@ namespace dlib
                 // implicitly 1.
                 item.learning_rate_multiplier = 1;
                 item.weight_decay_multiplier = 1;
+                item.eps = DEFAULT_BATCH_NORM_EPS;
             }
         }
@@ -820,6 +831,7 @@ namespace dlib
                 out << "bn_con ";
             else
                 out << "bn_fc ";
+            out << " eps=" << item.eps;
             out << " learning_rate_mult=" << item.learning_rate_multiplier;
             out << " weight_decay_mult=" << item.weight_decay_multiplier;
             return out;
@@ -837,6 +849,7 @@ namespace dlib
         unsigned long running_stats_window_size;
         double learning_rate_multiplier;
         double weight_decay_multiplier;
+        double eps;
     };

     template <typename SUBNET>
@@ -1273,7 +1286,7 @@ namespace dlib
             auto sg = gamma(temp, 0);
             auto sb = beta(temp, gamma.size());

-            g = pointwise_multiply(mat(sg), 1.0f/sqrt(mat(item.running_variances) + tt::BATCH_NORM_EPS));
+            g = pointwise_multiply(mat(sg), 1.0f/sqrt(mat(item.running_variances) + item.get_eps()));
             b = mat(sb) - pointwise_multiply(mat(g), mat(item.running_means));
         }
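Because eps is now serialized with the layer (read back for the "bn_con2"/"bn_fc2" versions, with older streams falling back to DEFAULT_BATCH_NORM_EPS), a saved network keeps whatever epsilon it was constructed with. A minimal round-trip sketch, hypothetical and not part of the commit (assumes <dlib/dnn.h> and <sstream>):

    using namespace dlib;

    bn_<FC_MODE> layer(500, 1e-4);    // custom epsilon
    std::stringstream ss;
    serialize(layer, ss);             // eps is written along with the other fields

    bn_<FC_MODE> restored;            // default: window size 1000, default eps
    deserialize(restored, ss);
    // restored.get_eps() would now report 1e-4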
dlib/dnn/layers_abstract.h

@@ -818,6 +818,8 @@ namespace dlib
         FC_MODE = 1  // fully connected mode
     };

+    const double DEFAULT_BATCH_NORM_EPS = 0.00001;

     template <layer_mode mode>
@@ -857,17 +859,22 @@ namespace dlib
                 - #get_running_stats_window_size() == 1000
                 - #get_learning_rate_multiplier() == 1
                 - #get_weight_decay_multiplier() == 0
+                - #get_eps() == tt::DEFAULT_BATCH_NORM_EPS
         !*/

         explicit bn_(
-            unsigned long window_size
+            unsigned long window_size,
+            double eps = tt::DEFAULT_BATCH_NORM_EPS
         );
         /*!
             requires
+                - eps > 0
             ensures
                 - #get_mode() == mode
                 - #get_running_stats_window_size() == window_size
                 - #get_learning_rate_multiplier() == 1
                 - #get_weight_decay_multiplier() == 0
+                - #get_eps() == eps
         !*/

         layer_mode get_mode(
@@ -886,6 +893,15 @@ namespace dlib
               normalization after a convolutional layer you should use CONV_MODE.
         !*/

+        double get_eps(
+        ) const;
+        /*!
+            ensures
+                - When doing batch normalization, we are dividing by the standard
+                  deviation.  This epsilon value returned by this function is added to the
+                  variance to prevent the division from dividing by zero.
+        !*/

         unsigned long get_running_stats_window_size (
         ) const;
         /*!
dlib/dnn/tensor_tools.cpp

@@ -337,6 +337,7 @@ namespace dlib { namespace tt
 // ----------------------------------------------------------------------------------------

     void batch_normalize_inference (
+        const double eps,
         resizable_tensor& dest,
         const tensor& src,
         const tensor& gamma,
@@ -346,13 +347,14 @@ namespace dlib { namespace tt
     )
     {
 #ifdef DLIB_USE_CUDA
-        cuda::batch_normalize_inference(dest, src, gamma, beta, running_means, running_variances);
+        cuda::batch_normalize_inference(eps, dest, src, gamma, beta, running_means, running_variances);
 #else
-        cpu::batch_normalize_inference(dest, src, gamma, beta, running_means, running_variances);
+        cpu::batch_normalize_inference(eps, dest, src, gamma, beta, running_means, running_variances);
 #endif
     }

     void batch_normalize (
+        const double eps,
         resizable_tensor& dest,
         resizable_tensor& means,
         resizable_tensor& vars,
@@ -365,13 +367,14 @@ namespace dlib { namespace tt
     )
     {
 #ifdef DLIB_USE_CUDA
-        cuda::batch_normalize(dest, means, vars, averaging_factor, running_means, running_variances, src, gamma, beta);
+        cuda::batch_normalize(eps, dest, means, vars, averaging_factor, running_means, running_variances, src, gamma, beta);
 #else
-        cpu::batch_normalize(dest, means, vars, averaging_factor, running_means, running_variances, src, gamma, beta);
+        cpu::batch_normalize(eps, dest, means, vars, averaging_factor, running_means, running_variances, src, gamma, beta);
 #endif
     }

     void batch_normalize_gradient (
+        const double eps,
         const tensor& gradient_input,
         const tensor& means,
         const tensor& invstds,
@@ -384,15 +387,16 @@ namespace dlib { namespace tt
     {
 #ifdef DLIB_USE_CUDA
-        cuda::batch_normalize_gradient(gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
+        cuda::batch_normalize_gradient(eps, gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
 #else
-        cpu::batch_normalize_gradient(gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
+        cpu::batch_normalize_gradient(eps, gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
 #endif
     }

 // ----------------------------------------------------------------------------------------

     void batch_normalize_conv_inference (
+        const double eps,
         resizable_tensor& dest,
         const tensor& src,
         const tensor& gamma,
@@ -402,13 +406,14 @@ namespace dlib { namespace tt
     )
     {
 #ifdef DLIB_USE_CUDA
-        cuda::batch_normalize_conv_inference(dest, src, gamma, beta, running_means, running_variances);
+        cuda::batch_normalize_conv_inference(eps, dest, src, gamma, beta, running_means, running_variances);
 #else
-        cpu::batch_normalize_conv_inference(dest, src, gamma, beta, running_means, running_variances);
+        cpu::batch_normalize_conv_inference(eps, dest, src, gamma, beta, running_means, running_variances);
 #endif
     }

     void batch_normalize_conv (
+        const double eps,
         resizable_tensor& dest,
         resizable_tensor& means,
         resizable_tensor& vars,
@@ -421,13 +426,14 @@ namespace dlib { namespace tt
     )
     {
 #ifdef DLIB_USE_CUDA
-        cuda::batch_normalize_conv(dest, means, vars, averaging_factor, running_means, running_variances, src, gamma, beta);
+        cuda::batch_normalize_conv(eps, dest, means, vars, averaging_factor, running_means, running_variances, src, gamma, beta);
 #else
-        cpu::batch_normalize_conv(dest, means, vars, averaging_factor, running_means, running_variances, src, gamma, beta);
+        cpu::batch_normalize_conv(eps, dest, means, vars, averaging_factor, running_means, running_variances, src, gamma, beta);
 #endif
     }

     void batch_normalize_conv_gradient (
+        const double eps,
         const tensor& gradient_input,
         const tensor& means,
         const tensor& invstds,
@@ -440,9 +446,9 @@ namespace dlib { namespace tt
     {
 #ifdef DLIB_USE_CUDA
-        cuda::batch_normalize_conv_gradient(gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
+        cuda::batch_normalize_conv_gradient(eps, gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
 #else
-        cpu::batch_normalize_conv_gradient(gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
+        cpu::batch_normalize_conv_gradient(eps, gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
 #endif
     }
dlib/dnn/tensor_tools.h

@@ -370,9 +370,8 @@ namespace dlib { namespace tt
 // ----------------------------------------------------------------------------------------

-    const double BATCH_NORM_EPS = 0.00001;

     void batch_normalize_inference (
+        const double eps,
         resizable_tensor& dest,
         const tensor& src,
         const tensor& gamma,
@@ -382,6 +381,7 @@ namespace dlib { namespace tt
     );
     /*!
         requires
+            - eps > 0
            - gamma.num_samples() == 1
            - gamma.nr() == src.nr()
            - gamma.nc() == src.nc()
@@ -393,11 +393,12 @@ namespace dlib { namespace tt
            - Linearly transforms src as a call to batch_normalize() would if src had means
              and variances as given by running_means and running_variances.  That is, this
              function performs:
-               dest = gamma*(src-running_means)/sqrt(running_variances+BATCH_NORM_EPS) + beta
+               dest = gamma*(src-running_means)/sqrt(running_variances+eps) + beta
              Note that it does it in a pointwise fashion over the samples in src.
     !*/

     void batch_normalize (
+        const double eps,
         resizable_tensor& dest,
         resizable_tensor& means,
         resizable_tensor& invstds,
@@ -410,6 +411,7 @@ namespace dlib { namespace tt
     );
     /*!
         requires
+            - eps > 0
            - src.num_samples() > 1
            - gamma.num_samples() == 1
            - beta.num_samples() == 1
@@ -435,6 +437,7 @@ namespace dlib { namespace tt
     !*/

     void batch_normalize_gradient (
+        const double eps,
         const tensor& gradient_input,
         const tensor& means,
         const tensor& invstds,
@@ -446,8 +449,9 @@ namespace dlib { namespace tt
     );
     /*!
         requires
+            - eps > 0
            - invstds and means should be the output of a call to
-             batch_normalize(dest,means,invstds,src,gamma,beta)
+             batch_normalize(eps,dest,means,invstds,src,gamma,beta)
            - have_same_dimensions(gradient_input, src) == true
            - have_same_dimensions(src, src_grad) == true
            - src.num_samples() > 1
@@ -461,7 +465,7 @@ namespace dlib { namespace tt
            - have_same_dimensions(invstds, gamma) == true
        ensures
            - Let f(src,gamma,beta) == dot(gradient_input, dest output of
-             batch_normalize(dest,means,invstds,src,gamma,beta))
+             batch_normalize(eps,dest,means,invstds,src,gamma,beta))
            - Adds the gradient of f() with respect to src to #src_grad.
            - Assigns the gradient of f() with respect to gamma to #gamma_grad.
            - Assigns the gradient of f() with respect to beta to #beta_grad.
@@ -470,6 +474,7 @@ namespace dlib { namespace tt
 // ----------------------------------------------------------------------------------------

     void batch_normalize_conv_inference (
+        const double eps,
         resizable_tensor& dest,
         const tensor& src,
         const tensor& gamma,
@@ -479,6 +484,7 @@ namespace dlib { namespace tt
     );
     /*!
         requires
+            - eps > 0
            - gamma.num_samples() == 1
            - gamma.nr() == 1
            - gamma.nc() == 1
@@ -490,12 +496,13 @@ namespace dlib { namespace tt
            - Linearly transforms src as a call to batch_normalize_conv() would if src had
              means and variances as given by running_means and running_variances.  That
              is, this function performs:
-               dest = gamma*(src-running_means)/sqrt(running_variances+BATCH_NORM_EPS) + beta
+               dest = gamma*(src-running_means)/sqrt(running_variances+eps) + beta
              Note that it does this in a pointwise fashion over the samples, rows, and
              columns in src.
     !*/

     void batch_normalize_conv (
+        const double eps,
         resizable_tensor& dest,
         resizable_tensor& means,
         resizable_tensor& invstds,
@@ -508,6 +515,7 @@ namespace dlib { namespace tt
     );
     /*!
         requires
+            - eps > 0
            - src.num_samples() > 1
            - gamma.num_samples()==gamma.nr()==gamma.nc() == 1
            - beta.num_samples() ==beta.nr() ==gamma.nc() == 1
@@ -529,6 +537,7 @@ namespace dlib { namespace tt
     !*/

     void batch_normalize_conv_gradient (
+        const double eps,
         const tensor& gradient_input,
         const tensor& means,
         const tensor& invstds,
@@ -540,8 +549,9 @@ namespace dlib { namespace tt
     );
     /*!
         requires
+            - eps > 0
            - invstds and means should be the output of a call to
-             batch_normalize_conv(dest,means,invstds,src,gamma,beta)
+             batch_normalize_conv(eps,dest,means,invstds,src,gamma,beta)
            - have_same_dimensions(gradient_input, src) == true
            - have_same_dimensions(src, src_grad) == true
            - src.num_samples() > 1
@@ -553,7 +563,7 @@ namespace dlib { namespace tt
            - have_same_dimensions(invstds, gamma) == true
        ensures
            - Let f(src,gamma,beta) == dot(gradient_input, dest output of
-             batch_normalize_conv(dest,means,invstds,src,gamma,beta))
+             batch_normalize_conv(eps,dest,means,invstds,src,gamma,beta))
            - Adds the gradient of f() with respect to src to #src_grad.
            - Assigns the gradient of f() with respect to gamma to #gamma_grad.
            - Assigns the gradient of f() with respect to beta to #beta_grad.
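At the tensor_tools level the epsilon is simply the new leading argument. A hedged sketch of a direct call, using illustrative shapes and values that satisfy the requirements quoted above (not code from the commit):

    using namespace dlib;

    resizable_tensor src(5, 3), dest;                       // 5 samples, k == 3
    resizable_tensor gamma(1, 3), beta(1, 3);
    resizable_tensor running_means(1, 3), running_variances(1, 3);
    gamma = 1; beta = 0; running_means = 0; running_variances = 1; src = 2;

    const double eps = 1e-5;   // previously the hard-coded tt::BATCH_NORM_EPS
    tt::batch_normalize_inference(eps, dest, src, gamma, beta,
                                  running_means, running_variances);
    // dest == gamma*(src - running_means)/sqrt(running_variances + eps) + beta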