Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
32125dea
Commit
32125dea
authored
Nov 16, 2015
by
Davis King
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Optimized batch normalization code
parent
273a21cf
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
84 additions
and
86 deletions
+84
-86
cpu_dlib.cpp
dlib/dnn/cpu_dlib.cpp
+40
-42
cpu_dlib.h
dlib/dnn/cpu_dlib.h
+4
-4
cuda_dlib.h
dlib/dnn/cuda_dlib.h
+20
-20
tensor_tools.h
dlib/dnn/tensor_tools.h
+20
-20
No files found.
dlib/dnn/cpu_dlib.cpp
View file @
32125dea
...
...
@@ -87,7 +87,7 @@ namespace dlib
void
batch_normalize
(
resizable_tensor
&
dest
,
resizable_tensor
&
means
,
resizable_tensor
&
var
s
,
resizable_tensor
&
invstd
s
,
const
tensor
&
src
,
const
tensor
&
gamma
,
const
tensor
&
beta
...
...
@@ -115,12 +115,12 @@ namespace dlib
dest
.
copy_size
(
src
);
means
.
set_size
(
1
,
src
.
k
(),
src
.
nr
(),
src
.
nc
());
var
s
.
set_size
(
1
,
src
.
k
(),
src
.
nr
(),
src
.
nc
());
invstd
s
.
set_size
(
1
,
src
.
k
(),
src
.
nr
(),
src
.
nc
());
// first compute means and
var
s
// first compute means and
invstd
s
means
=
0
;
var
s
=
0
;
const
auto
p_
vars
=
var
s
.
host
();
invstd
s
=
0
;
const
auto
p_
invstds
=
invstd
s
.
host
();
const
auto
p_means
=
means
.
host
();
auto
p_src
=
src
.
host
();
const
long
num
=
src
.
k
()
*
src
.
nr
()
*
src
.
nc
();
...
...
@@ -131,23 +131,23 @@ namespace dlib
{
float
val
=
p_src
[
n
*
num
+
i
];
p_means
[
i
]
+=
val
;
p_
var
s
[
i
]
+=
val
*
val
;
p_
invstd
s
[
i
]
+=
val
*
val
;
}
}
means
/=
src
.
num_samples
();
var
s
/=
src
.
num_samples
();
invstd
s
/=
src
.
num_samples
();
// copy data back to host
var
s
.
host
();
means
.
host
();
invstd
s
.
host
();
means
.
host
();
const
float
eps
=
0.00001
;
p_src
=
src
.
host
();
// compute variances
for
(
long
i
=
0
;
i
<
num
;
++
i
)
{
p_vars
[
i
]
=
p_vars
[
i
]
-
p_means
[
i
]
*
p_means
[
i
];
auto
actual_var
=
p_invstds
[
i
]
-
p_means
[
i
]
*
p_means
[
i
];
p_invstds
[
i
]
=
1.0
/
std
::
sqrt
(
actual_var
+
eps
);
}
// TODO, must match eps in batch_normalize_gradient() so make this a shared variable.
const
float
eps
=
0.00001
;
p_src
=
src
.
host
();
auto
p_dest
=
dest
.
host
();
const
auto
p_gamma
=
gamma
.
host
();
...
...
@@ -156,7 +156,7 @@ namespace dlib
{
for
(
long
i
=
0
;
i
<
num
;
++
i
)
{
*
p_dest
=
(
*
p_src
-
p_means
[
i
])
/
std
::
sqrt
(
p_vars
[
i
]
+
eps
)
;
*
p_dest
=
(
*
p_src
-
p_means
[
i
])
*
p_invstds
[
i
]
;
*
p_dest
=
(
*
p_dest
)
*
p_gamma
[
i
]
+
p_beta
[
i
];
++
p_src
;
++
p_dest
;
...
...
@@ -167,7 +167,7 @@ namespace dlib
void
batch_normalize_gradient
(
const
tensor
&
gradient_input
,
const
tensor
&
means
,
const
tensor
&
var
s
,
const
tensor
&
invstd
s
,
const
tensor
&
src
,
const
tensor
&
gamma
,
tensor
&
src_grad
,
...
...
@@ -175,11 +175,10 @@ namespace dlib
tensor
&
beta_grad
)
{
const
float
eps
=
0.00001
;
const
long
num
=
src
.
k
()
*
src
.
nr
()
*
src
.
nc
();
DLIB_CASSERT
(
num
==
means
.
size
(),
""
);
DLIB_CASSERT
(
num
==
var
s
.
size
(),
""
);
DLIB_CASSERT
(
num
==
invstd
s
.
size
(),
""
);
DLIB_CASSERT
(
num
==
gamma
.
size
(),
""
);
DLIB_CASSERT
(
num
==
gamma_grad
.
size
(),
""
);
DLIB_CASSERT
(
num
==
beta_grad
.
size
(),
""
);
...
...
@@ -190,11 +189,11 @@ namespace dlib
const
auto
p_gamma
=
gamma
.
host
();
const
auto
p_gamma_grad
=
gamma_grad
.
host
();
const
auto
p_beta_grad
=
beta_grad
.
host
();
const
auto
p_
vars
=
var
s
.
host
();
const
auto
p_
invstds
=
invstd
s
.
host
();
const
auto
p_means
=
means
.
host
();
resizable_tensor
dvars
,
dmeans
;
dvars
.
copy_size
(
var
s
);
dvars
.
copy_size
(
invstd
s
);
dmeans
.
copy_size
(
means
);
dvars
=
0
;
dmeans
=
0
;
...
...
@@ -205,13 +204,13 @@ namespace dlib
{
for
(
long
i
=
0
;
i
<
num
;
++
i
)
{
const
float
x_hat
=
(
*
p_src
-
p_means
[
i
])
/
std
::
sqrt
(
p_vars
[
i
]
+
eps
)
;
const
float
x_hat
=
(
*
p_src
-
p_means
[
i
])
*
p_invstds
[
i
]
;
p_beta_grad
[
i
]
+=
*
p_grad
;
p_gamma_grad
[
i
]
+=
(
*
p_grad
)
*
x_hat
;
const
float
dx
=
*
p_grad
*
p_gamma
[
i
];
p_dvars
[
i
]
+=
dx
*
(
*
p_src
-
p_means
[
i
])
*
-
0.5
*
std
::
pow
(
p_
vars
[
i
]
+
eps
,
-
3.0
f
/
2
);
p_dvars
[
i
]
+=
dx
*
(
*
p_src
-
p_means
[
i
])
*
-
0.5
*
std
::
pow
(
p_
invstds
[
i
],
3.0
f
);
++
p_grad
;
++
p_src
;
...
...
@@ -226,7 +225,7 @@ namespace dlib
{
const
float
dx
=
*
p_grad
*
p_gamma
[
i
];
p_dmeans
[
i
]
+=
dx
*-
1
/
std
::
sqrt
(
p_vars
[
i
]
+
eps
)
+
p_dvars
[
i
]
*
-
2
*
(
*
p_src
-
p_means
[
i
])
/
src
.
num_samples
();
p_dmeans
[
i
]
+=
dx
*-
p_invstds
[
i
]
+
p_dvars
[
i
]
*
-
2
*
(
*
p_src
-
p_means
[
i
])
/
src
.
num_samples
();
++
p_grad
;
++
p_src
;
...
...
@@ -241,7 +240,7 @@ namespace dlib
{
const
float
dx
=
*
p_grad
*
p_gamma
[
i
];
*
p_src_grad
+=
dx
/
std
::
sqrt
(
p_vars
[
i
]
+
eps
)
+
*
p_src_grad
+=
dx
*
p_invstds
[
i
]
+
p_dvars
[
i
]
*
2
*
(
*
p_src
-
p_means
[
i
])
/
src
.
num_samples
()
+
p_dmeans
[
i
]
/
src
.
num_samples
();
...
...
@@ -258,7 +257,7 @@ namespace dlib
void
batch_normalize_conv
(
resizable_tensor
&
dest
,
resizable_tensor
&
means
,
resizable_tensor
&
var
s
,
resizable_tensor
&
invstd
s
,
const
tensor
&
src
,
const
tensor
&
gamma
,
const
tensor
&
beta
...
...
@@ -288,12 +287,12 @@ namespace dlib
dest
.
copy_size
(
src
);
means
.
set_size
(
1
,
src
.
k
());
var
s
.
set_size
(
1
,
src
.
k
());
invstd
s
.
set_size
(
1
,
src
.
k
());
// first compute means and
var
s
// first compute means and
invstd
s
means
=
0
;
var
s
=
0
;
const
auto
p_
vars
=
var
s
.
host
();
invstd
s
=
0
;
const
auto
p_
invstds
=
invstd
s
.
host
();
const
auto
p_means
=
means
.
host
();
const
auto
p_gamma
=
gamma
.
host
();
const
auto
p_beta
=
beta
.
host
();
...
...
@@ -307,25 +306,25 @@ namespace dlib
for
(
long
i
=
0
;
i
<
num
;
++
i
)
{
p_means
[
k
]
+=
*
p_src
;
p_
var
s
[
k
]
+=
(
*
p_src
)
*
(
*
p_src
);
p_
invstd
s
[
k
]
+=
(
*
p_src
)
*
(
*
p_src
);
++
p_src
;
}
}
}
means
/=
src
.
num_samples
()
*
num
;
var
s
/=
src
.
num_samples
()
*
num
;
invstd
s
/=
src
.
num_samples
()
*
num
;
// copy data back to host
var
s
.
host
();
means
.
host
();
invstd
s
.
host
();
means
.
host
();
const
float
eps
=
0.00001
;
p_src
=
src
.
host
();
// compute variances
for
(
long
k
=
0
;
k
<
src
.
k
();
++
k
)
{
p_vars
[
k
]
=
p_vars
[
k
]
-
p_means
[
k
]
*
p_means
[
k
];
auto
actual_var
=
p_invstds
[
k
]
-
p_means
[
k
]
*
p_means
[
k
];
p_invstds
[
k
]
=
1.0
/
std
::
sqrt
(
actual_var
+
eps
);
}
// TODO, must match eps in batch_normalize_gradient() so make this a shared variable.
const
float
eps
=
0.00001
;
p_src
=
src
.
host
();
auto
p_dest
=
dest
.
host
();
for
(
long
n
=
0
;
n
<
src
.
num_samples
();
++
n
)
...
...
@@ -334,7 +333,7 @@ namespace dlib
{
for
(
long
i
=
0
;
i
<
num
;
++
i
)
{
*
p_dest
=
(
*
p_src
-
p_means
[
k
])
/
std
::
sqrt
(
p_vars
[
k
]
+
eps
)
;
*
p_dest
=
(
*
p_src
-
p_means
[
k
])
*
p_invstds
[
k
]
;
*
p_dest
=
(
*
p_dest
)
*
p_gamma
[
k
]
+
p_beta
[
k
];
++
p_src
;
++
p_dest
;
...
...
@@ -346,7 +345,7 @@ namespace dlib
void
batch_normalize_conv_gradient
(
const
tensor
&
gradient_input
,
const
tensor
&
means
,
const
tensor
&
var
s
,
const
tensor
&
invstd
s
,
const
tensor
&
src
,
const
tensor
&
gamma
,
tensor
&
src_grad
,
...
...
@@ -354,11 +353,10 @@ namespace dlib
tensor
&
beta_grad
)
{
const
float
eps
=
0.00001
;
const
long
num
=
src
.
nr
()
*
src
.
nc
();
DLIB_CASSERT
(
src
.
k
()
==
means
.
size
(),
""
);
DLIB_CASSERT
(
src
.
k
()
==
var
s
.
size
(),
""
);
DLIB_CASSERT
(
src
.
k
()
==
invstd
s
.
size
(),
""
);
DLIB_CASSERT
(
src
.
k
()
==
gamma
.
size
(),
""
);
DLIB_CASSERT
(
src
.
k
()
==
gamma_grad
.
size
(),
""
);
DLIB_CASSERT
(
src
.
k
()
==
beta_grad
.
size
(),
""
);
...
...
@@ -369,11 +367,11 @@ namespace dlib
const
auto
p_gamma
=
gamma
.
host
();
const
auto
p_gamma_grad
=
gamma_grad
.
host
();
const
auto
p_beta_grad
=
beta_grad
.
host
();
const
auto
p_
vars
=
var
s
.
host
();
const
auto
p_
invstds
=
invstd
s
.
host
();
const
auto
p_means
=
means
.
host
();
resizable_tensor
dvars
,
dmeans
;
dvars
.
copy_size
(
var
s
);
dvars
.
copy_size
(
invstd
s
);
dmeans
.
copy_size
(
means
);
dvars
=
0
;
dmeans
=
0
;
...
...
@@ -386,13 +384,13 @@ namespace dlib
{
for
(
long
i
=
0
;
i
<
num
;
++
i
)
{
const
float
x_hat
=
(
*
p_src
-
p_means
[
k
])
/
std
::
sqrt
(
p_vars
[
k
]
+
eps
)
;
const
float
x_hat
=
(
*
p_src
-
p_means
[
k
])
*
p_invstds
[
k
]
;
p_beta_grad
[
k
]
+=
*
p_grad
;
p_gamma_grad
[
k
]
+=
(
*
p_grad
)
*
x_hat
;
const
float
dx
=
*
p_grad
*
p_gamma
[
k
];
p_dvars
[
k
]
+=
dx
*
(
*
p_src
-
p_means
[
k
])
*
-
0.5
*
std
::
pow
(
p_
vars
[
k
]
+
eps
,
-
3.0
f
/
2
);
p_dvars
[
k
]
+=
dx
*
(
*
p_src
-
p_means
[
k
])
*
-
0.5
*
std
::
pow
(
p_
invstds
[
k
],
3.0
f
);
++
p_grad
;
++
p_src
;
...
...
@@ -410,7 +408,7 @@ namespace dlib
{
const
float
dx
=
*
p_grad
*
p_gamma
[
k
];
p_dmeans
[
k
]
+=
dx
*-
1
/
std
::
sqrt
(
p_vars
[
k
]
+
eps
)
+
p_dvars
[
k
]
*
-
2
*
(
*
p_src
-
p_means
[
k
])
/
src
.
num_samples
()
/
num
;
p_dmeans
[
k
]
+=
-
dx
*
p_invstds
[
k
]
+
p_dvars
[
k
]
*
-
2
*
(
*
p_src
-
p_means
[
k
])
/
src
.
num_samples
()
/
num
;
++
p_grad
;
++
p_src
;
...
...
@@ -428,7 +426,7 @@ namespace dlib
{
const
float
dx
=
*
p_grad
*
p_gamma
[
k
];
*
p_src_grad
+=
dx
/
std
::
sqrt
(
p_vars
[
k
]
+
eps
)
+
*
p_src_grad
+=
dx
*
p_invstds
[
k
]
+
p_dvars
[
k
]
*
2
*
(
*
p_src
-
p_means
[
k
])
/
src
.
num_samples
()
/
num
+
p_dmeans
[
k
]
/
src
.
num_samples
()
/
num
;
...
...
dlib/dnn/cpu_dlib.h
View file @
32125dea
...
...
@@ -43,7 +43,7 @@ namespace dlib
void
batch_normalize
(
resizable_tensor
&
dest
,
resizable_tensor
&
means
,
resizable_tensor
&
var
s
,
resizable_tensor
&
invstd
s
,
const
tensor
&
src
,
const
tensor
&
gamma
,
const
tensor
&
beta
...
...
@@ -52,7 +52,7 @@ namespace dlib
void
batch_normalize_gradient
(
const
tensor
&
gradient_input
,
const
tensor
&
means
,
const
tensor
&
var
s
,
const
tensor
&
invstd
s
,
const
tensor
&
src
,
const
tensor
&
gamma
,
tensor
&
src_grad
,
...
...
@@ -63,7 +63,7 @@ namespace dlib
void
batch_normalize_conv
(
resizable_tensor
&
dest
,
resizable_tensor
&
means
,
resizable_tensor
&
var
s
,
resizable_tensor
&
invstd
s
,
const
tensor
&
src
,
const
tensor
&
gamma
,
const
tensor
&
beta
...
...
@@ -72,7 +72,7 @@ namespace dlib
void
batch_normalize_conv_gradient
(
const
tensor
&
gradient_input
,
const
tensor
&
means
,
const
tensor
&
var
s
,
const
tensor
&
invstd
s
,
const
tensor
&
src
,
const
tensor
&
gamma
,
tensor
&
src_grad
,
...
...
dlib/dnn/cuda_dlib.h
View file @
32125dea
...
...
@@ -74,7 +74,7 @@ namespace dlib
void
batch_normalize
(
resizable_tensor
&
dest
,
resizable_tensor
&
means
,
resizable_tensor
&
var
s
,
resizable_tensor
&
invstd
s
,
const
tensor
&
src
,
const
tensor
&
gamma
,
const
tensor
&
beta
...
...
@@ -90,19 +90,19 @@ namespace dlib
ensures
- have_same_dimensions(#dest, src) == true
- #means.num_samples() == 1
- #
var
s.num_samples() == 1
- means.nr() ==
var
s.nr() == src.nr()
- means.nc() ==
var
s.nc() == src.nc()
- means.k() ==
var
s.k() == src.k()
- #
invstd
s.num_samples() == 1
- means.nr() ==
invstd
s.nr() == src.nr()
- means.nc() ==
invstd
s.nc() == src.nc()
- means.k() ==
invstd
s.k() == src.k()
- #src == the batch normalized version of src.
- #means == the mean values of the contents of src.
- #
vars == the variance values of the contents of src
.
- #
invstds == 1/(the standard deviation values of the contents of src)
.
!*/
void
batch_normalize_gradient
(
const
tensor
&
gradient_input
,
const
tensor
&
means
,
const
tensor
&
var
s
,
const
tensor
&
invstd
s
,
const
tensor
&
src
,
const
tensor
&
gamma
,
tensor
&
src_grad
,
...
...
@@ -111,8 +111,8 @@ namespace dlib
);
/*!
requires
-
var
s and means should be the output of a call to
batch_normalize(dest,means,
var
s,src,gamma,beta)
-
invstd
s and means should be the output of a call to
batch_normalize(dest,means,
invstd
s,src,gamma,beta)
- have_same_dimensions(gradient_input, src) == true
- have_same_dimensions(src, src_grad) == true
- src.num_samples() > 1
...
...
@@ -123,10 +123,10 @@ namespace dlib
- gamma.nc() == src.nc()
- gamma.k() == src.k()
- have_same_dimensions(means, gamma) == true
- have_same_dimensions(
var
s, gamma) == true
- have_same_dimensions(
invstd
s, gamma) == true
ensures
- Let f(src,gamma,beta) == dot(gradient_input, dest output of
batch_normalize(dest,means,
var
s,src,gamma,beta))
batch_normalize(dest,means,
invstd
s,src,gamma,beta))
- Adds the gradient of f() with respect to src to #src_grad.
- Adds the gradient of f() with respect to gamma to #gamma_grad.
- Adds the gradient of f() with respect to beta to #beta_grad.
...
...
@@ -135,7 +135,7 @@ namespace dlib
void
batch_normalize_conv
(
resizable_tensor
&
dest
,
resizable_tensor
&
means
,
resizable_tensor
&
var
s
,
resizable_tensor
&
invstd
s
,
const
tensor
&
src
,
const
tensor
&
gamma
,
const
tensor
&
beta
...
...
@@ -149,17 +149,17 @@ namespace dlib
ensures
- have_same_dimensions(#dest, src) == true
- #means.num_samples()==means.nr()==means.nc() == 1
- #
vars.num_samples() ==vars.nr() ==var
s.nc() == 1
- means.k() ==
var
s.k() == src.k()
- #
invstds.num_samples() ==invstds.nr() ==invstd
s.nc() == 1
- means.k() ==
invstd
s.k() == src.k()
- #src == the batch normalized version of src.
- #means == the mean values of the contents of src.
- #
vars == the variance values of the contents of src
.
- #
invstds == 1/(the standard deviation values of the contents of src)
.
!*/
void
batch_normalize_conv_gradient
(
const
tensor
&
gradient_input
,
const
tensor
&
means
,
const
tensor
&
var
s
,
const
tensor
&
invstd
s
,
const
tensor
&
src
,
const
tensor
&
gamma
,
tensor
&
src_grad
,
...
...
@@ -168,8 +168,8 @@ namespace dlib
);
/*!
requires
-
var
s and means should be the output of a call to
batch_normalize_conv(dest,means,
var
s,src,gamma,beta)
-
invstd
s and means should be the output of a call to
batch_normalize_conv(dest,means,
invstd
s,src,gamma,beta)
- have_same_dimensions(gradient_input, src) == true
- have_same_dimensions(src, src_grad) == true
- src.num_samples() > 1
...
...
@@ -178,10 +178,10 @@ namespace dlib
- have_same_dimensions(gamma, beta_grad) == true
- gamma.k() == src.k()
- have_same_dimensions(means, gamma) == true
- have_same_dimensions(
var
s, gamma) == true
- have_same_dimensions(
invstd
s, gamma) == true
ensures
- Let f(src,gamma,beta) == dot(gradient_input, dest output of
batch_normalize_conv(dest,means,
var
s,src,gamma,beta))
batch_normalize_conv(dest,means,
invstd
s,src,gamma,beta))
- Adds the gradient of f() with respect to src to #src_grad.
- Adds the gradient of f() with respect to gamma to #gamma_grad.
- Adds the gradient of f() with respect to beta to #beta_grad.
...
...
dlib/dnn/tensor_tools.h
View file @
32125dea
...
...
@@ -150,7 +150,7 @@ namespace dlib { namespace tt
void
batch_normalize
(
resizable_tensor
&
dest
,
resizable_tensor
&
means
,
resizable_tensor
&
var
s
,
resizable_tensor
&
invstd
s
,
const
tensor
&
src
,
const
tensor
&
gamma
,
const
tensor
&
beta
...
...
@@ -166,13 +166,13 @@ namespace dlib { namespace tt
ensures
- have_same_dimensions(#dest, src) == true
- #means.num_samples() == 1
- #
var
s.num_samples() == 1
- means.nr() ==
var
s.nr() == src.nr()
- means.nc() ==
var
s.nc() == src.nc()
- means.k() ==
var
s.k() == src.k()
- #
invstd
s.num_samples() == 1
- means.nr() ==
invstd
s.nr() == src.nr()
- means.nc() ==
invstd
s.nc() == src.nc()
- means.k() ==
invstd
s.k() == src.k()
- #src == the batch normalized version of src.
- #means == the mean values of the contents of src.
- #
vars == the variance values of the contents of src
.
- #
invstds == 1/(the standard deviation values of the contents of src)
.
!*/
// ----------------------------------------------------------------------------------------
...
...
@@ -180,7 +180,7 @@ namespace dlib { namespace tt
void
batch_normalize_gradient
(
const
tensor
&
gradient_input
,
const
tensor
&
means
,
const
tensor
&
var
s
,
const
tensor
&
invstd
s
,
const
tensor
&
src
,
const
tensor
&
gamma
,
tensor
&
src_grad
,
...
...
@@ -189,8 +189,8 @@ namespace dlib { namespace tt
);
/*!
requires
-
var
s and means should be the output of a call to
batch_normalize(dest,means,
var
s,src,gamma,beta)
-
invstd
s and means should be the output of a call to
batch_normalize(dest,means,
invstd
s,src,gamma,beta)
- have_same_dimensions(gradient_input, src) == true
- have_same_dimensions(src, src_grad) == true
- src.num_samples() > 1
...
...
@@ -201,10 +201,10 @@ namespace dlib { namespace tt
- gamma.nc() == src.nc()
- gamma.k() == src.k()
- have_same_dimensions(means, gamma) == true
- have_same_dimensions(
var
s, gamma) == true
- have_same_dimensions(
invstd
s, gamma) == true
ensures
- Let f(src,gamma,beta) == dot(gradient_input, dest output of
batch_normalize(dest,means,
var
s,src,gamma,beta))
batch_normalize(dest,means,
invstd
s,src,gamma,beta))
- Adds the gradient of f() with respect to src to #src_grad.
- Adds the gradient of f() with respect to gamma to #gamma_grad.
- Adds the gradient of f() with respect to beta to #beta_grad.
...
...
@@ -213,7 +213,7 @@ namespace dlib { namespace tt
void
batch_normalize_conv
(
resizable_tensor
&
dest
,
resizable_tensor
&
means
,
resizable_tensor
&
var
s
,
resizable_tensor
&
invstd
s
,
const
tensor
&
src
,
const
tensor
&
gamma
,
const
tensor
&
beta
...
...
@@ -227,17 +227,17 @@ namespace dlib { namespace tt
ensures
- have_same_dimensions(#dest, src) == true
- #means.num_samples()==means.nr()==means.nc() == 1
- #
vars.num_samples() ==vars.nr() ==var
s.nc() == 1
- means.k() ==
var
s.k() == src.k()
- #
invstds.num_samples() ==invstds.nr() ==invstd
s.nc() == 1
- means.k() ==
invstd
s.k() == src.k()
- #src == the batch normalized version of src.
- #means == the mean values of the contents of src.
- #
vars == the variance values of the contents of src
.
- #
invstds == 1/(the standard deviation values of the contents of src)
.
!*/
void
batch_normalize_conv_gradient
(
const
tensor
&
gradient_input
,
const
tensor
&
means
,
const
tensor
&
var
s
,
const
tensor
&
invstd
s
,
const
tensor
&
src
,
const
tensor
&
gamma
,
tensor
&
src_grad
,
...
...
@@ -246,8 +246,8 @@ namespace dlib { namespace tt
);
/*!
requires
-
var
s and means should be the output of a call to
batch_normalize_conv(dest,means,
var
s,src,gamma,beta)
-
invstd
s and means should be the output of a call to
batch_normalize_conv(dest,means,
invstd
s,src,gamma,beta)
- have_same_dimensions(gradient_input, src) == true
- have_same_dimensions(src, src_grad) == true
- src.num_samples() > 1
...
...
@@ -256,10 +256,10 @@ namespace dlib { namespace tt
- have_same_dimensions(gamma, beta_grad) == true
- gamma.k() == src.k()
- have_same_dimensions(means, gamma) == true
- have_same_dimensions(
var
s, gamma) == true
- have_same_dimensions(
invstd
s, gamma) == true
ensures
- Let f(src,gamma,beta) == dot(gradient_input, dest output of
batch_normalize_conv(dest,means,
var
s,src,gamma,beta))
batch_normalize_conv(dest,means,
invstd
s,src,gamma,beta))
- Adds the gradient of f() with respect to src to #src_grad.
- Adds the gradient of f() with respect to gamma to #gamma_grad.
- Adds the gradient of f() with respect to beta to #beta_grad.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment