Commit 669a1e17 authored Jan 23, 2016 by Davis King
Added affine_transform_conv() and multiply_conv() as well as a CPU
implementation of assign_conv_bias_gradient().
parent e44b2aa2
Showing 6 changed files with 328 additions and 3 deletions:

    dlib/dnn/cpu_dlib.cpp      +124  -1
    dlib/dnn/cpu_dlib.h         +20  -0
    dlib/dnn/cuda_dlib.cu       +88  -0
    dlib/dnn/cuda_dlib.h        +15  -0
    dlib/dnn/tensor_tools.cpp   +30  -2
    dlib/dnn/tensor_tools.h     +51  -0
dlib/dnn/cpu_dlib.cpp
...
...
@@ -54,6 +54,57 @@ namespace dlib
        }
    }

    void multiply_conv (
        tensor& dest,
        const tensor& src1,
        const tensor& src2
    )
    {
        auto d = dest.host();
        auto s1 = src1.host();
        auto s2 = src2.host();
        if (have_same_dimensions(dest,src1))
        {
            // First mode: scale each channel plane of src1 by src2(k).
            DLIB_CASSERT(src2.num_samples() == 1 && src2.nr() == 1 && src2.nc() == 1 && src2.k() == src1.k(),"");
            for (long n = 0; n < dest.num_samples(); ++n)
            {
                for (long k = 0; k < dest.k(); ++k)
                {
                    for (long r = 0; r < dest.nr(); ++r)
                    {
                        for (long c = 0; c < dest.nc(); ++c)
                        {
                            *d++ = (*s1++)*s2[k];
                        }
                    }
                }
            }
        }
        else
        {
            // Second mode: accumulate per-channel sums of elementwise
            // products into the k() elements of dest.
            DLIB_CASSERT(have_same_dimensions(src1,src2),"");
            DLIB_CASSERT(dest.num_samples() == 1 && dest.nr() == 1 && dest.nc() == 1 && dest.k() == src1.k(),"");
            for (long k = 0; k < src1.k(); ++k)
                d[k] = 0;
            for (long n = 0; n < src1.num_samples(); ++n)
            {
                for (long k = 0; k < src1.k(); ++k)
                {
                    for (long r = 0; r < src1.nr(); ++r)
                    {
                        for (long c = 0; c < src1.nc(); ++c)
                        {
                            d[k] += (*s1++)*(*s2++);
                        }
                    }
                }
            }
        }
    }
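An aside (my commentary, not part of the commit): the second branch is exactly the reduction needed to backprop through the first. If the forward pass computes y(n,k,r,c) = x(n,k,r,c)*s(k), then dL/ds(k) = sum over {n,r,c} of gy(n,k,r,c)*x(n,k,r,c), which is multiply_conv's second mode with src1 = gy and src2 = x. A minimal standalone sketch of that identity in plain C++ (hypothetical illustration, not dlib code):

#include <cstdio>
#include <vector>

int main()
{
    const int ks = 2, bs = 3;                  // 2 channels, 3 elements per plane
    std::vector<float> x = {1,2,3,  4,5,6};    // one sample laid out (k, r*c)
    std::vector<float> s = {10, 100};          // per-channel multiplier
    std::vector<float> y(ks*bs), g(ks, 0);

    // forward (multiply_conv, first mode): y(k,i) = x(k,i)*s(k)
    for (int k = 0; k < ks; ++k)
        for (int i = 0; i < bs; ++i)
            y[k*bs+i] = x[k*bs+i]*s[k];

    // gradient wrt s (multiply_conv, second mode), taking the incoming
    // gradient gy = 1 everywhere for illustration: g(k) = sum_i gy*x(k,i)
    for (int k = 0; k < ks; ++k)
        for (int i = 0; i < bs; ++i)
            g[k] += 1.0f*x[k*bs+i];

    std::printf("g = %g, %g\n", g[0], g[1]);   // prints: g = 6, 15
    return 0;
}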
    void add (
        float beta,
        tensor& dest,
...
...
@@ -196,6 +247,44 @@ namespace dlib
        }
    }

    // ------------------------------------------------------------------------------------

    void assign_conv_bias_gradient (
        tensor& grad,
        const tensor& gradient_input
    )
    {
        DLIB_CASSERT(grad.num_samples() == 1 &&
                     grad.k() >= 1 &&
                     grad.nr() == 1 &&
                     grad.nc() == 1 &&
                     gradient_input.k() == grad.k() &&
                     gradient_input.size() > 0 &&
                     is_same_object(grad,gradient_input) == false,
                     "");

        auto g = grad.host();
        auto gi = gradient_input.host();
        // Sum gradient_input over samples, rows, and columns, leaving one
        // accumulated bias gradient per channel in grad.
        for (long k = 0; k < gradient_input.k(); ++k)
            g[k] = 0;
        for (long n = 0; n < gradient_input.num_samples(); ++n)
        {
            for (long k = 0; k < gradient_input.k(); ++k)
            {
                for (long r = 0; r < gradient_input.nr(); ++r)
                {
                    for (long c = 0; c < gradient_input.nc(); ++c)
                    {
                        g[k] += (*gi++);
                    }
                }
            }
        }
    }

    // -----------------------------------------------------------------------------------

    void affine_transform (
...
...
@@ -293,6 +382,41 @@ namespace dlib
        }
    }

    // -----------------------------------------------------------------------------------

    void affine_transform_conv (
        tensor& dest,
        const tensor& src,
        const tensor& A,
        const tensor& B
    )
    {
        DLIB_CASSERT(have_same_dimensions(dest, src),"");
        DLIB_CASSERT(have_same_dimensions(A, B),"");
        DLIB_CASSERT(A.num_samples() == 1 && A.nr() == 1 && A.nc() == 1 && A.k() == src.k(),"");

        auto d = dest.host();
        auto s = src.host();
        const auto a = A.host();
        const auto b = B.host();
        // Per-channel affine transform: dest(n,k,r,c) = a(k)*src(n,k,r,c) + b(k).
        for (long n = 0; n < dest.num_samples(); ++n)
        {
            for (long k = 0; k < dest.k(); ++k)
            {
                for (long r = 0; r < dest.nr(); ++r)
                {
                    for (long c = 0; c < dest.nc(); ++c)
                    {
                        *d++ = a[k]*(*s++) + b[k];
                    }
                }
            }
        }
    }

    // -----------------------------------------------------------------------------------

    void batch_normalize_inference (
...
...
@@ -1238,7 +1362,6 @@ namespace dlib
    }

    // ------------------------------------------------------------------------------------
    // ------------------------------------------------------------------------------------
    // ------------------------------------------------------------------------------------
...
...
dlib/dnn/cpu_dlib.h
...
...
@@ -25,6 +25,12 @@ namespace dlib
        const tensor& src2
    );

    void multiply_conv (
        tensor& dest,
        const tensor& src1,
        const tensor& src2
    );

    void add (
        float beta,
        tensor& dest,
...
...
@@ -43,6 +49,11 @@ namespace dlib
        const tensor& src2
    );

    void assign_conv_bias_gradient (
        tensor& grad,
        const tensor& gradient_input
    );

    // -----------------------------------------------------------------------------------

    void affine_transform (
...
...
@@ -81,6 +92,15 @@ namespace dlib
        const tensor& B
    );

    // -----------------------------------------------------------------------------------

    void affine_transform_conv (
        tensor& dest,
        const tensor& src,
        const tensor& A,
        const tensor& B
    );

    // -----------------------------------------------------------------------------------

    void batch_normalize_inference (
...
...
dlib/dnn/cuda_dlib.cu
...
...
@@ -94,6 +94,68 @@ namespace dlib
        }
    }

    // ------------------------------------------------------------------------------------
__global__ void _cuda_multiply_conv(float* d, const float* s1, size_t n, const float* s2, size_t bs, size_t ks)
{
for (auto i : grid_stride_range(0, n))
{
auto k = (i/bs)%ks;
d[i] = s1[i]*s2[k];
}
}
__global__ void _cuda_multiply_conv2(float* d, const float* s1, size_t n, const float* s2, size_t bs, size_t ks)
{
// zero initialize d before we begin.
for (auto i : grid_stride_range(0, ks))
d[i] = 0;
__syncthreads();
// loop over all the image planes
for (auto i : grid_stride_range_y(0, n))
{
// sum all the elements in the i-th image plane
float temp = 0;
for (auto j : grid_stride_range(i*bs, (i+1)*bs))
temp += s1[j]*s2[j];
auto k = i%ks;
// and store the sum into d[k]
warp_reduce_atomic_add(d[k], temp);
}
}
void multiply_conv (
tensor& dest,
const tensor& src1,
const tensor& src2
)
{
if (have_same_dimensions(dest,src1))
{
DLIB_CASSERT(src2.num_samples() == 1 && src2.nr() == 1 && src2.nc() == 1 && src2.k() == src1.k(),"");
if (dest.size() == 0)
return;
launch_kernel(_cuda_multiply_conv,max_jobs(dest.size()),
dest.device(), src1.device(), src1.size(), src2.device(), src1.nr()*src1.nc(), src1.k());
}
else
{
DLIB_CASSERT(have_same_dimensions(src1,src2),"");
DLIB_CASSERT(dest.num_samples() == 1 && dest.nr() == 1 && dest.nc() == 1 && dest.k() == src1.k(),"");
if (dest.size() == 0)
return;
dim3 blocks(10,1);
dim3 threads(32,32); // x size must be 32 because we are using warp_reduce_atomic_add() in the kernel.
_cuda_multiply_conv2<<<blocks,threads>>>(
dest.device(), src1.device(), src1.num_samples()*src1.k(), src2.device(), src1.nr()*src1.nc(), src1.k());
}
}
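A note on the indexing (hedged commentary, not from the commit): dlib tensors are stored row-major in (n,k,r,c) order, so with bs = nr()*nc() elements per channel plane and ks = k() channels, the flattened element i belongs to channel (i/bs)%ks. That is the mapping _cuda_multiply_conv uses above and _cuda_affine_transform_conv uses below. A small host-side sanity check of the identity:

#include <cassert>
#include <cstddef>

int main()
{
    const std::size_t ns = 2, ks = 3, nr = 4, nc = 5;
    const std::size_t bs = nr*nc;   // elements per channel plane
    std::size_t i = 0;              // flattened row-major (n,k,r,c) index
    for (std::size_t n = 0; n < ns; ++n)
        for (std::size_t k = 0; k < ks; ++k)
            for (std::size_t r = 0; r < nr; ++r)
                for (std::size_t c = 0; c < nc; ++c, ++i)
                    assert((i/bs)%ks == k);   // the kernels' channel recovery
    return 0;
}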
// ------------------------------------------------------------------------------------
__global__ void _cuda_add1(float* d, const float* s1, const float* s2, size_t n)
...
...
@@ -302,6 +364,32 @@ namespace dlib
        }
    }

    // -----------------------------------------------------------------------------------
__global__ void _cuda_affine_transform_conv(float* d, const float* s, size_t n, const float* A, const float* B, size_t bs, size_t ks)
{
for (auto i : grid_stride_range(0, n))
{
auto k = (i/bs)%ks;
d[i] = A[k]*s[i] + B[k];
}
}
void affine_transform_conv(
tensor& dest,
const tensor& src,
const tensor& A,
const tensor& B
)
{
DLIB_CASSERT(have_same_dimensions(dest, src),"");
DLIB_CASSERT(have_same_dimensions(A, B),"");
DLIB_CASSERT(A.num_samples() == 1 && A.nr() == 1 && A.nc() == 1 && A.k() == src.k(),"");
launch_kernel(_cuda_affine_transform_conv,max_jobs(dest.size()),
dest.device(), src.device(), src.size(), A.device(), B.device(), src.nr()*src.nc(), src.k());
}
// -----------------------------------------------------------------------------------
__global__ void _add_bias_gradient(float* out, const float* in, size_t n, size_t total_n)
...
...
dlib/dnn/cuda_dlib.h
...
...
@@ -30,6 +30,12 @@ namespace dlib
        const tensor& src2
    );

    void multiply_conv (
        tensor& dest,
        const tensor& src1,
        const tensor& src2
    );

    void add (
        tensor& dest,
        const tensor& src1,
...
...
@@ -82,6 +88,15 @@ namespace dlib
        const tensor& B
    );

    // -----------------------------------------------------------------------------------

    void affine_transform_conv (
        tensor& dest,
        const tensor& src,
        const tensor& A,
        const tensor& B
    );

    // -----------------------------------------------------------------------------------

    void assign_bias_gradient (
...
...
dlib/dnn/tensor_tools.cpp
...
...
@@ -115,6 +115,19 @@ namespace dlib { namespace tt
    }

    void multiply_conv (
        tensor& dest,
        const tensor& src1,
        const tensor& src2
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::multiply_conv(dest, src1, src2);
#else
        cpu::multiply_conv(dest, src1, src2);
#endif
    }

// ----------------------------------------------------------------------------------------

    void affine_transform (
...
...
@@ -181,6 +194,22 @@ namespace dlib { namespace tt
#endif
    }

// ----------------------------------------------------------------------------------------

    void affine_transform_conv (
        tensor& dest,
        const tensor& src,
        const tensor& A,
        const tensor& B
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::affine_transform_conv(dest, src, A, B);
#else
        cpu::affine_transform_conv(dest, src, A, B);
#endif
    }

// ----------------------------------------------------------------------------------------

    void batch_normalize_inference (
...
...
@@ -362,8 +391,7 @@ namespace dlib { namespace tt
#ifdef DLIB_USE_CUDA
        cuda::assign_conv_bias_gradient(grad, gradient_input);
#else
        // This branch previously held a TODO that asserted false; it now
        // dispatches to the new CPU implementation.
        cpu::assign_conv_bias_gradient(grad, gradient_input);
#endif
    }
...
...
dlib/dnn/tensor_tools.h
...
...
@@ -118,6 +118,34 @@ namespace dlib { namespace tt
          with num_samples()==1 which is then assigned to #dest.
    !*/

    void multiply_conv (
        tensor& dest,
        const tensor& src1,
        const tensor& src2
    );
    /*!
        requires
            - if (have_same_dimensions(dest, src1) == true) then
                - src2.num_samples() == 1
                - src2.nr() == 1
                - src2.nc() == 1
                - src2.k() == src1.k()
            - else
                - have_same_dimensions(src1, src2) == true
                - dest.num_samples() == 1
                - dest.nr() == 1
                - dest.nc() == 1
                - dest.k() == src1.k()
        ensures
            - Performs #dest == src1*src2
              In particular, if the elements of dest, src1, and src2 were indexed by
              (n,k,r,c) then we would have:
                - if (have_same_dimensions(dest,src1)) then
                    - #dest(n,k,r,c) == src1(n,k,r,c)*src2(k)
                - else
                    - #dest(k) == sum over {n,r,c} of src1(n,k,r,c)*src2(n,k,r,c)
    !*/
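A hedged usage sketch (my illustration, not part of the commit; it assumes dlib's resizable_tensor, copy_size(), and scalar-fill assignment, which exist elsewhere in the library):

#include <dlib/dnn.h>
using namespace dlib;

void multiply_conv_example()
{
    resizable_tensor src(2, 3, 4, 4);     // 2 samples, 3 channels, 4x4 planes
    resizable_tensor scales(1, 3, 1, 1);  // one multiplier per channel
    resizable_tensor dest;
    dest.copy_size(src);                  // first mode: dest matches src

    src = 1;      // fill every element with 1
    scales = 2;   // scale every channel by 2

    // First mode: dest(n,k,r,c) == src(n,k,r,c)*scales(k), so every element
    // of dest becomes 2.
    tt::multiply_conv(dest, src, scales);

    // Second mode: a (1,k,1,1) destination accumulates per-channel sums of
    // elementwise products. Here each g(k) == 2*4*4 == 32.
    resizable_tensor g(1, 3, 1, 1);
    tt::multiply_conv(g, src, src);
}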
// ----------------------------------------------------------------------------------------
    void affine_transform (
...
...
@@ -196,6 +224,29 @@ namespace dlib { namespace tt
            - #dest.host()[i] == A.host()[i]*src.host()[i] + B.host()[i]
    !*/

// ----------------------------------------------------------------------------------------

    void affine_transform_conv (
        tensor& dest,
        const tensor& src,
        const tensor& A,
        const tensor& B
    );
    /*!
        requires
            - have_same_dimensions(dest,src) == true
            - have_same_dimensions(A, B) == true
            - A.num_samples() == 1
            - A.nr() == 1
            - A.nc() == 1
            - A.k() == src.k()
        ensures
            - Performs #dest == A*src + B
              In particular, if the elements of dest and src were indexed by (n,k,r,c)
              then we would have:
                #dest(n,k,r,c) == A(k)*src(n,k,r,c) + B(k)
    !*/
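And a matching hedged sketch for affine_transform_conv (again my illustration, under the same assumptions): this is the per-channel scale-and-shift a conv layer needs when applying batch normalization statistics at inference time.

#include <dlib/dnn.h>
using namespace dlib;

void affine_transform_conv_example()
{
    resizable_tensor src(2, 3, 4, 4);  // 2 samples, 3 channels, 4x4 planes
    resizable_tensor A(1, 3, 1, 1);    // per-channel scale
    resizable_tensor B(1, 3, 1, 1);    // per-channel offset
    resizable_tensor dest;
    dest.copy_size(src);

    src = 1;
    A = 2;
    B = 0.5;

    // dest(n,k,r,c) == A(k)*src(n,k,r,c) + B(k), so every element becomes 2.5.
    tt::affine_transform_conv(dest, src, A, B);
}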
// ----------------------------------------------------------------------------------------
    void batch_normalize_inference (
...
...