Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
b09ddc3a
Commit
b09ddc3a
authored
Aug 27, 2016
by
Davis King
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixed a bug in memcpy() for tensors where you weren't allowed to copy
alias tensors. Now any kind of tensor is supported.
parent
fe42f662
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
216 additions
and
12 deletions
+216
-12
gpu_data.cpp
dlib/dnn/gpu_data.cpp
+49
-10
gpu_data.h
dlib/dnn/gpu_data.h
+41
-1
gpu_data_abstract.h
dlib/dnn/gpu_data_abstract.h
+25
-0
tensor.h
dlib/dnn/tensor.h
+4
-1
dnn.cpp
dlib/test/dnn.cpp
+97
-0
No files found.
dlib/dnn/gpu_data.cpp
View file @
b09ddc3a
...
...
@@ -24,20 +24,59 @@ namespace dlib
)
{
DLIB_CASSERT
(
dest
.
size
()
==
src
.
size
(),
""
);
if
(
src
.
size
()
==
0
)
if
(
src
.
size
()
==
0
||
&
dest
==
&
src
)
return
;
// copy the memory efficiently based on which copy is current in each object.
if
(
dest
.
device_ready
()
&&
src
.
device_ready
())
CHECK_CUDA
(
cudaMemcpy
(
dest
.
device
(),
src
.
device
(),
src
.
size
()
*
sizeof
(
float
),
cudaMemcpyDeviceToDevice
));
else
if
(
!
dest
.
device_ready
()
&&
src
.
device_ready
())
CHECK_CUDA
(
cudaMemcpy
(
dest
.
host_write_only
(),
src
.
device
(),
src
.
size
()
*
sizeof
(
float
),
cudaMemcpyDeviceToHost
));
else
if
(
dest
.
device_ready
()
&&
!
src
.
device_ready
())
CHECK_CUDA
(
cudaMemcpy
(
dest
.
device
(),
src
.
host
(),
src
.
size
()
*
sizeof
(
float
),
cudaMemcpyHostToDevice
));
else
CHECK_CUDA
(
cudaMemcpy
(
dest
.
host_write_only
(),
src
.
host
(),
src
.
size
()
*
sizeof
(
float
),
cudaMemcpyHostToHost
));
memcpy
(
dest
,
0
,
src
,
0
,
src
.
size
());
}
void
memcpy
(
gpu_data
&
dest
,
size_t
dest_offset
,
const
gpu_data
&
src
,
size_t
src_offset
,
size_t
num
)
{
DLIB_CASSERT
(
dest_offset
+
num
<=
dest
.
size
(),
""
);
DLIB_CASSERT
(
src_offset
+
num
<=
src
.
size
(),
""
);
if
(
num
==
0
)
return
;
// if there is aliasing
if
(
&
dest
==
&
src
&&
std
::
max
(
dest_offset
,
src_offset
)
<
std
::
min
(
dest_offset
,
src_offset
)
+
num
)
{
// if they perfectly alias each other then there is nothing to do
if
(
dest_offset
==
src_offset
)
return
;
else
std
::
memmove
(
dest
.
host
()
+
dest_offset
,
src
.
host
()
+
src_offset
,
sizeof
(
float
)
*
num
);
}
else
{
// if we write to the entire thing then we can use device_write_only()
if
(
dest_offset
==
0
&&
num
==
dest
.
size
())
{
// copy the memory efficiently based on which copy is current in each object.
if
(
src
.
device_ready
())
CHECK_CUDA
(
cudaMemcpy
(
dest
.
device_write_only
(),
src
.
device
()
+
src_offset
,
num
*
sizeof
(
float
),
cudaMemcpyDeviceToDevice
));
else
CHECK_CUDA
(
cudaMemcpy
(
dest
.
device_write_only
(),
src
.
host
()
+
src_offset
,
num
*
sizeof
(
float
),
cudaMemcpyHostToDevice
));
}
else
{
// copy the memory efficiently based on which copy is current in each object.
if
(
dest
.
device_ready
()
&&
src
.
device_ready
())
CHECK_CUDA
(
cudaMemcpy
(
dest
.
device
()
+
dest_offset
,
src
.
device
()
+
src_offset
,
num
*
sizeof
(
float
),
cudaMemcpyDeviceToDevice
));
else
if
(
!
dest
.
device_ready
()
&&
src
.
device_ready
())
CHECK_CUDA
(
cudaMemcpy
(
dest
.
host
()
+
dest_offset
,
src
.
device
()
+
src_offset
,
num
*
sizeof
(
float
),
cudaMemcpyDeviceToHost
));
else
if
(
dest
.
device_ready
()
&&
!
src
.
device_ready
())
CHECK_CUDA
(
cudaMemcpy
(
dest
.
device
()
+
dest_offset
,
src
.
host
()
+
src_offset
,
num
*
sizeof
(
float
),
cudaMemcpyHostToDevice
));
else
CHECK_CUDA
(
cudaMemcpy
(
dest
.
host
()
+
dest_offset
,
src
.
host
()
+
src_offset
,
num
*
sizeof
(
float
),
cudaMemcpyHostToHost
));
}
}
}
// ----------------------------------------------------------------------------------------
void
gpu_data
::
...
...
dlib/dnn/gpu_data.h
View file @
b09ddc3a
...
...
@@ -208,14 +208,54 @@ namespace dlib
#ifdef DLIB_USE_CUDA
void
memcpy
(
gpu_data
&
dest
,
const
gpu_data
&
src
);
void
memcpy
(
gpu_data
&
dest
,
size_t
dest_offset
,
const
gpu_data
&
src
,
size_t
src_offset
,
size_t
num
);
#else
inline
void
memcpy
(
gpu_data
&
dest
,
const
gpu_data
&
src
)
{
DLIB_CASSERT
(
dest
.
size
()
==
src
.
size
(),
""
);
if
(
src
.
size
()
==
0
)
if
(
src
.
size
()
==
0
||
&
dest
==
&
src
)
return
;
std
::
memcpy
(
dest
.
host_write_only
(),
src
.
host
(),
sizeof
(
float
)
*
src
.
size
());
}
inline
void
memcpy
(
gpu_data
&
dest
,
size_t
dest_offset
,
const
gpu_data
&
src
,
size_t
src_offset
,
size_t
num
)
{
DLIB_CASSERT
(
dest_offset
+
num
<=
dest
.
size
(),
""
);
DLIB_CASSERT
(
src_offset
+
num
<=
src
.
size
(),
""
);
if
(
num
==
0
)
return
;
if
(
&
dest
==
&
src
&&
std
::
max
(
dest_offset
,
src_offset
)
<
std
::
min
(
dest_offset
,
src_offset
)
+
num
)
{
// if they perfectly alias each other then there is nothing to do
if
(
dest_offset
==
src_offset
)
return
;
else
std
::
memmove
(
dest
.
host
()
+
dest_offset
,
src
.
host
()
+
src_offset
,
sizeof
(
float
)
*
num
);
}
else
{
// if we write to the entire thing then we can use host_write_only()
if
(
dest_offset
==
0
&&
num
==
dest
.
size
())
std
::
memcpy
(
dest
.
host_write_only
(),
src
.
host
()
+
src_offset
,
sizeof
(
float
)
*
num
);
else
std
::
memcpy
(
dest
.
host
()
+
dest_offset
,
src
.
host
()
+
src_offset
,
sizeof
(
float
)
*
num
);
}
}
#endif
// ----------------------------------------------------------------------------------------
...
...
dlib/dnn/gpu_data_abstract.h
View file @
b09ddc3a
...
...
@@ -233,6 +233,31 @@ namespace dlib
- This function blocks until the copy has completed.
!*/
void
memcpy
(
gpu_data
&
dest
,
size_t
dest_offset
,
const
gpu_data
&
src
,
size_t
src_offset
,
size_t
num
);
/*!
requires
- dest_offset + num <= dest.size()
- src_offset + num <= src.size()
ensures
- Copies the data in src to dest, but only copies data in the range
[src.host()+src_offset, src.host()+src_offset+num) to
[dest.host()+dest_offset, dest.host()+dest_offset+num). Therefore, it is
just like the above memcpy() except that you can specify some subset of data
in a gpu_data object to be copied.
- Like the above version of memcpy(), the copy will happen in the most
efficient way, automatically using the appropriate type of host/device
transfers based on where data is currently resident.
- It doesn't matter what GPU device is selected by cudaSetDevice(). You can
always copy gpu_data objects to and from each other regardless.
- This function blocks until the copy has completed.
!*/
// ----------------------------------------------------------------------------------------
}
...
...
dlib/dnn/tensor.h
View file @
b09ddc3a
...
...
@@ -169,7 +169,10 @@ namespace dlib
const
tensor
&
src
)
{
memcpy
(
dest
.
data
(),
src
.
data
());
DLIB_CASSERT
(
dest
.
size
()
==
src
.
size
(),
""
);
memcpy
(
dest
.
data
(),
dest
.
get_alias_offset
(),
src
.
data
(),
src
.
get_alias_offset
(),
src
.
size
());
}
...
...
dlib/test/dnn.cpp
View file @
b09ddc3a
...
...
@@ -497,6 +497,103 @@ namespace
#endif
}
{
resizable_tensor
A
,
B
;
A
.
set_size
(
11
);
B
.
copy_size
(
A
);
A
=
4
;
B
=
1
;
matrix
<
float
>
truth
;
alias_tensor
at
(
5
);
A
=
4
;
A
.
host
();
B
.
host
();
{
// non-aliasing test
auto
aA
=
at
(
A
,
5
);
auto
aB
=
at
(
B
,
5
);
memcpy
(
aA
,
aB
);
truth
=
{
4
,
4
,
4
,
4
,
4
,
1
,
1
,
1
,
1
,
1
,
4
};
DLIB_TEST
(
max
(
abs
(
mat
(
A
)
-
truth
))
<
1e-5
);
}
{
// aliasing test
auto
aA
=
at
(
A
,
1
);
auto
aB
=
at
(
A
,
6
);
memcpy
(
aA
,
aB
);
truth
=
{
4
,
1
,
1
,
1
,
1
,
4
,
1
,
1
,
1
,
1
,
4
};
DLIB_TEST
(
max
(
abs
(
mat
(
A
)
-
truth
))
<
1e-5
);
}
#ifdef DLIB_USE_CUDA
A
=
4
;
A
.
device
();
B
.
host
();
{
// non-aliasing test
auto
aA
=
at
(
A
,
5
);
auto
aB
=
at
(
B
,
5
);
memcpy
(
aA
,
aB
);
truth
=
{
4
,
4
,
4
,
4
,
4
,
1
,
1
,
1
,
1
,
1
,
4
};
DLIB_TEST
(
max
(
abs
(
mat
(
A
)
-
truth
))
<
1e-5
);
}
{
// aliasing test
auto
aA
=
at
(
A
,
1
);
auto
aB
=
at
(
A
,
6
);
memcpy
(
aA
,
aB
);
truth
=
{
4
,
1
,
1
,
1
,
1
,
4
,
1
,
1
,
1
,
1
,
4
};
DLIB_TEST
(
max
(
abs
(
mat
(
A
)
-
truth
))
<
1e-5
);
}
A
=
4
;
A
.
device
();
B
.
device
();
{
// non-aliasing test
auto
aA
=
at
(
A
,
5
);
auto
aB
=
at
(
B
,
5
);
memcpy
(
aA
,
aB
);
truth
=
{
4
,
4
,
4
,
4
,
4
,
1
,
1
,
1
,
1
,
1
,
4
};
DLIB_TEST
(
max
(
abs
(
mat
(
A
)
-
truth
))
<
1e-5
);
}
{
// aliasing test
auto
aA
=
at
(
A
,
1
);
auto
aB
=
at
(
A
,
6
);
memcpy
(
aA
,
aB
);
truth
=
{
4
,
1
,
1
,
1
,
1
,
4
,
1
,
1
,
1
,
1
,
4
};
DLIB_TEST
(
max
(
abs
(
mat
(
A
)
-
truth
))
<
1e-5
);
}
A
=
4
;
A
.
host
();
B
.
device
();
{
// non-aliasing test
auto
aA
=
at
(
A
,
5
);
auto
aB
=
at
(
B
,
5
);
memcpy
(
aA
,
aB
);
truth
=
{
4
,
4
,
4
,
4
,
4
,
1
,
1
,
1
,
1
,
1
,
4
};
DLIB_TEST
(
max
(
abs
(
mat
(
A
)
-
truth
))
<
1e-5
);
}
{
// aliasing test
auto
aA
=
at
(
A
,
1
);
auto
aB
=
at
(
A
,
6
);
memcpy
(
aA
,
aB
);
truth
=
{
4
,
1
,
1
,
1
,
1
,
4
,
1
,
1
,
1
,
1
,
4
};
DLIB_TEST
(
max
(
abs
(
mat
(
A
)
-
truth
))
<
1e-5
);
}
#endif
}
{
resizable_tensor
A
,
B
;
A
.
set_size
(
2
,
3
,
4
,
5
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment