Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
e8d24076
Commit
e8d24076
authored
Aug 17, 2019
by
Davis King
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Simplified the device_global_buffer() code and API.
parent
d7fb51e6
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
101 additions
and
56 deletions
+101
-56
cuda_data_ptr.cpp
dlib/cuda/cuda_data_ptr.cpp
+42
-8
cuda_data_ptr.h
dlib/cuda/cuda_data_ptr.h
+55
-38
cuda_dlib.h
dlib/cuda/cuda_dlib.h
+1
-3
cudnn_dlibapi.cpp
dlib/cuda/cudnn_dlibapi.cpp
+3
-6
cudnn_dlibapi.h
dlib/cuda/cudnn_dlibapi.h
+0
-1
No files found.
dlib/cuda/cuda_data_ptr.cpp
View file @
e8d24076
...
...
@@ -13,6 +13,36 @@ namespace dlib
namespace
cuda
{
// ----------------------------------------------------------------------------------------
weak_cuda_data_void_ptr
::
weak_cuda_data_void_ptr
(
const
cuda_data_void_ptr
&
ptr
)
:
num
(
ptr
.
num
),
pdata
(
ptr
.
pdata
)
{
}
// ----------------------------------------------------------------------------------------
cuda_data_void_ptr
weak_cuda_data_void_ptr
::
lock
()
const
{
auto
ptr
=
pdata
.
lock
();
if
(
ptr
)
{
cuda_data_void_ptr
temp
;
temp
.
pdata
=
ptr
;
temp
.
num
=
num
;
return
temp
;
}
else
{
return
cuda_data_void_ptr
();
}
}
// -----------------------------------------------------------------------------------
// -----------------------------------------------------------------------------------
cuda_data_void_ptr
::
...
...
@@ -100,7 +130,8 @@ namespace dlib
{
}
std
::
shared_ptr
<
resizable_cuda_buffer
>
get_buffer
(
cuda_data_void_ptr
get
(
size_t
size
)
{
int
new_device_id
;
...
...
@@ -109,11 +140,12 @@ namespace dlib
if
(
new_device_id
>=
(
long
)
buffers
.
size
())
buffers
.
resize
(
new_device_id
+
16
);
// If we don't have a buffer already for this device then make one
std
::
shared_ptr
<
resizable_cuda_buffer
>
buff
=
buffers
[
new_device_id
].
lock
();
if
(
!
buff
)
// If we don't have a buffer already for this device then make one, or if it's too
// small, make a bigger one.
cuda_data_void_ptr
buff
=
buffers
[
new_device_id
].
lock
();
if
(
!
buff
||
buff
.
size
()
<
size
)
{
buff
=
std
::
make_shared
<
resizable_cuda_buffer
>
(
);
buff
=
cuda_data_void_ptr
(
size
);
buffers
[
new_device_id
]
=
buff
;
}
...
...
@@ -123,13 +155,15 @@ namespace dlib
private
:
std
::
vector
<
std
::
weak_ptr
<
resizable_cuda_buffer
>
>
buffers
;
std
::
vector
<
weak_cuda_data_void_ptr
>
buffers
;
};
std
::
shared_ptr
<
resizable_cuda_buffer
>
device_global_buffer
()
// ----------------------------------------------------------------------------------------
cuda_data_void_ptr
device_global_buffer
(
size_t
size
)
{
thread_local
cudnn_device_buffer
buffer
;
return
buffer
.
get
_buffer
(
);
return
buffer
.
get
(
size
);
}
// ------------------------------------------------------------------------------------
...
...
dlib/cuda/cuda_data_ptr.h
View file @
e8d24076
...
...
@@ -17,6 +17,37 @@ namespace dlib
// ------------------------------------------------------------------------------------
class
cuda_data_void_ptr
;
class weak_cuda_data_void_ptr
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This is the std::weak_ptr analog of cuda_data_void_ptr.  It lets
            you observe the memory referenced by a cuda_data_void_ptr without
            keeping that memory alive.
    !*/
public:

    weak_cuda_data_void_ptr() = default;

    weak_cuda_data_void_ptr(const cuda_data_void_ptr& ptr);

    void reset()
    {
        // Drop our observation entirely, returning to the default state.
        num = 0;
        pdata.reset();
    }

    cuda_data_void_ptr lock() const;
    /*!
        ensures
            - if (the memory block referenced by this object hasn't been deleted) then
                - returns a cuda_data_void_ptr referencing that memory block
            - else
                - returns a default initialized cuda_data_void_ptr (i.e. an empty one).
    !*/

private:
    size_t num = 0;
    std::weak_ptr<void> pdata;
};
// ----------------------------------------------------------------------------------------
class
cuda_data_void_ptr
{
/*!
...
...
@@ -64,6 +95,7 @@ namespace dlib
private
:
friend
class
weak_cuda_data_void_ptr
;
size_t
num
=
0
;
std
::
shared_ptr
<
void
>
pdata
;
};
...
...
@@ -214,40 +246,7 @@ namespace dlib
// ------------------------------------------------------------------------------------
class resizable_cuda_buffer
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object manages a block of memory on a CUDA device and grows
            it automatically whenever a caller requests more space than is
            currently allocated.
    !*/
public:

    cuda_data_void_ptr get(size_t size)
    /*!
        ensures
            - This object will return the buffer of requested size or larger.
            - buffer.size() >= size
            - Client code should not hold the returned cuda_data_void_ptr for
              long durations, but instead should call get() whenever the
              buffer is needed.  Doing so ensures that multiple buffers are
              not kept around in the event of a resize.
    !*/
    {
        // Reallocate only when the current block is too small.  Resetting
        // first releases the old allocation before the new one is made.
        if (buffer.size() < size)
        {
            buffer.reset();
            buffer = cuda_data_void_ptr(size);
        }
        return buffer;
    }

private:
    cuda_data_void_ptr buffer;
};
// ----------------------------------------------------------------------------------------
std
::
shared_ptr
<
resizable_cuda_buffer
>
device_global_buffer
(
);
cuda_data_void_ptr
device_global_buffer
(
size_t
size
);
/*!
ensures
- Returns a pointer to a globally shared CUDA memory buffer on the
...
...
@@ -256,10 +255,28 @@ namespace dlib
as scratch space for CUDA computations that all take place on the default
stream. Using it in this way ensures that there aren't any race conditions
involving the use of the buffer.
- The global buffer is deallocated once all references to it are
destructed. It will be reallocated as required. So if you want to avoid
these reallocations then hold a copy of the shared_ptr returned by this
function.
- The returned pointer will point to at least size bytes. It may point to more.
- The global buffer is deallocated once all references to it are destructed.
However, if device_global_buffer() is called before then with a size <= the last
size requested, then the previously returned global buffer pointer is returned.
This avoids triggering expensive CUDA reallocations. So if you want to avoid
these reallocations then hold a copy of the pointer returned by this function.
However, as a general rule, client code should not hold the returned
cuda_data_void_ptr for long durations, but instead should call
device_global_buffer() whenever the buffer is needed, and overwrite the previously
returned pointer with the new pointer. Doing so ensures multiple buffers are not
kept around in the event that buffers of several different sizes are requested. To explain
this, consider this code, assumed to execute at program startup:
auto ptr1 = device_global_buffer(1);
auto ptr2 = device_global_buffer(2);
auto ptr3 = device_global_buffer(3);
since the sizes increased at each call, 3 separate buffers were allocated. First
one of size 1, then of size 2, then of size 3. If we then executed:
ptr1 = device_global_buffer(1);
ptr2 = device_global_buffer(2);
ptr3 = device_global_buffer(3);
all three of these pointers would now point to the same buffer, since the smaller
requests can be satisfied by returning the size 3 buffer in each case.
!*/
// ----------------------------------------------------------------------------------------
...
...
dlib/cuda/cuda_dlib.h
View file @
e8d24076
...
...
@@ -438,8 +438,7 @@ namespace dlib
const
size_t
bytes_per_plane
=
subnetwork_output
.
nr
()
*
subnetwork_output
.
nc
()
*
sizeof
(
uint16_t
);
// Allocate a cuda buffer to store all the truth images and also one float
// for the scalar loss output.
work
=
device_global_buffer
();
buf
=
work
->
get
(
subnetwork_output
.
num_samples
()
*
bytes_per_plane
+
sizeof
(
float
));
buf
=
device_global_buffer
(
subnetwork_output
.
num_samples
()
*
bytes_per_plane
+
sizeof
(
float
));
cuda_data_void_ptr
loss_buf
=
buf
;
buf
=
buf
+
sizeof
(
float
);
...
...
@@ -466,7 +465,6 @@ namespace dlib
double
&
loss
);
mutable
std
::
shared_ptr
<
resizable_cuda_buffer
>
work
;
mutable
cuda_data_void_ptr
buf
;
};
...
...
dlib/cuda/cudnn_dlibapi.cpp
View file @
e8d24076
...
...
@@ -749,7 +749,6 @@ namespace dlib
forward_workspace
.
reset
();
backward_data_workspace
.
reset
();
backward_filters_workspace
.
reset
();
workspace
.
reset
();
}
void
tensor_conv
::
...
...
@@ -919,8 +918,6 @@ namespace dlib
(
const
cudnnFilterDescriptor_t
)
filter_handle
,
backward_filters_best_algo
,
&
backward_filters_workspace_size_in_bytes
));
workspace
=
device_global_buffer
();
}
catch
(...)
{
...
...
@@ -989,7 +986,7 @@ namespace dlib
// while the function is still executing on the device. But each time we come
// here, we make sure to grab the latest workspace buffer so that, globally, we
// minimize the number of such buffers.
forward_workspace
=
workspace
->
get
(
forward_workspace_size_in_bytes
);
forward_workspace
=
device_global_buffer
(
forward_workspace_size_in_bytes
);
CHECK_CUDNN
(
cudnnConvolutionForward
(
context
(),
...
...
@@ -1022,7 +1019,7 @@ namespace dlib
// while the function is still executing on the device. But each time we come
// here, we make sure to grab the latest workspace buffer so that, globally, we
// minimize the number of such buffers.
backward_data_workspace
=
workspace
->
get
(
backward_data_workspace_size_in_bytes
);
backward_data_workspace
=
device_global_buffer
(
backward_data_workspace_size_in_bytes
);
CHECK_CUDNN
(
cudnnConvolutionBackwardData
(
context
(),
...
...
@@ -1056,7 +1053,7 @@ namespace dlib
// while the function is still executing on the device. But each time we come
// here, we make sure to grab the latest workspace buffer so that, globally, we
// minimize the number of such buffers.
backward_filters_workspace
=
workspace
->
get
(
backward_filters_workspace_size_in_bytes
);
backward_filters_workspace
=
device_global_buffer
(
backward_filters_workspace_size_in_bytes
);
CHECK_CUDNN
(
cudnnConvolutionBackwardFilter
(
context
(),
&
alpha
,
...
...
dlib/cuda/cudnn_dlibapi.h
View file @
e8d24076
...
...
@@ -268,7 +268,6 @@ namespace dlib
size_t
forward_workspace_size_in_bytes
;
size_t
backward_data_workspace_size_in_bytes
;
size_t
backward_filters_workspace_size_in_bytes
;
std
::
shared_ptr
<
resizable_cuda_buffer
>
workspace
;
cuda_data_void_ptr
forward_workspace
;
cuda_data_void_ptr
backward_data_workspace
;
cuda_data_void_ptr
backward_filters_workspace
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment