dlib / Commits / ecc00ce7

Commit ecc00ce7, authored Feb 01, 2016 by Davis King

    merged

Parents: 573fce15, 99ce564b

Showing 6 changed files, with 120 additions and 10 deletions:
    dlib/dnn/cudnn_dlibapi.cpp      +1   -1
    dlib/dnn/gpu_data.h             +19  -0
    dlib/dnn/gpu_data_abstract.h    +31  -0
    dlib/dnn/loss.h                 +5   -1
    dlib/dnn/tensor.h               +38  -8
    dlib/dnn/tensor_abstract.h      +26  -0
dlib/dnn/cudnn_dlibapi.cpp

@@ -240,7 +240,7 @@ namespace dlib
                 return;
             CHECK_CUDNN(cudnnSetTensor(context(),
                                        descriptor(t),
-                                       t.device(),
+                                       t.device_write_only(),
                                        &value));
         }
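This one-line change is the motivating use of the new write-only accessor: cudnnSetTensor() assigns the given value to every element of t, so whatever the tensor held before the call is irrelevant. Calling t.device() here would first synchronize and upload any newer host-side copy to the GPU, only for it to be overwritten immediately; t.device_write_only() skips that transfer.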
dlib/dnn/gpu_data.h

@@ -95,6 +95,13 @@ namespace dlib
             return data_host.get();
         }

+        float* host_write_only()
+        {
+            host_current = true;
+            device_current = false;
+            return data_host.get();
+        }
+
         const float* device() const
         {
 #ifndef DLIB_USE_CUDA

@@ -116,6 +123,18 @@ namespace dlib
             return data_device.get();
         }

+        float* device_write_only()
+        {
+#ifndef DLIB_USE_CUDA
+            DLIB_CASSERT(false, "CUDA NOT ENABLED");
+#endif
+            wait_for_transfer_to_finish();
+            host_current = false;
+            device_current = true;
+            device_in_use = true;
+            return data_device.get();
+        }
+
         bool host_ready() const { return host_current; }
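A minimal usage sketch of the new accessor, using only the gpu_data interface shown above (fill_on_host is a hypothetical helper, not part of dlib):

    #include <dlib/dnn.h>

    // Overwrite every element of a gpu_data buffer from the CPU side.
    void fill_on_host(dlib::gpu_data& g, float value)
    {
        // host() would first wait for any in-flight transfer and copy newer
        // device-side data back to the host.  host_write_only() skips that
        // copy: it just sets host_current = true and device_current = false,
        // so the returned bytes are undefined until assigned -- hence the
        // loop must touch every element.
        float* p = g.host_write_only();
        for (size_t i = 0; i < g.size(); ++i)
            p[i] = value;
        // A later call to g.device() will upload this host data to the GPU.
    }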
dlib/dnn/gpu_data_abstract.h

@@ -125,6 +125,20 @@ namespace dlib
                   calling host().
         !*/

+        float* host_write_only(
+        );
+        /*!
+            ensures
+                - This function returns the same pointer as host(), except that it never
+                  performs a device to host memory copy.  Instead, it immediately marks the
+                  device side data as out of date, effectively discarding it.  Therefore,
+                  the values in the data pointed to by host_write_only() are undefined and
+                  you should only call host_write_only() if you are going to assign to
+                  every memory location in the returned memory block.
+                - #host_ready() == true
+                - #device_ready() == false
+        !*/
+
         const float* device(
         ) const;
         /*!

@@ -154,6 +168,23 @@ namespace dlib
                 - #device_ready() == true
         !*/

+        float* device_write_only(
+        );
+        /*!
+            requires
+                - DLIB_USE_CUDA is #defined
+            ensures
+                - This function returns the same pointer as device(), except that it never
+                  performs a host to device memory copy.  Instead, it immediately marks the
+                  host side data as out of date, effectively discarding it.  Therefore, the
+                  values in the data pointed to by device_write_only() are undefined and
+                  you should only call device_write_only() if you are going to assign to
+                  every memory location in the returned memory block.
+                - #host_ready() == false
+                - #device_ready() == true
+        !*/
+
         size_t size(
         ) const;
         /*!
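To make the ensures clauses concrete, here is a sketch of the state transitions they describe, assuming dlib was built with DLIB_USE_CUDA (set_size is gpu_data's existing sizing method):

    dlib::gpu_data g;
    g.set_size(100);

    float* d = g.device_write_only();  // no host-to-device copy happens here
    // Now g.host_ready() == false and g.device_ready() == true.  The memory
    // behind d is undefined, so a correct caller must assign all 100 floats
    // (e.g. from a CUDA kernel) before anything reads the buffer.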
dlib/dnn/loss.h

@@ -68,7 +68,7 @@ namespace dlib
             const double scale = 1.0/output_tensor.num_samples();
             double loss = 0;
             const float* out_data = output_tensor.host();
-            float* g = grad.host();
+            float* g = grad.host_write_only();
             for (long i = 0; i < output_tensor.num_samples(); ++i)
             {
                 const float y = *truth++;

@@ -79,6 +79,10 @@ namespace dlib
                     loss += scale*temp;
                     g[i] = -scale*y;
                 }
+                else
+                {
+                    g[i] = 0;
+                }
             }
             return loss;
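Note how the two hunks work together: once g comes from host_write_only() its initial contents are undefined, so the new else branch is needed to guarantee that every g[i] is assigned on every path through the loop. The general shape of the pattern (sample_contributes and compute_gradient are hypothetical stand-ins for the per-sample math above):

    float* g = grad.host_write_only();       // contents undefined until assigned
    for (long i = 0; i < output_tensor.num_samples(); ++i)
    {
        if (sample_contributes(i))
            g[i] = compute_gradient(i);
        else
            g[i] = 0;                        // must still write; no element may be skipped
    }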
dlib/dnn/tensor.h

@@ -8,6 +8,7 @@
 #include "../matrix.h"
 #include "cudnn_dlibapi.h"
 #include "gpu_data.h"
+#include "../byte_orderer.h"
 #include <memory>

 namespace dlib
@@ -46,8 +47,10 @@ namespace dlib
         virtual const float* host() const = 0;
         virtual float* host() = 0;
+        virtual float* host_write_only() = 0;
         virtual const float* device() const = 0;
         virtual float* device() = 0;
+        virtual float* device_write_only() = 0;

         tensor& operator= (float val)
         {
@@ -62,8 +65,9 @@ namespace dlib
                 return *this;
             }
 #endif
-            for (auto& d : *this)
-                d = val;
+            auto d = host_write_only();
+            for (size_t i = 0; i < size(); ++i)
+                d[i] = val;
             return *this;
         }
@@ -95,7 +99,7 @@ namespace dlib
             static_assert((is_same_type<float,typename EXP::type>::value == true),
                 "To assign a matrix to a tensor the matrix must contain float values");
-            set_ptrm(host(), m_n, m_nr*m_nc*m_k) = item;
+            set_ptrm(host_write_only(), m_n, m_nr*m_nc*m_k) = item;
             return *this;
         }
@@ -279,8 +283,10 @@ namespace dlib
         virtual const float* host() const { return data_instance.host(); }
         virtual float* host() { return data_instance.host(); }
+        virtual float* host_write_only() { return data_instance.host_write_only(); }
         virtual const float* device() const { return data_instance.device(); }
         virtual float* device() { return data_instance.device(); }
+        virtual float* device_write_only() { return data_instance.device_write_only(); }

         void clear(
         )
@@ -373,21 +379,33 @@ namespace dlib
     inline void serialize(const tensor& item, std::ostream& out)
     {
-        int version = 1;
+        int version = 2;
         serialize(version, out);
         serialize(item.num_samples(), out);
         serialize(item.k(), out);
         serialize(item.nr(), out);
         serialize(item.nc(), out);
-        for (auto& d : item)
-            serialize(d, out);
+        byte_orderer bo;
+        auto sbuf = out.rdbuf();
+        for (auto d : item)
+        {
+            // Write out our data as 4byte little endian IEEE floats rather than using
+            // dlib's default float serialization.  We do this because it will result in
+            // more compact outputs.  It's slightly less portable but it seems doubtful
+            // that any CUDA enabled platform isn't going to use IEEE floats.  But if one
+            // does we can just update the serialization code here to handle it if such a
+            // platform is encountered.
+            bo.host_to_little(d);
+            static_assert(sizeof(d)==4, "This serialization code assumes we are writing 4 byte floats");
+            sbuf->sputn((char*)&d, sizeof(d));
+        }
     }

     inline void deserialize(resizable_tensor& item, std::istream& in)
     {
         int version;
         deserialize(version, in);
-        if (version != 1)
+        if (version != 2)
             throw serialization_error("Unexpected version found while deserializing dlib::resizable_tensor.");

         long num_samples=0, k=0, nr=0, nc=0;
@@ -396,8 +414,18 @@ namespace dlib
         deserialize(nr, in);
         deserialize(nc, in);
         item.set_size(num_samples, k, nr, nc);
+        byte_orderer bo;
+        auto sbuf = in.rdbuf();
         for (auto& d : item)
-            deserialize(d, in);
+        {
+            static_assert(sizeof(d)==4, "This serialization code assumes we are writing 4 byte floats");
+            if (sbuf->sgetn((char*)&d, sizeof(d)) != sizeof(d))
+            {
+                in.setstate(std::ios::badbit);
+                throw serialization_error("Error reading data while deserializing dlib::resizable_tensor.");
+            }
+            bo.little_to_host(d);
+        }
     }

 // ----------------------------------------------------------------------------------------
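A quick round-trip sketch of the new version 2 format, using only functions touched by this commit; note that streams written by the old version 1 code are now rejected by the version check rather than read:

    #include <dlib/dnn.h>
    #include <sstream>

    int main()
    {
        dlib::resizable_tensor t(2, 3, 4, 5);  // num_samples, k, nr, nc
        t = 1;                                 // operator=(float), now via host_write_only()

        std::ostringstream sout;
        serialize(t, sout);                    // header fields, then raw little-endian floats

        std::istringstream sin(sout.str());
        dlib::resizable_tensor t2;
        deserialize(t2, sin);                  // throws serialization_error on a version 1 stream
        return 0;
    }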
@@ -441,8 +469,10 @@ namespace dlib
         virtual const float* host() const { return data_instance->host()+data_offset; }
         virtual float* host() { return data_instance->host()+data_offset; }
+        virtual float* host_write_only() { return data_instance->host()+data_offset; }
         virtual const float* device() const { return data_instance->device()+data_offset; }
         virtual float* device() { return data_instance->device()+data_offset; }
+        virtual float* device_write_only() { return data_instance->device()+data_offset; }

 #ifdef DLIB_USE_CUDA
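One subtlety in this last hunk: these overrides (the ones returning data_instance->host()+data_offset) belong to the aliasing view of a tensor, and they forward host_write_only()/device_write_only() to the plain host()/device() accessors rather than to the write-only ones. An alias covers only a window of the underlying gpu_data, so it cannot mark the rest of the buffer as discarded the way a true write-only access does; falling back to the copying accessors keeps the data outside the window intact.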
dlib/dnn/tensor_abstract.h

@@ -133,6 +133,18 @@ namespace dlib
                   calling host().
         !*/

+        virtual float* host_write_only(
+        ) = 0;
+        /*!
+            ensures
+                - This function returns the same pointer as host(), except that it never
+                  performs a device to host memory copy.  Instead, it immediately marks the
+                  device side data as out of date, effectively discarding it.  Therefore,
+                  the values in the data pointed to by host_write_only() are undefined and
+                  you should only call host_write_only() if you are going to assign to
+                  every memory location in the returned memory block.
+        !*/
+
         virtual const float* device(
         ) const = 0;
         /*!

@@ -161,6 +173,20 @@ namespace dlib
                   host() will perform a device to host transfer.
         !*/

+        virtual float* device_write_only(
+        ) = 0;
+        /*!
+            requires
+                - DLIB_USE_CUDA is #defined
+            ensures
+                - This function returns the same pointer as device(), except that it never
+                  performs a host to device memory copy.  Instead, it immediately marks the
+                  host side data as out of date, effectively discarding it.  Therefore, the
+                  values in the data pointed to by device_write_only() are undefined and
+                  you should only call device_write_only() if you are going to assign to
+                  every memory location in the returned memory block.
+        !*/
+
         tensor& operator= (
             float val
         );
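Written against this abstract interface, any routine that is about to overwrite an entire tensor can take the write-only fast path. A minimal sketch (fill_gaussian is a hypothetical helper, not part of dlib):

    #include <dlib/dnn.h>
    #include <random>

    void fill_gaussian(dlib::tensor& t, float stddev)
    {
        std::mt19937 rng;
        std::normal_distribution<float> dist(0, stddev);
        float* p = t.host_write_only();  // no device-to-host transfer; contents undefined
        for (size_t i = 0; i < t.size(); ++i)
            p[i] = dist(rng);            // assigns every location, as the contract requires
    }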