dlib, commit 49a5d39d
authored Apr 04, 2018 by Davis King
Made loss_multiclass_log_per_pixel use CUDA.
parent 6c962dd9
Showing 3 changed files with 139 additions and 0 deletions:

    dlib/dnn/cuda_dlib.cu  +66 -0
    dlib/dnn/cuda_dlib.h   +61 -0
    dlib/dnn/loss.h        +12 -0
dlib/dnn/cuda_dlib.cu
@@ -3,6 +3,7 @@
#include "cuda_utils.h"
#include "cuda_dlib.h"
#include "cudnn_dlibapi.h"

namespace dlib
@@ -1623,6 +1624,71 @@ namespace dlib
            }
        }
    // ----------------------------------------------------------------------------------------

        __device__ float cuda_safe_log(float x, float epsilon = 1e-10)
        {
            // Prevent trying to calculate the logarithm of a very small number (let alone zero)
            if (x >= epsilon)
                return ::log(x);
            else
                return ::log(epsilon);
        }

        __global__ void _cuda_compute_loss_multiclass_log_per_pixel(float* loss_out, float* g, const uint16_t* truth, size_t n, size_t plane_size, size_t sample_size, size_t nk, uint16_t label_to_ignore, const float scale)
        {
            float loss = 0;
            for(auto i : grid_stride_range(0, n))
            {
                const size_t k = (i/plane_size)%nk;
                const size_t idx = (i%plane_size) + plane_size*(i/sample_size);
                const size_t y = truth[idx];
                if (k == y)
                {
                    loss -= cuda_safe_log(g[i]);
                    g[i] = scale*(g[i] - 1);
                }
                else if (y == label_to_ignore)
                {
                    g[i] = 0.f;
                }
                else
                {
                    g[i] = scale*g[i];
                }
            }

            warp_reduce_atomic_add(*loss_out, loss);
        }

        void compute_loss_multiclass_log_per_pixel::
        do_work(
            float* loss_cuda_work_buffer,
            const uint16_t* truth_buffer,
            const tensor& subnetwork_output,
            tensor& gradient,
            double& loss
        )
        {
            CHECK_CUDA(cudaMemset(loss_cuda_work_buffer, 0, sizeof(float)));
            softmax(gradient, subnetwork_output);
            static const uint16_t label_to_ignore = std::numeric_limits<uint16_t>::max();

            // The loss we output is the average loss over the mini-batch, and also over each element of the matrix output.
            const double scale = 1.0 / (subnetwork_output.num_samples() * subnetwork_output.nr() * subnetwork_output.nc());
            launch_kernel(_cuda_compute_loss_multiclass_log_per_pixel, max_jobs(gradient.size()),
                loss_cuda_work_buffer, gradient.device(), truth_buffer, gradient.size(), gradient.nr()*gradient.nc(), gradient.nr()*gradient.nc()*gradient.k(), gradient.k(), label_to_ignore, scale);

            float floss;
            CHECK_CUDA(cudaMemcpy(&floss, loss_cuda_work_buffer, sizeof(float), cudaMemcpyDefault));
            loss = scale*floss;
        }
    // ----------------------------------------------------------------------------------------

    }
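The subtle part of the kernel above is the index arithmetic. The gradient tensor is laid out sample-major as num_samples x k x nr x nc, so for a flat element index i, plane_size (nr*nc) and sample_size (nr*nc*k) recover both the channel of the element and the position of the matching truth label. Note that g already holds softmax probabilities when the kernel runs, since do_work calls softmax() first. A CPU sketch of the per-element update follows; the function name and scaffolding are hypothetical, written for illustration rather than taken from dlib:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <limits>

    // What one CUDA thread does for one element i of the flattened gradient g
    // (layout: N x K x R x C, row-major).  truth holds one uint16_t label per
    // pixel, flattened as N x R x C.  Returns this element's loss contribution.
    float per_element(float* g, const uint16_t* truth, size_t i,
                      size_t plane_size,   // nr*nc
                      size_t sample_size,  // nr*nc*k
                      size_t nk, float scale)
    {
        const size_t k   = (i/plane_size)%nk;                            // channel of element i
        const size_t idx = (i%plane_size) + plane_size*(i/sample_size);  // matching truth pixel
        const uint16_t y = truth[idx];
        const uint16_t label_to_ignore = std::numeric_limits<uint16_t>::max();

        if (k == y)                     // true-class channel: accumulate -log(p), gradient is p-1
        {
            const float contribution = -std::log(std::max(g[i], 1e-10f));
            g[i] = scale*(g[i] - 1);
            return contribution;
        }
        else if (y == label_to_ignore)  // ignored pixel: no loss, no gradient
            g[i] = 0.f;
        else                            // other channels: gradient is just p
            g[i] = scale*g[i];
        return 0;
    }

Summing per_element over every i and multiplying by scale reproduces the value do_work copies back from loss_cuda_work_buffer; on the GPU that sum is formed with warp_reduce_atomic_add instead.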
dlib/dnn/cuda_dlib.h
@@ -409,6 +409,67 @@ namespace dlib
            size_t count_k
        );
    // ----------------------------------------------------------------------------------------

        class compute_loss_multiclass_log_per_pixel
        {
            /*!
                The point of this class is to compute the loss computed by
                loss_multiclass_log_per_pixel, but to do so with CUDA.
            !*/
        public:

            compute_loss_multiclass_log_per_pixel(
            )
            {
                work = device_global_buffer();
            }

            template <typename const_label_iterator>
            void operator() (
                const_label_iterator truth,
                const tensor& subnetwork_output,
                tensor& gradient,
                double& loss
            ) const
            {
                const size_t bytes_per_plane = subnetwork_output.nr()*subnetwork_output.nc()*sizeof(uint16_t);
                // Allocate a cuda buffer to store all the truth images and also one float
                // for the scalar loss output.
                cuda_data_void_ptr buf = work->get(subnetwork_output.num_samples()*bytes_per_plane + sizeof(float));

                cuda_data_void_ptr loss_buf = buf;
                buf = buf + sizeof(float);

                // copy the truth data into a cuda buffer.
                for (long i = 0; i < subnetwork_output.num_samples(); ++i, ++truth)
                {
                    const matrix<uint16_t>& t = *truth;
                    DLIB_ASSERT(t.nr() == subnetwork_output.nr());
                    DLIB_ASSERT(t.nc() == subnetwork_output.nc());
                    memcpy(buf + i*bytes_per_plane, &t(0,0), bytes_per_plane);
                }

                do_work(static_cast<float*>(loss_buf.data()), static_cast<uint16_t*>(buf.data()), subnetwork_output, gradient, loss);
            }

        private:

            static void do_work(
                float* loss_cuda_work_buffer,
                const uint16_t* truth_buffer,
                const tensor& subnetwork_output,
                tensor& gradient,
                double& loss
            );

            std::shared_ptr<resizable_cuda_buffer> work;
        };

    // ------------------------------------------------------------------------------------
    // ------------------------------------------------------------------------------------
    // ------------------------------------------------------------------------------------
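The operator() above packs everything the kernel needs into a single scratch allocation drawn from the shared device buffer (reusing one growable buffer avoids a fresh cudaMalloc per forward pass): one float for the loss accumulator at the front, followed by one nr*nc plane of uint16_t labels per sample. A small sketch of that layout; the helper functions are hypothetical, written only to make the offsets explicit:

    #include <cstddef>
    #include <cstdint>

    // Layout of the scratch buffer requested from work->get(...):
    //
    //   [ float loss | truth plane 0 | truth plane 1 | ... | truth plane N-1 ]
    //
    // where each truth plane holds nr*nc uint16_t labels for one sample.
    size_t scratch_bytes(size_t num_samples, size_t nr, size_t nc)
    {
        const size_t bytes_per_plane = nr*nc*sizeof(uint16_t);
        return sizeof(float) + num_samples*bytes_per_plane;
    }

    size_t truth_plane_offset(size_t sample, size_t nr, size_t nc)
    {
        return sizeof(float) + sample*(nr*nc*sizeof(uint16_t));
    }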
dlib/dnn/loss.h
@@ -2407,6 +2407,13 @@
                "output size = " << output_tensor.nr() << " x " << output_tensor.nc());
            }

#ifdef DLIB_USE_CUDA
            double loss;
            cuda_compute(truth, output_tensor, grad, loss);
            return loss;
#else
            tt::softmax(grad, output_tensor);

            // The loss we output is the average loss over the mini-batch, and also over each element of the matrix output.
@@ -2445,6 +2452,7 @@
                }
            }
            return loss;
#endif
        }

        friend void serialize(const loss_multiclass_log_per_pixel_&, std::ostream& out)
@@ -2478,6 +2486,10 @@
            return ((sample*t.k() + k)*t.nr() + row)*t.nc() + column;
        }

#ifdef DLIB_USE_CUDA
        cuda::compute_loss_multiclass_log_per_pixel cuda_compute;
#endif
    };

    template <typename SUBNET>
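For context, loss_multiclass_log_per_pixel is the loss layer used for semantic segmentation, and the CUDA path added by this commit is selected transparently whenever DLIB_USE_CUDA is defined. A minimal usage sketch; the tiny two-class net is illustrative only, not a network from dlib itself:

    #include <dlib/dnn.h>
    using namespace dlib;

    // Toy two-class segmentation net: the final 1x1 con layer's two filters
    // produce one score plane per class, same nr x nc as the input image.
    using net_type = loss_multiclass_log_per_pixel<
                         con<2,1,1,1,1,
                         relu<con<8,3,3,1,1,
                         input<matrix<rgb_pixel>>>>>>;

    int main()
    {
        std::vector<matrix<rgb_pixel>> images;  // training images
        std::vector<matrix<uint16_t>>  labels;  // per-pixel class indices, same size as each image
        // ... fill images and labels; pixels set to
        // loss_multiclass_log_per_pixel_::label_to_ignore contribute neither
        // loss nor gradient, exactly as in the kernel above.

        net_type net;
        dnn_trainer<net_type> trainer(net);
        trainer.train(images, labels);  // uses compute_loss_multiclass_log_per_pixel on the GPU
    }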