Commit 373cbb57, authored Apr 05, 2017 by Davis King
Added a tt::inv() for computing matrix inversions on the GPU.
Parent: b3dc8169
Showing 6 changed files with 381 additions and 14 deletions:

    dlib/CMakeLists.txt             +23   -2
    dlib/dnn/cusolver_dlibapi.cu    +204  -0
    dlib/dnn/cusolver_dlibapi.h     +75   -0
    dlib/dnn/tensor_tools.cpp       +26   -11
    dlib/dnn/tensor_tools.h         +31   -0
    dlib/test/cublas.cpp            +22   -1
dlib/CMakeLists.txt
@@ -568,14 +568,24 @@ if (NOT TARGET dlib)
                endif()
             endif()
          endif()
       endif()

+      # Find where cuSOLVER is since the FindCUDA cmake package doesn't
+      # bother to look for it.
+      get_filename_component(cuda_blas_path ${CUDA_CUBLAS_LIBRARIES} DIRECTORY)
+      find_library(cusolver cusolver HINTS ${cuda_blas_path})
+      # Also find OpenMP since cuSOLVER needs it.
+      find_package(OpenMP)
+      if (NOT OPENMP_FOUND)
+         message(STATUS "*** Didn't find OpenMP, which is required to use CUDA. ***")
+      endif()
    endif()

-   if (CUDA_FOUND AND cudnn AND cudnn_include AND COMPILER_CAN_DO_CPP_11 AND cuda_test_compile_worked AND cudnn_test_compile_worked)
+   if (CUDA_FOUND AND cudnn AND cudnn_include AND COMPILER_CAN_DO_CPP_11 AND cuda_test_compile_worked AND cudnn_test_compile_worked
+       AND OPENMP_FOUND)
       set(source_files ${source_files}
          dnn/cuda_dlib.cu
          dnn/cudnn_dlibapi.cpp
          dnn/cublas_dlibapi.cpp
+         dnn/cusolver_dlibapi.cu
          dnn/curand_dlibapi.cpp
          dnn/cuda_data_ptr.cpp
          dnn/gpu_data.cpp

@@ -584,6 +594,7 @@ if (NOT TARGET dlib)
                           ${CUDA_CUBLAS_LIBRARIES}
                           ${cudnn}
                           ${CUDA_curand_LIBRARY}
+                          ${cusolver}
                           )
       include_directories(${cudnn_include})
    else()

@@ -648,6 +659,11 @@ if (NOT TARGET dlib)
                              PUBLIC ${dlib_needed_includes}
                              )
    target_link_libraries(dlib PRIVATE ${dlib_needed_libraries})
+   if (OPENMP_FOUND)
+      # Enable OpenMP
+      target_compile_options(dlib PUBLIC ${OpenMP_CXX_FLAGS})
+      target_link_libraries(dlib PUBLIC ${OpenMP_CXX_FLAGS})
+   endif()
    if (UNIX AND NOT DLIB_IN_PROJECT_BUILD)
       if (DLIB_USE_CUDA)
          cuda_add_library(dlib_shared SHARED ${source_files})

@@ -662,6 +678,11 @@ if (NOT TARGET dlib)
                              PUBLIC ${dlib_needed_includes}
                              )
       target_link_libraries(dlib_shared PRIVATE ${dlib_needed_libraries})
+      if (OPENMP_FOUND)
+         # Enable OpenMP
+         target_compile_options(dlib_shared PUBLIC ${OpenMP_CXX_FLAGS})
+         target_link_libraries(dlib_shared PUBLIC ${OpenMP_CXX_FLAGS})
+      endif()
    endif()
 endif()

##### end of if NOT DLIB_ISO_CPP_ONLY ##########################################################
dlib/dnn/cusolver_dlibapi.cu  (new file, mode 100644)
// Copyright (C) 2017  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_DNN_CuSOLVER_CU_
#define DLIB_DNN_CuSOLVER_CU_

#ifdef DLIB_USE_CUDA

#include "cusolver_dlibapi.h"
#include <cublas_v2.h>
#include <cusolverDn.h>
#include "cuda_utils.h"

// ----------------------------------------------------------------------------------------

static const char* cusolver_get_error_string(cusolverStatus_t s)
{
    switch(s)
    {
        case CUSOLVER_STATUS_NOT_INITIALIZED:
            return "CUDA Runtime API initialization failed.";
        case CUSOLVER_STATUS_ALLOC_FAILED:
            return "CUDA Resources could not be allocated.";
        default:
            return "A call to cuSolver failed";
    }
}

// Check the return value of a call to the cuSolver runtime for an error condition.
#define CHECK_CUSOLVER(call)                                                    \
do{                                                                             \
    const cusolverStatus_t error = call;                                        \
    if (error != CUSOLVER_STATUS_SUCCESS)                                       \
    {                                                                           \
        std::ostringstream sout;                                                \
        sout << "Error while calling " << #call << " in file " << __FILE__ << ":" << __LINE__ << ". ";\
        sout << "code: " << error << ", reason: " << cusolver_get_error_string(error);\
        throw dlib::cusolver_error(sout.str());                                 \
    }                                                                           \
}while(false)

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

namespace dlib
{
    namespace cuda
    {

    // -----------------------------------------------------------------------------------

        class cusolver_context
        {
        public:
            // not copyable
            cusolver_context(const cusolver_context&) = delete;
            cusolver_context& operator=(const cusolver_context&) = delete;

            cusolver_context()
            {
                handles.resize(16);
            }
            ~cusolver_context()
            {
                for (auto h : handles)
                {
                    if (h)
                        cusolverDnDestroy(h);
                }
            }

            cusolverDnHandle_t get_handle (
            )
            {
                int new_device_id;
                CHECK_CUDA(cudaGetDevice(&new_device_id));
                // make room for more devices if needed
                if (new_device_id >= (long)handles.size())
                    handles.resize(new_device_id+16);

                // If we don't have a handle already for this device then make one
                if (!handles[new_device_id])
                    CHECK_CUSOLVER(cusolverDnCreate(&handles[new_device_id]));

                // Finally, return the handle for the current device
                return handles[new_device_id];
            }

        private:

            std::vector<cusolverDnHandle_t> handles;
        };

        static cusolverDnHandle_t context()
        {
            thread_local cusolver_context c;
            return c.get_handle();
        }

    // ------------------------------------------------------------------------------------
    // ------------------------------------------------------------------------------------
    // ------------------------------------------------------------------------------------

        __global__ void _cuda_set_to_identity_matrix(float* m, size_t nr)
        {
            for (auto j : grid_stride_range(0, nr*nr))
            {
                if (j%(nr+1) == 0)
                    m[j] = 1;
                else
                    m[j] = 0;
            }
        }

        void set_to_identity_matrix (
            tensor& m
        )
        {
            DLIB_CASSERT(m.size() == m.num_samples()*m.num_samples());
            launch_kernel(_cuda_set_to_identity_matrix, max_jobs(m.size()), m.device(), m.num_samples());
        }

    // ------------------------------------------------------------------------------------

        inv::~inv()
        {
            sync_if_needed();
        }

    // ------------------------------------------------------------------------------------

        void inv::
        operator() (
            const tensor& m_,
            resizable_tensor& out
        )
        {
            DLIB_CASSERT(m_.size() == m_.num_samples()*m_.num_samples(), "Input matrix must be square if you want to invert it.");
            m = m_;

            out.copy_size(m);
            set_to_identity_matrix(out);

            const int nc = m.num_samples();
            int Lwork;
            CHECK_CUSOLVER(cusolverDnSgetrf_bufferSize(context(), nc, nc, m.device(), nc, &Lwork));
            if (Lwork > (int)workspace.size())
            {
                sync_if_needed();
                workspace = cuda_data_ptr<float>(Lwork);
            }
            if (nc > (int)Ipiv.size())
            {
                sync_if_needed();
                Ipiv = cuda_data_ptr<int>(nc);
            }
            if (info.size() != 1)
            {
                info = cuda_data_ptr<int>(1);
            }

            CHECK_CUSOLVER(cusolverDnSgetrf(context(), nc, nc, m.device(), nc, workspace, Ipiv, info));
            CHECK_CUSOLVER(cusolverDnSgetrs(context(), CUBLAS_OP_N, nc, nc, m.device(), nc, Ipiv, out.device(), nc, info));

            did_work_lately = true;
        }

    // ------------------------------------------------------------------------------------

        int inv::
        get_last_status(
        )
        {
            std::vector<int> linfo;
            memcpy(linfo, info);
            if (linfo.size() != 0)
                return linfo[0];
            else
                return 0;
        }

    // ------------------------------------------------------------------------------------

        void inv::
        sync_if_needed()
        {
            if (did_work_lately)
            {
                did_work_lately = false;
                // make sure we wait until any previous kernel launches have finished
                // before we do something like deallocate the GPU memory.
                cudaDeviceSynchronize();
            }
        }

    // ------------------------------------------------------------------------------------

    }
}

#endif // DLIB_USE_CUDA

#endif // DLIB_DNN_CuSOLVER_CU_
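A note on the algorithm in inv::operator() above: the input is LU-factorized in place by cusolverDnSgetrf, and the inverse is then obtained by solving A·X = I with cusolverDnSgetrs, where out has already been filled with the identity by set_to_identity_matrix. A rough CPU-side analogue of that factor-then-solve idea, sketched here with dlib's lu_decomposition purely for illustration (it is not part of this commit), is:

// Illustration only: the same "factor once, then solve A*X = I" idea on the CPU.
#include <dlib/matrix.h>

dlib::matrix<float> invert_via_lu(const dlib::matrix<float>& A)
{
    dlib::lu_decomposition<dlib::matrix<float>> lu(A);      // A = P*L*U
    return lu.solve(dlib::identity_matrix<float>(A.nr()));  // X such that A*X == I
}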
dlib/dnn/cusolver_dlibapi.h  (new file, mode 100644)
// Copyright (C) 2017  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_DNN_CuSOLVER_H_
#define DLIB_DNN_CuSOLVER_H_

#ifdef DLIB_USE_CUDA

#include "tensor.h"
#include "cuda_errors.h"
#include "cuda_data_ptr.h"
#include "../noncopyable.h"

namespace dlib
{
    namespace cuda
    {

    // -----------------------------------------------------------------------------------

        class inv : noncopyable
        {
            /*!
                WHAT THIS OBJECT REPRESENTS
                    This is a functor for doing matrix inversion on the GPU.  The only
                    reason it's an object is to avoid the reallocation of some GPU memory
                    blocks if you want to do a bunch of matrix inversions in a row.
            !*/
        public:

            inv() = default;
            ~inv();

            void operator() (
                const tensor& m,
                resizable_tensor& out
            );
            /*!
                requires
                    - m.size() == m.num_samples()*m.num_samples()
                      (i.e. mat(m) must be a square matrix)
                ensures
                    - out == inv(mat(m));
            !*/

            int get_last_status(
            );
            /*!
                ensures
                    - returns 0 if the last matrix inversion was successful and != 0
                      otherwise.
            !*/

        private:

            void sync_if_needed();

            bool did_work_lately = false;
            resizable_tensor m;
            cuda_data_ptr<float> workspace;
            cuda_data_ptr<int> Ipiv;
            cuda_data_ptr<int> info;
        };

    // ------------------------------------------------------------------------------------

    }
}

#endif // DLIB_USE_CUDA

#endif // DLIB_DNN_CuSOLVER_H_
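As the WHAT THIS OBJECT REPRESENTS note above says, the point of making inv an object is to reuse its workspace, Ipiv, and info buffers across many inversions. A usage sketch along those lines, again illustration only (not part of the commit) and assuming a CUDA-enabled build where this header is reachable through <dlib/dnn.h>:

// Illustration only: reuse one cuda::inv object for many inversions and
// check get_last_status() after each call.
#include <dlib/dnn.h>
#include <iostream>
#include <vector>

void invert_many(const std::vector<dlib::resizable_tensor>& mats)
{
    dlib::cuda::inv cinv;          // buffers allocated once, reused below
    dlib::resizable_tensor out;
    for (const auto& m : mats)
    {
        cinv(m, out);              // out == inv(mat(m))
        if (cinv.get_last_status() != 0)
            std::cout << "inversion failed (input was likely singular)\n";
    }
}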
dlib/dnn/tensor_tools.cpp
@@ -792,20 +792,35 @@ namespace dlib { namespace tt
// ----------------------------------------------------------------------------------------

    void copy_tensor(
            tensor& dest,
            size_t dest_k_offset,
            const tensor& src,
            size_t src_k_offset,
            size_t count_k
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::copy_tensor(dest, dest_k_offset, src, src_k_offset, count_k);
#else
        cpu::copy_tensor(dest, dest_k_offset, src, src_k_offset, count_k);
#endif
    }

// ----------------------------------------------------------------------------------------

+    void inv::
+    operator() (
+        const tensor& m,
+        resizable_tensor& out
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        finv(m,out);
+#else
+        out = dlib::inv(mat(m));
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
dlib/dnn/tensor_tools.h
@@ -6,6 +6,7 @@
 #include "tensor.h"
 #include "cudnn_dlibapi.h"
 #include "cublas_dlibapi.h"
+#include "cusolver_dlibapi.h"
 #include "curand_dlibapi.h"
 #include "cpu_dlib.h"
 #include "cuda_dlib.h"

@@ -123,6 +124,36 @@ namespace dlib { namespace tt
             - performs: dest = alpha*L*R + beta*mat(dest)
     !*/

// ----------------------------------------------------------------------------------------

+    class inv
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This is a functor for doing matrix inversion on the GPU.  The only
+                reason it's an object is to avoid the reallocation of some GPU memory
+                blocks if you want to do a bunch of matrix inversions in a row.
+        !*/
+    public:
+
+        void operator() (
+            const tensor& m,
+            resizable_tensor& out
+        );
+        /*!
+            requires
+                - m.size() == m.num_samples()*m.num_samples()
+                  (i.e. mat(m) must be a square matrix)
+            ensures
+                - out == inv(mat(m));
+        !*/
+
+    private:
+#ifdef DLIB_USE_CUDA
+        cuda::inv finv;
+#endif
+    };
+
+// ----------------------------------------------------------------------------------------

    class tensor_rand
dlib/test/cublas.cpp
@@ -8,7 +8,7 @@
 #include <cstdlib>
 #include <ctime>
 #include <vector>
-#include "../dnn/cublas_dlibapi.h"
+#include "../dnn/tensor_tools.h"
 #include "tester.h"

@@ -25,6 +25,26 @@ namespace
     logger dlog("test.cublas");

+    void test_inv()
+    {
+        tt::tensor_rand rnd;
+        dlib::tt::inv tinv;
+        dlib::cuda::inv cinv;
+        resizable_tensor minv1, minv2;
+        for (int n = 1; n < 20; ++n)
+        {
+            print_spinner();
+            resizable_tensor m(n,n);
+            rnd.fill_uniform(m);
+            tinv(m, minv1);
+            cinv(m, minv2);
+            matrix<float> mref = inv(mat(m));
+            DLIB_TEST_MSG(mean(abs(mref-mat(minv1)))/mean(abs(mref)) < 1e-5, mean(abs(mref-mat(minv1)))/mean(abs(mref)) << " n: " << n);
+            DLIB_TEST_MSG(mean(abs(mref-mat(minv2)))/mean(abs(mref)) < 1e-5, mean(abs(mref-mat(minv2)))/mean(abs(mref)) << " n: " << n);
+        }
+    }

     class cublas_tester : public tester
     {

@@ -38,6 +58,7 @@ namespace
         void perform_test (
         )
         {
+            test_inv();

             {
                 resizable_tensor a(4,3), b(3,4), c(3,3);