Commit bb8e0bc8 authored by Davis King's avatar Davis King

More cleanup

parent 0ce6ed5b
...@@ -59,6 +59,55 @@ namespace dlib ...@@ -59,6 +59,55 @@ namespace dlib
} }
} }
// ------------------------------------------------------------------------------------
class cudnn_device_buffer
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            A registry of scratch buffers, one per CUDA device.  get_buffer()
            returns the buffer belonging to whichever device is currently
            selected on the calling thread, creating it on first use.  Buffers
            are held via weak_ptr so each one is deallocated as soon as all
            outside shared_ptr references to it are destructed, and is
            recreated on the next request.
    !*/
public:
    // not copyable
    cudnn_device_buffer(const cudnn_device_buffer&) = delete;
    cudnn_device_buffer& operator=(const cudnn_device_buffer&) = delete;

    cudnn_device_buffer()
    {
        // Preallocate slots for 16 devices.  get_buffer() grows the vector
        // on demand if a larger device id is ever encountered.
        buffers.resize(16);
    }

    // Rule of Zero: no user-declared destructor needed, the members clean
    // themselves up.  (The previous empty ~cudnn_device_buffer() has been
    // removed; the compiler-generated one is identical.)

    std::shared_ptr<resizable_cuda_buffer> get_buffer (
    )
    /*!
        ensures
            - returns the resizable_cuda_buffer associated with the CUDA
              device currently selected on this thread, allocating a fresh
              one if no live buffer exists for that device.
    !*/
    {
        int new_device_id;
        CHECK_CUDA(cudaGetDevice(&new_device_id));
        // make room for more devices if needed
        if (new_device_id >= static_cast<long>(buffers.size()))
            buffers.resize(new_device_id+16);

        // If we don't have a buffer already for this device then make one
        std::shared_ptr<resizable_cuda_buffer> buff = buffers[new_device_id].lock();
        if (!buff)
        {
            buff = std::make_shared<resizable_cuda_buffer>();
            buffers[new_device_id] = buff;
        }

        // Finally, return the buffer for the current device
        return buff;
    }

private:
    // Indexed by device id.  weak_ptr (not shared_ptr) so this registry
    // never keeps a buffer alive by itself.
    std::vector<std::weak_ptr<resizable_cuda_buffer>> buffers;
};
std::shared_ptr<resizable_cuda_buffer> device_global_buffer()
{
thread_local cudnn_device_buffer buffer;
return buffer.get_buffer();
}
// ------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------
} }
......
...@@ -179,6 +179,26 @@ namespace dlib ...@@ -179,6 +179,26 @@ namespace dlib
cuda_data_void_ptr buffer; cuda_data_void_ptr buffer;
}; };
// ----------------------------------------------------------------------------------------
std::shared_ptr<resizable_cuda_buffer> device_global_buffer(
);
/*!
ensures
- Returns a pointer to a globally shared CUDA memory buffer on the
currently selected CUDA device. The buffer is also thread local. So
each host thread will get its own buffer. You can use this global buffer
as scratch space for CUDA computations that all take place on the default
stream. Using it in this way ensures that there aren't any race conditions
involving the use of the buffer.
- The global buffer is deallocated once all references to it are
destructed. It will be reallocated as required. So if you want to avoid
these reallocations then hold a copy of the shared_ptr returned by this
function.
!*/
// ----------------------------------------------------------------------------------------
} }
} }
......
...@@ -117,55 +117,6 @@ namespace dlib ...@@ -117,55 +117,6 @@ namespace dlib
thread_local cudnn_context c; thread_local cudnn_context c;
return c.get_handle(); return c.get_handle();
} }
// ------------------------------------------------------------------------------------
// NOTE(review): this is the pre-move copy of cudnn_device_buffer (these are
// the removal lines of the diff); the class now lives in the cuda utils file.
class cudnn_device_buffer
{
public:
// not copyable
cudnn_device_buffer(const cudnn_device_buffer&) = delete;
cudnn_device_buffer& operator=(const cudnn_device_buffer&) = delete;
cudnn_device_buffer()
{
// Start with room for 16 devices; get_buffer() grows this on demand.
buffers.resize(16);
}
~cudnn_device_buffer()
{
}
// Returns the scratch buffer for the CUDA device currently selected on
// this thread, creating one if no live buffer exists for that device.
std::shared_ptr<resizable_cuda_buffer> get_buffer (
)
{
int new_device_id;
CHECK_CUDA(cudaGetDevice(&new_device_id));
// make room for more devices if needed
if (new_device_id >= (long)buffers.size())
buffers.resize(new_device_id+16);
// If we don't have a buffer already for this device then make one
std::shared_ptr<resizable_cuda_buffer> buff = buffers[new_device_id].lock();
if (!buff)
{
buff = std::make_shared<resizable_cuda_buffer>();
buffers[new_device_id] = buff;
}
// Finally, return the buffer for the current device
return buff;
}
private:
// Indexed by device id.  weak_ptr so the registry never keeps a buffer
// alive by itself: once all outside shared_ptrs die the buffer is freed.
std::vector<std::weak_ptr<resizable_cuda_buffer>> buffers;
};
// NOTE(review): pre-move copy (removal lines of the diff); the definition
// now lives in the cuda utils file.
std::shared_ptr<resizable_cuda_buffer> device_global_buffer()
{
// thread_local: each host thread gets its own per-device buffer registry,
// so concurrent threads never share scratch space.
thread_local cudnn_device_buffer buffer;
return buffer.get_buffer();
}
// ------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------
class cudnn_activation_descriptor class cudnn_activation_descriptor
......
...@@ -17,24 +17,6 @@ namespace dlib ...@@ -17,24 +17,6 @@ namespace dlib
namespace cuda namespace cuda
{ {
// ----------------------------------------------------------------------------------------
std::shared_ptr<resizable_cuda_buffer> device_global_buffer(
);
/*!
ensures
- Returns a pointer to a globally shared CUDA memory buffer on the
currently selected CUDA device. The buffer is also thread local. So
each host thread will get its own buffer. You can use this global buffer
as scratch space for CUDA computations that all take place on the default
stream. Using it in this way ensures that there aren't any race conditions
involving the use of the buffer.
- The global buffer is deallocated once all references to it are
destructed. It will be reallocated as required. So if you want to avoid
these reallocations then hold a copy of the shared_ptr returned by this
function.
!*/
// ----------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------
class tensor_descriptor class tensor_descriptor
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment