Commit 5c623e3c authored by Juha Reunanen, committed by Davis E. King

Apply the #1514 fix even on non-Windows platforms, and change all remaining cudaStreamSynchronize calls (#1596)

* Apply the #1514 fix even on non-Windows platforms

* Try to fix #1513 even more by circumventing the remaining cudaStreamSynchronize calls

* Make the fix apply only if CUDA_VERSION == 10000

* Make the fix apply on CUDA 9.2 also

* CHECK_CUDA(cudaStreamSynchronize(stream));
parent 9ca72b3e
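
The workaround described in the commit message replaces a blocking cudaStreamSynchronize call with a polling loop over cudaStreamQuery, so the host thread never enters the blocking wait that issue #1513 reports as freezing on some CUDA 9.2/10.0 setups. The following is a minimal self-contained sketch of the same idea, not the dlib code itself: the function name is illustrative and a plain exception stands in for dlib's CHECK_CUDA macro.

    #include <cuda_runtime.h>
    #include <stdexcept>
    #include <string>

    // Wait for all work queued on `stream` to finish by polling cudaStreamQuery,
    // instead of calling the blocking cudaStreamSynchronize.
    void synchronize_stream_by_polling(cudaStream_t stream)  // hypothetical name for illustration
    {
        while (true)
        {
            const cudaError_t err = cudaStreamQuery(stream);
            if (err == cudaSuccess)
                return;                    // all queued work has completed
            if (err != cudaErrorNotReady)  // anything other than "not ready" is a real CUDA error
                throw std::runtime_error(std::string("CUDA error: ") + cudaGetErrorString(err));
            // cudaErrorNotReady: work is still in flight, keep polling.
        }
    }

Note that this trades the blocking wait for a busy-wait that keeps one CPU core spinning while the stream drains, which is why the diff below guards it with CUDA_VERSION >= 9020 && CUDA_VERSION <= 10000, the range where the freeze was observed.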
@@ -79,12 +79,33 @@ namespace dlib
         }
 
 // ----------------------------------------------------------------------------------------
 
+    void synchronize_stream(cudaStream_t stream)
+    {
+#if CUDA_VERSION >= 9020 && CUDA_VERSION <= 10000
+        // This should be pretty much the same as cudaStreamSynchronize, which for some
+        // reason makes training freeze in some cases.
+        // (see https://github.com/davisking/dlib/issues/1513)
+        while (true)
+        {
+            cudaError_t err = cudaStreamQuery(stream);
+            switch (err)
+            {
+            case cudaSuccess: return;      // now we are synchronized
+            case cudaErrorNotReady: break; // continue waiting
+            default: CHECK_CUDA(err);      // unexpected error: throw
+            }
+        }
+#else // CUDA_VERSION
+        CHECK_CUDA(cudaStreamSynchronize(stream));
+#endif // CUDA_VERSION
+    }
+
     void gpu_data::
     wait_for_transfer_to_finish() const
     {
         if (have_active_transfer)
         {
-            CHECK_CUDA(cudaStreamSynchronize((cudaStream_t)cuda_stream.get()));
+            synchronize_stream((cudaStream_t)cuda_stream.get());
             have_active_transfer = false;
             // Check for errors.  These calls to cudaGetLastError() are what help us find
             // out if our kernel launches have been failing.
@@ -118,25 +139,6 @@ namespace dlib
         }
     }
 
-#ifdef WIN32
-    // This should be pretty much the same as cudaStreamSynchronize, which for some
-    // reason makes training freeze on some Windows machines.
-    // (see https://github.com/davisking/dlib/issues/1513)
-    void synchronize_stream(cudaStream_t stream)
-    {
-        while (true)
-        {
-            cudaError_t err = cudaStreamQuery(stream);
-            switch (err)
-            {
-            case cudaSuccess: return;      // now we are synchronized
-            case cudaErrorNotReady: break; // continue waiting
-            default: CHECK_CUDA(err);      // unexpected error: throw
-            }
-        }
-    }
-#endif // WIN32
-
     void gpu_data::
     async_copy_to_device() const
     {
@@ -146,12 +148,7 @@ namespace dlib
         {
             // Wait for any possible CUDA kernels that might be using our memory block to
             // complete before we overwrite the memory.
-#ifdef WIN32
             synchronize_stream(0);
-#else
-            CHECK_CUDA(cudaStreamSynchronize(0));
-#endif
             device_in_use = false;
         }
         CHECK_CUDA(cudaMemcpyAsync(data_device.get(), data_host.get(), data_size*sizeof(float), cudaMemcpyHostToDevice, (cudaStream_t)cuda_stream.get()));
@@ -171,7 +168,7 @@ namespace dlib
         {
             // Wait for any possible CUDA kernels that might be using our memory block to
             // complete before we free the memory.
-            CHECK_CUDA(cudaStreamSynchronize(0));
+            synchronize_stream(0);
             device_in_use = false;
         }
         wait_for_transfer_to_finish();
@@ -188,7 +185,7 @@ namespace dlib
         {
             // Wait for any possible CUDA kernels that might be using our memory block to
             // complete before we free the memory.
-            CHECK_CUDA(cudaStreamSynchronize(0));
+            synchronize_stream(0);
             device_in_use = false;
         }
         wait_for_transfer_to_finish();