Commit d7fb51e6, authored by Juha Reunanen and committed by Davis E. King

Fix #1849 by calling device_global_buffer() unconditionally (#1862)

* Hold on to the CUDA buffer - second try
see: https://github.com/davisking/dlib/pull/1855#discussion_r314666348

* Fix #1849 by calling device_global_buffer() unconditionally
parent b02be8c7
...@@ -438,16 +438,12 @@ namespace dlib ...@@ -438,16 +438,12 @@ namespace dlib
const size_t bytes_per_plane = subnetwork_output.nr()*subnetwork_output.nc()*sizeof(uint16_t); const size_t bytes_per_plane = subnetwork_output.nr()*subnetwork_output.nc()*sizeof(uint16_t);
// Allocate a cuda buffer to store all the truth images and also one float // Allocate a cuda buffer to store all the truth images and also one float
// for the scalar loss output. // for the scalar loss output.
if (!work) work = device_global_buffer();
{ buf = work->get(subnetwork_output.num_samples()*bytes_per_plane + sizeof(float));
work = device_global_buffer();
}
cuda_data_void_ptr buf = work->get(subnetwork_output.num_samples()*bytes_per_plane + sizeof(float));
cuda_data_void_ptr loss_buf = buf; cuda_data_void_ptr loss_buf = buf;
buf = buf+sizeof(float); buf = buf+sizeof(float);
// copy the truth data into a cuda buffer. // copy the truth data into a cuda buffer.
for (long i = 0; i < subnetwork_output.num_samples(); ++i, ++truth) for (long i = 0; i < subnetwork_output.num_samples(); ++i, ++truth)
{ {
...@@ -471,6 +467,7 @@ namespace dlib ...@@ -471,6 +467,7 @@ namespace dlib
); );
mutable std::shared_ptr<resizable_cuda_buffer> work; mutable std::shared_ptr<resizable_cuda_buffer> work;
mutable cuda_data_void_ptr buf;
}; };
// ------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment