Commit b54b1c44 authored by Davis King

merged

parents f2ad2087 d7e6f1d7
@@ -118,6 +118,25 @@ namespace dlib
         }
     }
 
+#ifdef WIN32
+    // This should be pretty much the same as cudaStreamSynchronize, which for some
+    // reason makes training freeze on some Windows machines.
+    // (see https://github.com/davisking/dlib/issues/1513)
+    void synchronize_stream(cudaStream_t stream)
+    {
+        while (true)
+        {
+            cudaError_t err = cudaStreamQuery(stream);
+            switch (err)
+            {
+                case cudaSuccess: return;       // now we are synchronized
+                case cudaErrorNotReady: break;  // continue waiting
+                default: CHECK_CUDA(err);       // unexpected error: throw
+            }
+        }
+    }
+#endif // WIN32
+
     void gpu_data::
     async_copy_to_device() const
     {
@@ -127,7 +146,12 @@ namespace dlib
         {
             // Wait for any possible CUDA kernels that might be using our memory block to
             // complete before we overwrite the memory.
+#ifdef WIN32
+            synchronize_stream(0);
+#else
             CHECK_CUDA(cudaStreamSynchronize(0));
+#endif
             device_in_use = false;
         }
         CHECK_CUDA(cudaMemcpyAsync(data_device.get(), data_host.get(), data_size*sizeof(float), cudaMemcpyHostToDevice, (cudaStream_t)cuda_stream.get()));
......
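For context, the polling workaround added above can be exercised in isolation. The sketch below is not part of the commit: the function example_poll_sync and the toy main() are made-up names, and error handling is reduced to a printf so the example stays short. It queues an asynchronous copy on the default stream and then waits for it by repeatedly calling cudaStreamQuery, the same pattern synchronize_stream uses as a stand-in for cudaStreamSynchronize.

#include <cuda_runtime.h>
#include <cstdio>
#include <vector>

// Poll the stream until all queued work has finished (illustrative only).
static void example_poll_sync(cudaStream_t stream)
{
    while (true)
    {
        const cudaError_t err = cudaStreamQuery(stream);
        if (err == cudaSuccess)
            return;                      // stream is idle: we are synchronized
        if (err != cudaErrorNotReady)    // anything else is a real error
        {
            std::printf("CUDA error: %s\n", cudaGetErrorString(err));
            return;
        }
        // cudaErrorNotReady: work is still pending, keep polling.
    }
}

int main()
{
    std::vector<float> host(1024, 1.0f);
    float* device = nullptr;
    cudaMalloc((void**)&device, host.size()*sizeof(float));
    // Queue an async host-to-device copy on the default stream, then wait for
    // it to complete by polling rather than by calling cudaStreamSynchronize.
    cudaMemcpyAsync(device, host.data(), host.size()*sizeof(float),
                    cudaMemcpyHostToDevice, 0);
    example_poll_sync(0);
    cudaFree(device);
    return 0;
}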
@@ -195,19 +195,19 @@ namespace dlib
             for (long i = 0; i < output_tensor.num_samples(); ++i)
             {
                 const float y = *truth++;
-                DLIB_CASSERT(y == +1 || y == -1, "y: " << y);
+                DLIB_CASSERT(y != 0, "y: " << y);
                 float temp;
                 if (y > 0)
                 {
                     temp = log1pexp(-out_data[i]);
-                    loss += scale*temp;
-                    g[i] = scale*(g[i]-1);
+                    loss += y*scale*temp;
+                    g[i] = y*scale*(g[i]-1);
                 }
                 else
                 {
                     temp = -(-out_data[i]-log1pexp(-out_data[i]));
-                    loss += scale*temp;
-                    g[i] = scale*g[i];
+                    loss += -y*scale*temp;
+                    g[i] = -y*scale*g[i];
                 }
             }
             return loss;
......
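Restating what the updated loop computes (the symbols below are introduced here for illustration and do not appear in the code): write f_i for the network output for sample i, y_i for its label, and σ for the logistic function. Up to the 1/num_samples scale factor, and assuming g[i] enters the loop holding σ(f_i), the per-sample loss and the gradient stored back into g[i] are

\[
  \ell_i = |y_i|\,\log\!\left(1 + e^{-\operatorname{sign}(y_i)\,f_i}\right),
  \qquad
  \frac{\partial \ell_i}{\partial f_i} = |y_i|\left(\sigma(f_i) - \mathbf{1}[y_i > 0]\right),
\]

which reduces to the previous unweighted log loss when y_i ∈ {+1, -1}.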
@@ -232,10 +232,16 @@
        WHAT THIS OBJECT REPRESENTS
            This object implements the loss layer interface defined above by
            EXAMPLE_LOSS_LAYER_.  In particular, it implements the log loss, which is
-            appropriate for binary classification problems.  Therefore, the possible
-            labels when using this loss are +1 and -1.  Moreover, it will cause the
-            network to produce outputs > 0 when predicting a member of the +1 class and
-            values < 0 otherwise.
+            appropriate for binary classification problems.  Therefore, there are two
+            possible classes of labels: positive (> 0) and negative (< 0).  The absolute
+            value of the label represents its weight.  Putting a larger weight on a
+            sample increases the importance of getting its prediction correct during
+            training.  A good rule of thumb is to use weights with absolute value 1
+            unless you have a very unbalanced training dataset; in that case, give a
+            larger weight to the class with fewer training examples.
+
+            This loss will cause the network to produce outputs > 0 when predicting a
+            member of the positive class and values < 0 otherwise.
 
            To be more specific, this object contains a sigmoid layer followed by a
            cross-entropy layer.
......
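To illustrate the weighted-label convention documented above, here is a small sketch (not part of the commit; make_weighted_labels and the weighting rule are illustrative assumptions, not a dlib API, and it assumes the positive class is the rarer one): the sign of each label encodes the class and its magnitude the per-sample weight, with the minority class upweighted so both classes contribute comparably to the loss.

#include <cstddef>
#include <vector>

// Illustrative only: turn binary class indicators into weighted labels of the
// form described above (sign = class, magnitude = weight).
std::vector<float> make_weighted_labels(const std::vector<bool>& is_positive)
{
    std::size_t num_pos = 0;
    for (bool p : is_positive)
        num_pos += p ? 1 : 0;
    const std::size_t num_neg = is_positive.size() - num_pos;
    // e.g. 1000 negatives and 100 positives -> each positive gets weight 10.
    const float pos_weight = num_pos ? static_cast<float>(num_neg)/num_pos : 1.0f;

    std::vector<float> labels;
    labels.reserve(is_positive.size());
    for (bool p : is_positive)
        labels.push_back(p ? +pos_weight : -1.0f);
    return labels;
}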
@@ -420,14 +420,14 @@ namespace dlib
             // pick a random bootstrap of the data.
             std::vector<std::pair<float,uint32_t>> idxs(y.size());
             for (auto& idx : idxs)
-                idx = std::make_pair(0,rnd.get_integer(y.size()));
+                idx = std::make_pair(0.0f, static_cast<uint32_t>(rnd.get_integer(y.size())));
 
             // We are going to use ranges_to_process as a stack that tracks which
             // range of samples we are going to split next.
             std::vector<range_t> ranges_to_process;
             // start with the root of the tree, i.e. the entire range of training
             // samples.
-            ranges_to_process.emplace_back(sumy,0,y.size());
+            ranges_to_process.emplace_back(sumy, 0, static_cast<uint32_t>(y.size()));
             // push an unpopulated root node into the tree.  We will populate it
             // when we process its corresponding range.
             tree.emplace_back();
@@ -477,7 +477,7 @@ namespace dlib
                     // Add to leaves.  Don't forget to set the pointer in the
                     // parent node to the newly allocated leaf node.
                     tree[range.tree_idx].left = leaves.size() + max_num_nodes;
-                    leaves.emplace_back(left_split.avg());
+                    leaves.emplace_back(static_cast<float>(left_split.avg()));
                 }
@@ -501,7 +501,7 @@ namespace dlib
                     // Add to leaves.  Don't forget to set the pointer in the
                     // parent node to the newly allocated leaf node.
                     tree[range.tree_idx].right = leaves.size() + max_num_nodes;
-                    leaves.emplace_back(right_split.avg());
+                    leaves.emplace_back(static_cast<float>(right_split.avg()));
                 }
             } // end while (still building tree)
......
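The bootstrap step in the @@ -420 hunk above draws y.size() indices uniformly with replacement, pairing each with a placeholder value the tree builder fills in later. A standalone sketch of the same idea (illustrative only; std::mt19937 stands in for dlib's rnd object, and bootstrap_indices is a made-up name):

#include <cstdint>
#include <random>
#include <utility>
#include <vector>

// Draw n indices uniformly with replacement from [0, n).  Assumes n > 0.
std::vector<std::pair<float,uint32_t>> bootstrap_indices(std::size_t n, std::mt19937& rng)
{
    std::uniform_int_distribution<uint32_t> pick(0, static_cast<uint32_t>(n) - 1);
    std::vector<std::pair<float,uint32_t>> idxs(n);
    for (auto& idx : idxs)
        idx = std::make_pair(0.0f, pick(rng));
    return idxs;
}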
@@ -129,7 +129,7 @@ std::shared_ptr<simple_object_detector_py> merge_simple_object_detectors (
 {
     DLIB_CASSERT(len(detectors) > 0);
     std::vector<simple_object_detector> temp;
-    for (auto& d : detectors)
+    for (const auto& d : detectors)
         temp.push_back(d.cast<simple_object_detector_py>().detector);
     simple_object_detector_py result;
......