Commit b54b1c44 authored by Davis King

merged

parents f2ad2087 d7e6f1d7
@@ -118,6 +118,25 @@ namespace dlib
}
}
+#ifdef WIN32
+    // This should be pretty much the same as cudaStreamSynchronize, which for some
+    // reason makes training freeze on some Windows machines.
+    // (see https://github.com/davisking/dlib/issues/1513)
+    void synchronize_stream(cudaStream_t stream)
+    {
+        while (true)
+        {
+            cudaError_t err = cudaStreamQuery(stream);
+            switch (err)
+            {
+                case cudaSuccess: return;      // now we are synchronized
+                case cudaErrorNotReady: break; // continue waiting
+                default: CHECK_CUDA(err);      // unexpected error: throw
+            }
+        }
+    }
+#endif // WIN32
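Note: the added function replaces the single blocking cudaStreamSynchronize() call with repeated cudaStreamQuery() polls. A minimal standalone sketch of the same idea, using only the CUDA runtime API; the check() helper, busy_kernel, and synchronize_by_polling names are illustrative and not part of dlib:

    #include <cstdio>
    #include <cstdlib>
    #include <cuda_runtime.h>

    // Minimal error helper (illustrative; dlib uses CHECK_CUDA instead).
    static void check(cudaError_t err)
    {
        if (err != cudaSuccess)
        {
            std::fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
            std::exit(1);
        }
    }

    // Trivial kernel so the default stream has something to finish.
    __global__ void busy_kernel() {}

    // Wait for a stream by polling cudaStreamQuery() instead of calling
    // cudaStreamSynchronize(), mirroring the workaround in this commit.
    static void synchronize_by_polling(cudaStream_t stream)
    {
        while (true)
        {
            const cudaError_t err = cudaStreamQuery(stream);
            if (err == cudaSuccess)
                return;                 // all work on the stream has finished
            if (err != cudaErrorNotReady)
                check(err);             // a real error: report it and bail out
            // cudaErrorNotReady: work is still pending, keep polling.
        }
    }

    int main()
    {
        busy_kernel<<<1, 1>>>();        // enqueue some work on the default stream
        synchronize_by_polling(0);      // drain it without cudaStreamSynchronize()
        std::puts("stream drained");
        return 0;
    }

The polling loop keeps a CPU thread busy until the stream drains; that is the cost of sidestepping the hang reported in issue #1513.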
void gpu_data::
async_copy_to_device() const
{
@@ -127,7 +146,12 @@ namespace dlib
{
// Wait for any possible CUDA kernels that might be using our memory block to
// complete before we overwrite the memory.
+#ifdef WIN32
+    synchronize_stream(0);
+#else
    CHECK_CUDA(cudaStreamSynchronize(0));
+#endif
device_in_use = false;
}
CHECK_CUDA(cudaMemcpyAsync(data_device.get(), data_host.get(), data_size*sizeof(float), cudaMemcpyHostToDevice, (cudaStream_t)cuda_stream.get()));
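This hunk also shows the ordering that the copy relies on: the default stream is drained before the buffer is overwritten, and only then is the copy queued on the object's private stream. A rough sketch of that ordering, reusing the check() and synchronize_by_polling() helpers from the previous sketch (the buffer and function names here are hypothetical, not dlib's API):

    // Copy host -> device on a private stream, but only after any kernel on the
    // default stream that might still read device_buf has finished.
    void copy_to_device(float* device_buf, const float* host_buf, size_t n,
                        cudaStream_t copy_stream)
    {
    #ifdef WIN32
        synchronize_by_polling(0);            // polling workaround on Windows
    #else
        check(cudaStreamSynchronize(0));      // default stream must be idle first
    #endif
        check(cudaMemcpyAsync(device_buf, host_buf, n*sizeof(float),
                              cudaMemcpyHostToDevice, copy_stream));
    }

For the cudaMemcpyAsync call to be truly asynchronous the host buffer needs to be pinned (e.g. allocated with cudaMallocHost).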
@@ -195,19 +195,19 @@ namespace dlib
for (long i = 0; i < output_tensor.num_samples(); ++i)
{
const float y = *truth++;
-DLIB_CASSERT(y == +1 || y == -1, "y: " << y);
+DLIB_CASSERT(y != 0, "y: " << y);
float temp;
if (y > 0)
{
temp = log1pexp(-out_data[i]);
-loss += scale*temp;
-g[i] = scale*(g[i]-1);
+loss += y*scale*temp;
+g[i] = y*scale*(g[i]-1);
}
else
{
temp = -(-out_data[i]-log1pexp(-out_data[i]));
-loss += scale*temp;
-g[i] = scale*g[i];
+loss += -y*scale*temp;
+g[i] = -y*scale*g[i];
}
}
return loss;
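Both branches collapse to the same weighted form: ignoring the scale factor applied by the surrounding code, each sample contributes |y| * log(1 + exp(-sign(y) * out)), so the magnitude of the label acts as a per-sample weight. A self-contained sketch of that equivalence (plain C++, not dlib's implementation; log1pexp is re-implemented here just for illustration):

    #include <cmath>
    #include <cstdio>

    // Numerically stable log(1 + exp(x)), mirroring what dlib's log1pexp is for.
    double log1pexp(double x)
    {
        if (x > 0)
            return x + std::log1p(std::exp(-x));
        return std::log1p(std::exp(x));
    }

    // Weighted binary log loss for one sample: the sign of y picks the class,
    // |y| is the sample weight.
    double weighted_log_loss(double y, double out)
    {
        const double sign = (y > 0) ? 1.0 : -1.0;
        return std::abs(y) * log1pexp(-sign * out);
    }

    int main()
    {
        // A positive sample with weight 2 counts twice as much as one with weight 1.
        std::printf("%f\n", weighted_log_loss(+2.0, 0.5));  // 2 * log(1 + e^-0.5)
        std::printf("%f\n", weighted_log_loss(-1.0, 0.5));  // 1 * log(1 + e^+0.5)
        return 0;
    }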
@@ -232,10 +232,16 @@ namespace dlib
WHAT THIS OBJECT REPRESENTS
This object implements the loss layer interface defined above by
EXAMPLE_LOSS_LAYER_. In particular, it implements the log loss, which is
-appropriate for binary classification problems. Therefore, the possible
-labels when using this loss are +1 and -1. Moreover, it will cause the
-network to produce outputs > 0 when predicting a member of the +1 class and
-values < 0 otherwise.
+appropriate for binary classification problems. Therefore, there are two possible
+classes of labels when using this loss: positive (> 0) and negative (< 0).
+The absolute value of the label represents its weight. Putting a larger weight
+on a sample increases the importance of getting its prediction correct during
+training. A good rule of thumb is to use weights with absolute value 1 unless
+you have a very unbalanced training dataset, in which case give a larger weight
+to the class with fewer training examples.
+This loss will cause the network to produce outputs > 0 when predicting a
+member of the positive class and values < 0 otherwise.
To be more specific, this object contains a sigmoid layer followed by a
cross-entropy layer.
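As an illustration of the weighting rule described above, label weights for an unbalanced dataset could be derived from the class counts. A hedged sketch in plain C++ (make_weighted_labels and the particular weighting rule are illustrative, not dlib API; it assumes both classes are present):

    #include <cstddef>
    #include <vector>

    // Turn binary class membership into weighted +/- labels.  Here the minority
    // class gets a weight proportional to how under-represented it is, while the
    // majority class keeps weight 1.  This is just one reasonable rule.
    std::vector<float> make_weighted_labels(const std::vector<bool>& is_positive)
    {
        std::size_t num_pos = 0;
        for (bool p : is_positive)
            num_pos += p ? 1 : 0;
        const std::size_t num_neg = is_positive.size() - num_pos;

        // Weight applied to whichever class has fewer examples.
        const float minority_weight =
            (num_pos < num_neg) ? static_cast<float>(num_neg) / num_pos
                                : static_cast<float>(num_pos) / num_neg;

        std::vector<float> labels;
        labels.reserve(is_positive.size());
        for (bool p : is_positive)
        {
            float w = 1.0f;
            if (p && num_pos < num_neg)  w = minority_weight;   // positives are rare
            if (!p && num_neg < num_pos) w = minority_weight;   // negatives are rare
            labels.push_back(p ? +w : -w);
        }
        return labels;
    }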
@@ -420,14 +420,14 @@ namespace dlib
// pick a random bootstrap of the data.
std::vector<std::pair<float,uint32_t>> idxs(y.size());
for (auto& idx : idxs)
-idx = std::make_pair(0,rnd.get_integer(y.size()));
+idx = std::make_pair(0.0f, static_cast<uint32_t>(rnd.get_integer(y.size())));
// We are going to use ranges_to_process as a stack that tracks which
// range of samples we are going to split next.
std::vector<range_t> ranges_to_process;
// start with the root of the tree, i.e. the entire range of training
// samples.
-ranges_to_process.emplace_back(sumy,0,y.size());
+ranges_to_process.emplace_back(sumy, 0, static_cast<uint32_t>(y.size()));
// push an unpopulated root node into the tree. We will populate it
// when we process its corresponding range.
tree.emplace_back();
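The casts added in this hunk presumably address type-deduction and narrowing warnings: make_pair(0, ...) deduces an int first element even though the vector stores pair<float, uint32_t>, and size() returns a size_t that has to narrow to uint32_t. A small standalone sketch of the effect (illustrative only, not dlib code):

    #include <cstdint>
    #include <utility>
    #include <vector>

    int main()
    {
        std::vector<std::pair<float, std::uint32_t>> idxs(4);
        std::vector<float> y(100);

        // Deduces std::pair<int, std::size_t>; assigning it converts int -> float
        // and size_t -> uint32_t, which many compilers flag as possible data loss.
        idxs[0] = std::make_pair(0, y.size());

        // Types now match the element type exactly, so nothing has to convert.
        idxs[1] = std::make_pair(0.0f, static_cast<std::uint32_t>(y.size()));
        return 0;
    }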
@@ -477,7 +477,7 @@ namespace dlib
// Add to leaves. Don't forget to set the pointer in the
// parent node to the newly allocated leaf node.
tree[range.tree_idx].left = leaves.size() + max_num_nodes;
-leaves.emplace_back(left_split.avg());
+leaves.emplace_back(static_cast<float>(left_split.avg()));
}
@@ -501,7 +501,7 @@ namespace dlib
// Add to leaves. Don't forget to set the pointer in the
// parent node to the newly allocated leaf node.
tree[range.tree_idx].right = leaves.size() + max_num_nodes;
-leaves.emplace_back(right_split.avg());
+leaves.emplace_back(static_cast<float>(right_split.avg()));
}
} // end while (still building tree)
@@ -129,7 +129,7 @@ std::shared_ptr<simple_object_detector_py> merge_simple_object_detectors (
{
DLIB_CASSERT(len(detectors) > 0);
std::vector<simple_object_detector> temp;
-for (auto& d : detectors)
+for (const auto& d : detectors)
temp.push_back(d.cast<simple_object_detector_py>().detector);
simple_object_detector_py result;