Commit b54b1c44 authored by Davis King

merged

parents f2ad2087 d7e6f1d7
@@ -118,6 +118,25 @@ namespace dlib
         }
     }
 
+#ifdef WIN32
+    // This should be pretty much the same as cudaStreamSynchronize, which for some
+    // reason makes training freeze on some Windows machines.
+    // (see https://github.com/davisking/dlib/issues/1513)
+    void synchronize_stream(cudaStream_t stream)
+    {
+        while (true)
+        {
+            cudaError_t err = cudaStreamQuery(stream);
+            switch (err)
+            {
+                case cudaSuccess: return;       // now we are synchronized
+                case cudaErrorNotReady: break;  // continue waiting
+                default: CHECK_CUDA(err);       // unexpected error: throw
+            }
+        }
+    }
+#endif // WIN32
+
     void gpu_data::
     async_copy_to_device() const
     {
@@ -127,7 +146,12 @@ namespace dlib
         {
             // Wait for any possible CUDA kernels that might be using our memory block to
             // complete before we overwrite the memory.
+#ifdef WIN32
+            synchronize_stream(0);
+#else
             CHECK_CUDA(cudaStreamSynchronize(0));
+#endif
             device_in_use = false;
         }
         CHECK_CUDA(cudaMemcpyAsync(data_device.get(), data_host.get(), data_size*sizeof(float), cudaMemcpyHostToDevice, (cudaStream_t)cuda_stream.get()));
......
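For context, the polling workaround added above can be exercised in isolation. The sketch below is not part of the commit: the function example_poll_sync and the toy main() are made-up names, and error handling is reduced to a printf so the example stays short. It queues an asynchronous copy on the default stream and then waits for it by repeatedly calling cudaStreamQuery, the same pattern synchronize_stream uses as a stand-in for cudaStreamSynchronize.

#include <cuda_runtime.h>
#include <cstdio>
#include <vector>

// Poll the stream until all queued work has finished (illustrative only).
static void example_poll_sync(cudaStream_t stream)
{
    while (true)
    {
        const cudaError_t err = cudaStreamQuery(stream);
        if (err == cudaSuccess)
            return;                      // stream is idle: we are synchronized
        if (err != cudaErrorNotReady)    // anything else is a real error
        {
            std::printf("CUDA error: %s\n", cudaGetErrorString(err));
            return;
        }
        // cudaErrorNotReady: work is still pending, keep polling.
    }
}

int main()
{
    std::vector<float> host(1024, 1.0f);
    float* device = nullptr;
    cudaMalloc((void**)&device, host.size()*sizeof(float));
    // Queue an async host-to-device copy on the default stream, then wait for
    // it to complete by polling rather than by calling cudaStreamSynchronize.
    cudaMemcpyAsync(device, host.data(), host.size()*sizeof(float),
                    cudaMemcpyHostToDevice, 0);
    example_poll_sync(0);
    cudaFree(device);
    return 0;
}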
@@ -195,19 +195,19 @@ namespace dlib
             for (long i = 0; i < output_tensor.num_samples(); ++i)
             {
                 const float y = *truth++;
-                DLIB_CASSERT(y == +1 || y == -1, "y: " << y);
+                DLIB_CASSERT(y != 0, "y: " << y);
                 float temp;
                 if (y > 0)
                 {
                     temp = log1pexp(-out_data[i]);
-                    loss += scale*temp;
-                    g[i] = scale*(g[i]-1);
+                    loss += y*scale*temp;
+                    g[i] = y*scale*(g[i]-1);
                 }
                 else
                 {
                     temp = -(-out_data[i]-log1pexp(-out_data[i]));
-                    loss += scale*temp;
-                    g[i] = scale*g[i];
+                    loss += -y*scale*temp;
+                    g[i] = -y*scale*g[i];
                 }
             }
             return loss;
......
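Restating what the updated loop computes (the symbols below are introduced here for illustration and do not appear in the code): write f_i for the network output for sample i, y_i for its label, and σ for the logistic function. Up to the 1/num_samples scale factor, and assuming g[i] enters the loop holding σ(f_i), the per-sample loss and the gradient stored back into g[i] are

\[
  \ell_i = |y_i|\,\log\!\left(1 + e^{-\operatorname{sign}(y_i)\,f_i}\right),
  \qquad
  \frac{\partial \ell_i}{\partial f_i} = |y_i|\left(\sigma(f_i) - \mathbf{1}[y_i > 0]\right),
\]

which reduces to the previous unweighted log loss when y_i ∈ {+1, -1}.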
@@ -232,10 +232,16 @@
        WHAT THIS OBJECT REPRESENTS
            This object implements the loss layer interface defined above by
            EXAMPLE_LOSS_LAYER_.  In particular, it implements the log loss, which is
-            appropriate for binary classification problems.  Therefore, the possible
-            labels when using this loss are +1 and -1.  Moreover, it will cause the
-            network to produce outputs > 0 when predicting a member of the +1 class and
-            values < 0 otherwise.
+            appropriate for binary classification problems.  Therefore, there are two
+            possible classes of labels: positive (> 0) and negative (< 0).  The absolute
+            value of the label represents its weight.  Putting a larger weight on a
+            sample increases the importance of getting its prediction correct during
+            training.  A good rule of thumb is to use weights with absolute value 1
+            unless you have a very unbalanced training dataset; in that case, give a
+            larger weight to the class with fewer training examples.
+
+            This loss will cause the network to produce outputs > 0 when predicting a
+            member of the positive class and values < 0 otherwise.
 
            To be more specific, this object contains a sigmoid layer followed by a
            cross-entropy layer.
......
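To illustrate the weighted-label convention documented above, here is a small sketch (not part of the commit; make_weighted_labels and the weighting rule are illustrative assumptions, not a dlib API, and it assumes the positive class is the rarer one): the sign of each label encodes the class and its magnitude the per-sample weight, with the minority class upweighted so both classes contribute comparably to the loss.

#include <cstddef>
#include <vector>

// Illustrative only: turn binary class indicators into weighted labels of the
// form described above (sign = class, magnitude = weight).
std::vector<float> make_weighted_labels(const std::vector<bool>& is_positive)
{
    std::size_t num_pos = 0;
    for (bool p : is_positive)
        num_pos += p ? 1 : 0;
    const std::size_t num_neg = is_positive.size() - num_pos;
    // e.g. 1000 negatives and 100 positives -> each positive gets weight 10.
    const float pos_weight = num_pos ? static_cast<float>(num_neg)/num_pos : 1.0f;

    std::vector<float> labels;
    labels.reserve(is_positive.size());
    for (bool p : is_positive)
        labels.push_back(p ? +pos_weight : -1.0f);
    return labels;
}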
@@ -420,14 +420,14 @@ namespace dlib
             // pick a random bootstrap of the data.
             std::vector<std::pair<float,uint32_t>> idxs(y.size());
             for (auto& idx : idxs)
-                idx = std::make_pair(0,rnd.get_integer(y.size()));
+                idx = std::make_pair(0.0f, static_cast<uint32_t>(rnd.get_integer(y.size())));
 
             // We are going to use ranges_to_process as a stack that tracks which
             // range of samples we are going to split next.
             std::vector<range_t> ranges_to_process;
             // start with the root of the tree, i.e. the entire range of training
             // samples.
-            ranges_to_process.emplace_back(sumy,0,y.size());
+            ranges_to_process.emplace_back(sumy, 0, static_cast<uint32_t>(y.size()));
             // push an unpopulated root node into the tree.  We will populate it
             // when we process its corresponding range.
             tree.emplace_back();
@@ -477,7 +477,7 @@ namespace dlib
                     // Add to leaves.  Don't forget to set the pointer in the
                     // parent node to the newly allocated leaf node.
                     tree[range.tree_idx].left = leaves.size() + max_num_nodes;
-                    leaves.emplace_back(left_split.avg());
+                    leaves.emplace_back(static_cast<float>(left_split.avg()));
                 }
@@ -501,7 +501,7 @@ namespace dlib
                     // Add to leaves.  Don't forget to set the pointer in the
                     // parent node to the newly allocated leaf node.
                     tree[range.tree_idx].right = leaves.size() + max_num_nodes;
-                    leaves.emplace_back(right_split.avg());
+                    leaves.emplace_back(static_cast<float>(right_split.avg()));
                 }
             } // end while (still building tree)
......
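The bootstrap step in the @@ -420 hunk above draws y.size() indices uniformly with replacement, pairing each with a placeholder value the tree builder fills in later. A standalone sketch of the same idea (illustrative only; std::mt19937 stands in for dlib's rnd object, and bootstrap_indices is a made-up name):

#include <cstdint>
#include <random>
#include <utility>
#include <vector>

// Draw n indices uniformly with replacement from [0, n).  Assumes n > 0.
std::vector<std::pair<float,uint32_t>> bootstrap_indices(std::size_t n, std::mt19937& rng)
{
    std::uniform_int_distribution<uint32_t> pick(0, static_cast<uint32_t>(n) - 1);
    std::vector<std::pair<float,uint32_t>> idxs(n);
    for (auto& idx : idxs)
        idx = std::make_pair(0.0f, pick(rng));
    return idxs;
}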
@@ -129,7 +129,7 @@ std::shared_ptr<simple_object_detector_py> merge_simple_object_detectors (
 {
     DLIB_CASSERT(len(detectors) > 0);
     std::vector<simple_object_detector> temp;
-    for (auto& d : detectors)
+    for (const auto& d : detectors)
         temp.push_back(d.cast<simple_object_detector_py>().detector);
     simple_object_detector_py result;
......