Allow batched face recognition for greater performance (#1335)

ec342e39 · Guillaume "Vermeille" Sanchez · Davis E. King · eded3773 · ec342e39
Commit ec342e39 authored Jun 27, 2018 by Guillaume "Vermeille" Sanchez Committed by Davis E. King Jun 27, 2018
Hide whitespace changes
Inline Side-by-side

Showing with 64 additions and 13 deletions

face_recognition.cpp tools/python/src/face_recognition.cpp +64 -13

No files found.
--- a/tools/python/src/face_recognition.cpp
+++ b/tools/python/src/face_recognition.cpp
@@ -11,6 +11,7 @@
 #include <dlib/image_io.h>
 #include <dlib/clustering.h>
 #include <pybind11/stl_bind.h>
+#include <pybind11/stl.h>
 using namespace dlib;
@@ -47,33 +48,78 @@ public:
        const int num_jitters
    )
    {
+        std::vector<numpy_image<rgb_pixel>> batch_img(1, img);
+        std::vector<std::vector<full_object_detection>> batch_faces(1, faces);
+        return batch_compute_face_descriptors(batch_img, batch_faces, num_jitters)[0];
+    }
+    std::vector<std::vector<matrix<double,0,1>>> batch_compute_face_descriptors (
+        const std::vector<numpy_image<rgb_pixel>>& batch_imgs,
+        const std::vector<std::vector<full_object_detection>>& batch_faces,
+        const int num_jitters
+    )
+    {
+        if (batch_imgs.size() != batch_faces.size())
+            throw dlib::error("The array of images and the array of array of locations must be of the same size");
-        for (auto& f : faces)
+        int total_chips = 0;
+        for (auto& faces : batch_faces)
        {
-            if (f.num_parts() != 68 && f.num_parts() != 5)
+            total_chips += faces.size();
-                throw dlib::error("The full_object_detection must use the iBUG 300W 68 point face landmark style or dlib's 5 point style.");
+            for (auto& f : faces)
+            {
+                if (f.num_parts() != 68 && f.num_parts() != 5)
+                    throw dlib::error("The full_object_detection must use the iBUG 300W 68 point face landmark style or dlib's 5 point style.");
+            }
        }
-        std::vector<chip_details> dets;
-        for (auto& f : faces)
-            dets.push_back(get_face_chip_details(f, 150, 0.25));
        dlib::array<matrix<rgb_pixel>> face_chips;
-        extract_image_chips(img, dets, face_chips);
+        for (int i = 0; i < batch_imgs.size(); ++i)
+        {
+            auto& faces = batch_faces[i];
+            auto& img = batch_imgs[i];
+            std::vector<chip_details> dets;
+            for (auto& f : faces)
+                dets.push_back(get_face_chip_details(f, 150, 0.25));
+            dlib::array<matrix<rgb_pixel>> this_img_face_chips;
+            extract_image_chips(img, dets, this_img_face_chips);
-        std::vector<matrix<double,0,1>> face_descriptors;
+            for (auto& chip : this_img_face_chips)
-        face_descriptors.reserve(face_chips.size());
+                face_chips.push_back(chip);
+        }
+        std::vector<std::vector<matrix<double,0,1>>> face_descriptors(batch_imgs.size());
        if (num_jitters <= 1)
        {
            // extract descriptors and convert from float vectors to double vectors
-            for (auto& d : net(face_chips,16))
+            auto descriptors = net(face_chips, 16);
-                face_descriptors.push_back(matrix_cast<double>(d));
+            auto next = std::begin(descriptors);
+            for (int i = 0; i < batch_faces.size(); ++i)
+            {
+                for (int j = 0; j < batch_faces[i].size(); ++j)
+                {
+                    face_descriptors[i].push_back(matrix_cast<double>(*next++));
+                }
+            }
+            DLIB_ASSERT(next == std::end(descriptors));
        }
        else
        {
-            for (auto& fimg : face_chips)
+            // extract descriptors and convert from float vectors to double vectors
-                face_descriptors.push_back(matrix_cast<double>(mean(mat(net(jitter_image(fimg,num_jitters),16)))));
+            auto fimg = std::begin(face_chips);
+            for (int i = 0; i < batch_faces.size(); ++i)
+            {
+                for (int j = 0; j < batch_faces[i].size(); ++j)
+                {
+                    auto& r = matrix_cast<double>(mean(mat(net(
+                                    jitter_image(*fimg++, num_jitters), 16))));
+                    face_descriptors[i].push_back(matrix_cast<double>(r));
+                }
+            }
+            DLIB_ASSERT(fimg == std::end(face_chips));
        }
        return face_descriptors;
@@ -214,6 +260,11 @@ void bind_face_recognition(py::module &m)
        .def("compute_face_descriptor", &face_recognition_model_v1::compute_face_descriptors, py::arg("img"),py::arg("faces"),py::arg("num_jitters")=0,
            "Takes an image and an array of full_object_detections that reference faces in that image and converts them into 128D face descriptors.  "
            "If num_jitters>1 then each face will be randomly jittered slightly num_jitters times, each run through the 128D projection, and the average used as the face descriptor."
+            )
+        .def("compute_face_descriptor", &face_recognition_model_v1::batch_compute_face_descriptors, py::arg("batch_img"),py::arg("batch_faces"),py::arg("num_jitters")=0,
+            "Takes an array of images and an array of arrays of full_object_detections. `batch_faces[i]` must be an array of full_object_detections corresponding to the image `batch_img[i]`, "
+            "referencing faces in that image. Every face will be converting into 128D face descriptors.  "
+            "If num_jitters>1 then each face will be randomly jittered slightly num_jitters times, each run through the 128D projection, and the average used as the face descriptor."
            );
    }