Commit f7f6f676 authored by Kapil Sachdeva, committed by Davis E. King

Add python api that generates descriptor(s) from the aligned image(s) (#1667)

* Add python api that generates descriptor(s) from the aligned image(s)

* Remove asserts from face_recognition.py example/tutorial

* In batch_compute_face_descriptors_from_aligned_images, use for-in loop to simplify the code

Improve the documentation of the binding methods and the error message raised when the aligned image is not of size 150x150
parent 04a2387c
......@@ -114,7 +114,24 @@ for f in glob.glob(os.path.join(faces_folder_path, "*.jpg")):
# In particular, a padding of 0.5 would double the width of the cropped area, a value of 1.
# would triple it, and so forth.
# There is another overload of compute_face_descriptor that can take
# an aligned image as input.
#
# Note that it is important to generate the aligned image as
# dlib.get_face_chip would do it, i.e. the size must be 150x150,
# centered and scaled.
#
# Here is a sample usage of that
print("Computing descriptor on aligned image ..")
# Let's generate the aligned image using get_face_chip
face_chip = dlib.get_face_chip(img, shape)
# Now we simply pass this chip (aligned image) to the API
face_descriptor_from_prealigned_image = facerec.compute_face_descriptor(face_chip)
print(face_descriptor_from_prealigned_image)
dlib.hit_enter_to_continue()
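
For reference, the same call also accepts a batch of aligned chips through the new batch_compute_face_descriptors_from_aligned_images binding shown further down in this diff (its docstring says it "Takes an array of aligned images"). A minimal usage sketch, not part of the diff, assuming the img, detector, sp and facerec objects already set up earlier in this example:

# Collect one 150x150 aligned chip per detected face in the image.
face_chips = [dlib.get_face_chip(img, sp(img, d)) for d in detector(img, 1)]
# Passing a list of aligned images should dispatch to the batch overload,
# which returns one 128D descriptor per chip.
descriptors = facerec.compute_face_descriptor(face_chips)
for descriptor in descriptors:
    print(descriptor)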
......@@ -43,6 +43,15 @@ public:
return compute_face_descriptors(img, faces, num_jitters, padding)[0];
}
matrix<double,0,1> compute_face_descriptor_from_aligned_image (
numpy_image<rgb_pixel> img,
const int num_jitters
)
{
std::vector<numpy_image<rgb_pixel>> images{img};
return batch_compute_face_descriptors_from_aligned_images(images, num_jitters)[0];
}
std::vector<matrix<double,0,1>> compute_face_descriptors (
numpy_image<rgb_pixel> img,
const std::vector<full_object_detection>& faces,
......@@ -53,7 +62,7 @@ public:
std::vector<numpy_image<rgb_pixel>> batch_img(1, img);
std::vector<std::vector<full_object_detection>> batch_faces(1, faces);
return batch_compute_face_descriptors(batch_img, batch_faces, num_jitters, padding)[0];
}
std::vector<std::vector<matrix<double,0,1>>> batch_compute_face_descriptors (
const std::vector<numpy_image<rgb_pixel>>& batch_imgs,
......@@ -127,6 +136,52 @@ public:
return face_descriptors;
}
std::vector<matrix<double,0,1>> batch_compute_face_descriptors_from_aligned_images (
const std::vector<numpy_image<rgb_pixel>>& batch_imgs,
const int num_jitters
)
{
dlib::array<matrix<rgb_pixel>> face_chips;
for (auto& img : batch_imgs) {
matrix<rgb_pixel> image;
if (is_image<unsigned char>(img))
assign_image(image, numpy_image<unsigned char>(img));
else if (is_image<rgb_pixel>(img))
assign_image(image, numpy_image<rgb_pixel>(img));
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
// Check for the size of the image
if ((image.nr() != 150) || (image.nc() != 150)) {
throw dlib::error("Unsupported image size, it should be of size 150x150. Also cropping must be done as `dlib.get_face_chip` would do it. \
That is, centered and scaled essentially the same way.");
}
face_chips.push_back(image);
}
std::vector<matrix<double,0,1>> face_descriptors;
if (num_jitters <= 1)
{
// extract descriptors and convert from float vectors to double vectors
auto descriptors = net(face_chips, 16);
for (auto& des: descriptors) {
face_descriptors.push_back(matrix_cast<double>(des));
}
}
else
{
// extract descriptors and convert from float vectors to double vectors
for (auto& fimg : face_chips) {
auto& r = mean(mat(net(jitter_image(fimg, num_jitters), 16)));
face_descriptors.push_back(matrix_cast<double>(r));
}
}
return face_descriptors;
}
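
From Python, the size check above surfaces as an exception when the chip is not exactly 150x150, and the num_jitters branch can be exercised through the same overload. A minimal sketch, not part of the diff, reusing the facerec and face_chip names from the example above:

import numpy as np

# A chip produced by dlib.get_face_chip with default arguments is 150x150,
# which is exactly what the size check above requires.
assert face_chip.shape[:2] == (150, 150)

# num_jitters > 1 takes the jittered-average branch above.
descriptor = facerec.compute_face_descriptor(face_chip, num_jitters=10)
print(len(descriptor))  # 128D descriptor

# Any other chip size should be rejected by the size check above.
try:
    facerec.compute_face_descriptor(np.zeros((200, 200, 3), dtype=np.uint8))
except RuntimeError as err:
    print(err)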
private:
dlib::rand rnd;
......@@ -300,6 +355,12 @@ void bind_face_recognition(py::module &m)
"If num_jitters>1 then each face will be randomly jittered slightly num_jitters times, each run through the 128D projection, and the average used as the face descriptor. "
"Optionally allows to override default padding of 0.25 around the face."
)
.def("compute_face_descriptor", &face_recognition_model_v1::compute_face_descriptor_from_aligned_image,
py::arg("img"), py::arg("num_jitters")=0,
"Takes an aligned face image of size 150x150 and converts it into a 128D face descriptor."
"Note that the alignment should be done in the same way dlib.get_face_chip does it."
"If num_jitters>1 then image will be randomly jittered slightly num_jitters times, each run through the 128D projection, and the average used as the face descriptor. "
)
.def("compute_face_descriptor", &face_recognition_model_v1::compute_face_descriptors,
py::arg("img"), py::arg("faces"), py::arg("num_jitters")=0, py::arg("padding")=0.25,
"Takes an image and an array of full_object_detections that reference faces in that image and converts them into 128D face descriptors. "
......@@ -309,9 +370,16 @@ void bind_face_recognition(py::module &m)
.def("compute_face_descriptor", &face_recognition_model_v1::batch_compute_face_descriptors,
py::arg("batch_img"), py::arg("batch_faces"), py::arg("num_jitters")=0, py::arg("padding")=0.25,
"Takes an array of images and an array of arrays of full_object_detections. `batch_faces[i]` must be an array of full_object_detections corresponding to the image `batch_img[i]`, "
"referencing faces in that image. Every face will be converting into 128D face descriptors. "
"referencing faces in that image. Every face will be converted into 128D face descriptors. "
"If num_jitters>1 then each face will be randomly jittered slightly num_jitters times, each run through the 128D projection, and the average used as the face descriptor. "
"Optionally allows to override default padding of 0.25 around the face."
)
.def("compute_face_descriptor", &face_recognition_model_v1::batch_compute_face_descriptors_from_aligned_images,
py::arg("batch_img"), py::arg("num_jitters")=0,
"Takes an array of aligned images of faces of size 150_x_150."
"Note that the alignment should be done in the same way dlib.get_face_chip does it."
"Every face will be converted into 128D face descriptors. "
"If num_jitters>1 then each face will be randomly jittered slightly num_jitters times, each run through the 128D projection, and the average used as the face descriptor. "
);
}
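
Descriptors produced from aligned chips are the same kind of 128D vector the other overloads return, so they can be compared the usual way; the dlib face recognition example treats a Euclidean distance under 0.6 as the same person. A minimal sketch, not part of the diff, assuming two aligned 150x150 chips chip_a and chip_b and the facerec model from above:

import numpy as np

desc_a = np.array(facerec.compute_face_descriptor(chip_a))
desc_b = np.array(facerec.compute_face_descriptor(chip_b))

# Smaller distances mean more similar faces; ~0.6 is the usual threshold
# for deciding that two descriptors belong to the same person.
distance = np.linalg.norm(desc_a - desc_b)
print("same person" if distance < 0.6 else "different people")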