Commit ec342e39 authored by Guillaume "Vermeille" Sanchez, committed by Davis E. King

Allow batched face recognition for greater performance (#1335)

parent eded3773
......@@ -11,6 +11,7 @@
#include <dlib/image_io.h>
#include <dlib/clustering.h>
#include <pybind11/stl_bind.h>
#include <pybind11/stl.h>
using namespace dlib;
......@@ -47,33 +48,78 @@ public:
const int num_jitters
)
{
std::vector<numpy_image<rgb_pixel>> batch_img(1, img);
std::vector<std::vector<full_object_detection>> batch_faces(1, faces);
return batch_compute_face_descriptors(batch_img, batch_faces, num_jitters)[0];
}
std::vector<std::vector<matrix<double,0,1>>> batch_compute_face_descriptors (
const std::vector<numpy_image<rgb_pixel>>& batch_imgs,
const std::vector<std::vector<full_object_detection>>& batch_faces,
const int num_jitters
)
{
if (batch_imgs.size() != batch_faces.size())
throw dlib::error("The array of images and the array of array of locations must be of the same size");
for (auto& f : faces)
int total_chips = 0;
for (auto& faces : batch_faces)
{
if (f.num_parts() != 68 && f.num_parts() != 5)
throw dlib::error("The full_object_detection must use the iBUG 300W 68 point face landmark style or dlib's 5 point style.");
total_chips += faces.size();
for (auto& f : faces)
{
if (f.num_parts() != 68 && f.num_parts() != 5)
throw dlib::error("The full_object_detection must use the iBUG 300W 68 point face landmark style or dlib's 5 point style.");
}
}
std::vector<chip_details> dets;
for (auto& f : faces)
dets.push_back(get_face_chip_details(f, 150, 0.25));
dlib::array<matrix<rgb_pixel>> face_chips;
extract_image_chips(img, dets, face_chips);
for (int i = 0; i < batch_imgs.size(); ++i)
{
auto& faces = batch_faces[i];
auto& img = batch_imgs[i];
std::vector<chip_details> dets;
for (auto& f : faces)
dets.push_back(get_face_chip_details(f, 150, 0.25));
dlib::array<matrix<rgb_pixel>> this_img_face_chips;
extract_image_chips(img, dets, this_img_face_chips);
std::vector<matrix<double,0,1>> face_descriptors;
face_descriptors.reserve(face_chips.size());
for (auto& chip : this_img_face_chips)
face_chips.push_back(chip);
}
std::vector<std::vector<matrix<double,0,1>>> face_descriptors(batch_imgs.size());
if (num_jitters <= 1)
{
// extract descriptors and convert from float vectors to double vectors
for (auto& d : net(face_chips,16))
face_descriptors.push_back(matrix_cast<double>(d));
auto descriptors = net(face_chips, 16);
auto next = std::begin(descriptors);
for (int i = 0; i < batch_faces.size(); ++i)
{
for (int j = 0; j < batch_faces[i].size(); ++j)
{
face_descriptors[i].push_back(matrix_cast<double>(*next++));
}
}
DLIB_ASSERT(next == std::end(descriptors));
}
else
{
for (auto& fimg : face_chips)
face_descriptors.push_back(matrix_cast<double>(mean(mat(net(jitter_image(fimg,num_jitters),16)))));
// extract descriptors and convert from float vectors to double vectors
auto fimg = std::begin(face_chips);
for (int i = 0; i < batch_faces.size(); ++i)
{
for (int j = 0; j < batch_faces[i].size(); ++j)
{
auto& r = matrix_cast<double>(mean(mat(net(
jitter_image(*fimg++, num_jitters), 16))));
face_descriptors[i].push_back(matrix_cast<double>(r));
}
}
DLIB_ASSERT(fimg == std::end(face_chips));
}
return face_descriptors;
......@@ -214,6 +260,11 @@ void bind_face_recognition(py::module &m)
.def("compute_face_descriptor", &face_recognition_model_v1::compute_face_descriptors, py::arg("img"),py::arg("faces"),py::arg("num_jitters")=0,
"Takes an image and an array of full_object_detections that reference faces in that image and converts them into 128D face descriptors. "
"If num_jitters>1 then each face will be randomly jittered slightly num_jitters times, each run through the 128D projection, and the average used as the face descriptor."
)
.def("compute_face_descriptor", &face_recognition_model_v1::batch_compute_face_descriptors, py::arg("batch_img"),py::arg("batch_faces"),py::arg("num_jitters")=0,
"Takes an array of images and an array of arrays of full_object_detections. `batch_faces[i]` must be an array of full_object_detections corresponding to the image `batch_img[i]`, "
"referencing faces in that image. Every face will be converting into 128D face descriptors. "
"If num_jitters>1 then each face will be randomly jittered slightly num_jitters times, each run through the 128D projection, and the average used as the face descriptor."
);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment