Commit 43e5f42e authored by Davis King

Added numpy_image templated class that makes a numpy array conform to dlib's
generic image interface. This makes dealing with numpy images in pybind11
modules much nicer.
parent f6651c2c
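The pattern this commit moves the Python bindings to is visible in the diffs below: parameters become py::array (or numpy_image<T> directly), is_image<T>() replaces the old is_gray_python_image()/is_rgb_python_image() checks, and numpy_image<T>(arr) wraps the numpy buffer so any dlib generic-image routine can use it. A minimal sketch of a hypothetical binding written in that style (the function and module names are illustrative and not part of this commit):

// A minimal sketch, not part of this commit.  It mirrors the
// is_image<T>/numpy_image<T> dispatch used by the updated detector bindings
// below to accept either an 8-bit grayscale or an RGB numpy array.
#include <dlib/python.h>            // after this commit, pulls in python/numpy_image.h
#include <dlib/pixel.h>
#include <dlib/matrix.h>
#include <dlib/image_transforms.h>

namespace py = pybind11;
using namespace dlib;

long count_pixels(py::array pyimage)
{
    matrix<rgb_pixel> image;
    if (is_image<unsigned char>(pyimage))
        assign_image(image, numpy_image<unsigned char>(pyimage));  // wrap the numpy buffer, copy into a dlib matrix
    else if (is_image<rgb_pixel>(pyimage))
        assign_image(image, numpy_image<rgb_pixel>(pyimage));
    else
        throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
    return image.nr()*image.nc();
}

PYBIND11_MODULE(example, m)   // hypothetical module name
{
    m.def("count_pixels", &count_pixels, py::arg("img"));
}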
......@@ -6,7 +6,6 @@
#include "python/pybind_utils.h"
#include "python/pyassert.h"
#include "python/serialize_pickle.h"
#include "python/numpy.h"
#include "python/numpy_image.h"
#endif // DLIB_PYTHoN_TOP_
......
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_PYTHON_NuMPY_Hh_
#define DLIB_PYTHON_NuMPY_Hh_
#include <pybind11/pybind11.h>
#include <dlib/error.h>
#include <dlib/algs.h>
#include <dlib/string.h>
#include <dlib/array.h>
#include <dlib/pixel.h>
namespace py = pybind11;
// ----------------------------------------------------------------------------------------
template <typename TT>
void validate_numpy_array_type (
const py::object& obj
)
{
const char ch = obj.attr("dtype").attr("char").cast<char>();
using T = typename dlib::pixel_traits<TT>::basic_pixel_type;
if (dlib::is_same_type<T,double>::value)
{
if (ch != 'd')
throw dlib::error("Expected numpy.ndarray of float64");
}
else if (dlib::is_same_type<T,float>::value)
{
if (ch != 'f')
throw dlib::error("Expected numpy.ndarray of float32");
}
else if (dlib::is_same_type<T,dlib::int16>::value)
{
if (ch != 'h')
throw dlib::error("Expected numpy.ndarray of int16");
}
else if (dlib::is_same_type<T,dlib::uint16>::value)
{
if (ch != 'H')
throw dlib::error("Expected numpy.ndarray of uint16");
}
else if (dlib::is_same_type<T,dlib::int32>::value)
{
if (ch != 'i')
throw dlib::error("Expected numpy.ndarray of int32");
}
else if (dlib::is_same_type<T,dlib::uint32>::value)
{
if (ch != 'I')
throw dlib::error("Expected numpy.ndarray of uint32");
}
else if (dlib::is_same_type<T,unsigned char>::value)
{
if (ch != 'B')
throw dlib::error("Expected numpy.ndarray of uint8");
}
else if (dlib::is_same_type<T,signed char>::value)
{
if (ch != 'b')
throw dlib::error("Expected numpy.ndarray of int8");
}
else
{
throw dlib::error("validate_numpy_array_type() called with unsupported type.");
}
}
// ----------------------------------------------------------------------------------------
template <int dims>
void get_numpy_ndarray_shape (
const py::object& obj,
long (&shape)[dims]
)
/*!
ensures
- stores the shape of the array into #shape.
- the dimension of the given numpy array is not greater than #dims.
!*/
{
Py_buffer pybuf;
if (PyObject_GetBuffer(obj.ptr(), &pybuf, PyBUF_STRIDES ))
throw dlib::error("Expected numpy.ndarray with shape set.");
try
{
if (pybuf.ndim > dims)
throw dlib::error("Expected array with " + dlib::cast_to_string(dims) + " dimensions.");
for (int i = 0; i < dims; ++i)
{
if (i < pybuf.ndim)
shape[i] = pybuf.shape[i];
else
shape[i] = 1;
}
}
catch(...)
{
PyBuffer_Release(&pybuf);
throw;
}
PyBuffer_Release(&pybuf);
}
// ----------------------------------------------------------------------------------------
template <typename T, int dims>
void get_numpy_ndarray_parts (
py::object& obj,
T*& data,
dlib::array<T>& contig_buf,
long (&shape)[dims]
)
/*!
ensures
- extracts the pointer to the data from the given numpy ndarray. Stores the shape
of the array into #shape.
- the dimension of the given numpy array is not greater than #dims.
- #shape[#dims-1] == pixel_traits<T>::num when #dims is greater than 2
!*/
{
Py_buffer pybuf;
if (PyObject_GetBuffer(obj.ptr(), &pybuf, PyBUF_STRIDES | PyBUF_WRITABLE ))
throw dlib::error("Expected writable numpy.ndarray with shape set.");
try
{
validate_numpy_array_type<T>(obj);
if (pybuf.ndim > dims)
throw dlib::error("Expected array with " + dlib::cast_to_string(dims) + " dimensions.");
get_numpy_ndarray_shape(obj, shape);
if (dlib::pixel_traits<T>::num > 1 && dlib::pixel_traits<T>::num != shape[dims-1])
throw dlib::error("Expected numpy.ndarray with " + dlib::cast_to_string(dlib::pixel_traits<T>::num) + " channels.");
if (PyBuffer_IsContiguous(&pybuf, 'C'))
data = (T*)pybuf.buf;
else
{
contig_buf.resize(pybuf.len);
if (PyBuffer_ToContiguous(&contig_buf[0], &pybuf, pybuf.len, 'C'))
throw dlib::error("Can't copy numpy.ndarray to a contiguous buffer.");
data = &contig_buf[0];
}
}
catch(...)
{
PyBuffer_Release(&pybuf);
throw;
}
PyBuffer_Release(&pybuf);
}
// ----------------------------------------------------------------------------------------
template <typename T, int dims>
void get_numpy_ndarray_parts (
const py::object& obj,
const T*& data,
dlib::array<T>& contig_buf,
long (&shape)[dims]
)
/*!
ensures
- extracts the pointer to the data from the given numpy ndarray. Stores the shape
of the array into #shape.
- the dimension of the given numpy array is not greater than #dims.
- #shape[#dims-1] == pixel_traits<T>::num when #dims is greater than 2
!*/
{
Py_buffer pybuf;
if (PyObject_GetBuffer(obj.ptr(), &pybuf, PyBUF_STRIDES ))
throw dlib::error("Expected numpy.ndarray with shape set.");
try
{
validate_numpy_array_type<T>(obj);
if (pybuf.ndim > dims)
throw dlib::error("Expected array with " + dlib::cast_to_string(dims) + " dimensions.");
get_numpy_ndarray_shape(obj, shape);
if (dlib::pixel_traits<T>::num > 1 && dlib::pixel_traits<T>::num != shape[dims-1])
throw dlib::error("Expected numpy.ndarray with " + dlib::cast_to_string(dlib::pixel_traits<T>::num) + " channels.");
if (PyBuffer_IsContiguous(&pybuf, 'C'))
data = (const T*)pybuf.buf;
else
{
contig_buf.resize(pybuf.len);
if (PyBuffer_ToContiguous(&contig_buf[0], &pybuf, pybuf.len, 'C'))
throw dlib::error("Can't copy numpy.ndarray to a contiguous buffer.");
data = &contig_buf[0];
}
}
catch(...)
{
PyBuffer_Release(&pybuf);
throw;
}
PyBuffer_Release(&pybuf);
}
// ----------------------------------------------------------------------------------------
#endif // DLIB_PYTHON_NuMPY_Hh_
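For contrast with the new numpy_image class, this is roughly how the helpers above are used: they validate the dtype, hand back a raw pointer plus a shape array, and leave it to the caller to build a dlib image on top of that. A minimal hypothetical caller (not dlib code), assuming the header above is included:

// Hypothetical caller of get_numpy_ndarray_parts() from the header above.
// It views an RGB numpy array as raw rgb_pixel data and returns its pixel count.
long rgb_image_pixel_count(const py::object& pyimage)
{
    const dlib::rgb_pixel* data = nullptr;    // points at the numpy buffer after the call
    dlib::array<dlib::rgb_pixel> contig_buf;  // filled only if the array isn't C-contiguous
    long shape[3];                            // rows, cols, channels (channels must be 3)
    get_numpy_ndarray_parts(pyimage, data, contig_buf, shape);  // also validates the uint8 dtype
    return shape[0]*shape[1];
}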
This diff is collapsed.
......@@ -6,7 +6,7 @@ set(USE_SSE4_INSTRUCTIONS ON CACHE BOOL "Use SSE4 instructions")
# Set this to disable link time optimization. The only reason for
# doing this is to make the compile faster, which is nice when developing
# new modules.
#set(PYBIND11_LTO_CXX_FLAGS "")
set(PYBIND11_LTO_CXX_FLAGS "")
# Avoid cmake warnings about changes in behavior of some Mac OS X path
......
......@@ -26,7 +26,7 @@ public:
}
std::vector<mmod_rect> detect (
py::object pyimage,
py::array pyimage,
const int upsample_num_times
)
{
......@@ -35,10 +35,10 @@ public:
// Copy the data into dlib based objects
matrix<rgb_pixel> image;
if (is_gray_python_image(pyimage))
assign_image(image, numpy_gray_image(pyimage));
else if (is_rgb_python_image(pyimage))
assign_image(image, numpy_rgb_image(pyimage));
if (is_image<unsigned char>(pyimage))
assign_image(image, numpy_image<unsigned char>(pyimage));
else if (is_image<rgb_pixel>(pyimage))
assign_image(image, numpy_image<rgb_pixel>(pyimage));
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
......@@ -63,25 +63,25 @@ public:
return rects;
}
std::vector<std::vector<mmod_rect> > detect_mult (
std::vector<std::vector<mmod_rect>> detect_mult (
py::list imgs,
const int upsample_num_times,
const int batch_size = 128
)
{
pyramid_down<2> pyr;
std::vector<matrix<rgb_pixel> > dimgs;
std::vector<matrix<rgb_pixel>> dimgs;
dimgs.reserve(len(imgs));
for(int i = 0; i < len(imgs); i++)
{
// Copy the data into dlib based objects
matrix<rgb_pixel> image;
py::object tmp = imgs[i].cast<py::object>();
if (is_gray_python_image(tmp))
assign_image(image, numpy_gray_image(tmp));
else if (is_rgb_python_image(tmp))
assign_image(image, numpy_rgb_image(tmp));
py::array tmp = imgs[i].cast<py::array>();
if (is_image<unsigned char>(tmp))
assign_image(image, numpy_image<unsigned char>(tmp));
else if (is_image<rgb_pixel>(tmp))
assign_image(image, numpy_image<rgb_pixel>(tmp));
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
......@@ -89,16 +89,12 @@ public:
{
pyramid_up(image);
}
dimgs.push_back(image);
dimgs.emplace_back(std::move(image));
}
for(int i = 1; i < dimgs.size(); i++)
{
if
(
dimgs[i - 1].nc() != dimgs[i].nc() ||
dimgs[i - 1].nr() != dimgs[i].nr()
)
if (dimgs[i - 1].nc() != dimgs[i].nc() || dimgs[i - 1].nr() != dimgs[i].nr())
throw dlib::error("Images in list must all have the same dimensions.");
}
......
......@@ -6,46 +6,32 @@
#include "opaque_types.h"
#include <dlib/python.h>
#include <dlib/pixel.h>
#include <dlib/python/numpy_image.h>
using namespace dlib;
using namespace std;
namespace py = pybind11;
template <typename dest_image_type>
void pyimage_to_dlib_image(py::object img, dest_image_type& image)
{
if (is_gray_python_image(img))
assign_image(image, numpy_gray_image(img));
else if (is_rgb_python_image(img))
assign_image(image, numpy_rgb_image(img));
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
}
template <typename image_array, typename param_type>
void images_and_nested_params_to_dlib(
const py::object& pyimages,
const py::object& pyparams,
image_array& images,
std::vector<std::vector<param_type> >& params
std::vector<std::vector<param_type>>& params
)
{
// Now copy the data into dlib based objects.
py::iterator image_it = pyimages.begin();
py::iterator params_it = pyparams.begin();
for (unsigned long image_idx = 0;
image_it != pyimages.end()
&& params_it != pyparams.end();
++image_it, ++params_it, ++image_idx)
for (unsigned long image_idx = 0; image_it != pyimages.end() && params_it != pyparams.end(); ++image_it, ++params_it, ++image_idx)
{
for (py::iterator param_it = params_it->begin();
param_it != params_it->end();
++param_it)
params[image_idx].push_back(param_it->cast<param_type>());
for (py::iterator param_it = params_it->begin(); param_it != params_it->end(); ++param_it)
params[image_idx].push_back(param_it->cast<param_type>());
pyimage_to_dlib_image(image_it->cast<py::object>(), images[image_idx]);
images[image_idx] = image_it->cast<py::object>();
}
}
......
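With pyimage_to_dlib_image() gone, the assignment images[image_idx] = image_it->cast<py::object>() relies on numpy_image being assignable from a Python object, so callers pass arrays of numpy_image instead of array2d (as the trainer/tester diffs further below do). A hedged sketch of a call site, with illustrative names and assuming the same includes and using-declarations as the file above:

// Hypothetical call site for images_and_nested_params_to_dlib() (names illustrative).
// pyimages is a py::list of numpy arrays, pyboxes a py::list of py::list of rectangles.
void collect_training_data(const py::object& pyimages, const py::object& pyboxes)
{
    const unsigned long num_images = py::len(pyimages);
    std::vector<std::vector<rectangle>> boxes(num_images);
    dlib::array<numpy_image<rgb_pixel>> images(num_images);
    images_and_nested_params_to_dlib(pyimages, pyboxes, images, boxes);
    // images[i] now wraps the i-th numpy array and can be fed to any routine
    // that takes a dlib generic image; boxes[i] holds its rectangles.
}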
......@@ -15,17 +15,17 @@ namespace py = pybind11;
void start_track (
correlation_tracker& tracker,
py::object img,
py::array img,
const drectangle& bounding_box
)
{
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
tracker.start_track(numpy_gray_image(img), bounding_box);
tracker.start_track(numpy_image<unsigned char>(img), bounding_box);
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
tracker.start_track(numpy_rgb_image(img), bounding_box);
tracker.start_track(numpy_image<rgb_pixel>(img), bounding_box);
}
else
{
......@@ -35,7 +35,7 @@ void start_track (
void start_track_rec (
correlation_tracker& tracker,
py::object img,
py::array img,
const rectangle& bounding_box
)
{
......@@ -45,16 +45,16 @@ void start_track_rec (
double update (
correlation_tracker& tracker,
py::object img
py::array img
)
{
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
return tracker.update(numpy_gray_image(img));
return tracker.update(numpy_image<unsigned char>(img));
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
return tracker.update(numpy_rgb_image(img));
return tracker.update(numpy_image<rgb_pixel>(img));
}
else
{
......@@ -64,17 +64,17 @@ double update (
double update_guess (
correlation_tracker& tracker,
py::object img,
py::array img,
const drectangle& bounding_box
)
{
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
return tracker.update(numpy_gray_image(img), bounding_box);
return tracker.update(numpy_image<unsigned char>(img), bounding_box);
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
return tracker.update(numpy_rgb_image(img), bounding_box);
return tracker.update(numpy_image<rgb_pixel>(img), bounding_box);
}
else
{
......@@ -84,7 +84,7 @@ double update_guess (
double update_guess_rec (
correlation_tracker& tracker,
py::object img,
py::array img,
const rectangle& bounding_box
)
{
......
......@@ -32,7 +32,7 @@ public:
}
matrix<double,0,1> compute_face_descriptor (
py::object img,
numpy_image<rgb_pixel> img,
const full_object_detection& face,
const int num_jitters
)
......@@ -42,13 +42,11 @@ public:
}
std::vector<matrix<double,0,1>> compute_face_descriptors (
py::object img,
numpy_image<rgb_pixel> img,
const std::vector<full_object_detection>& faces,
const int num_jitters
)
{
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
for (auto& f : faces)
{
......@@ -61,7 +59,7 @@ public:
for (auto& f : faces)
dets.push_back(get_face_chip_details(f, 150, 0.25));
dlib::array<matrix<rgb_pixel>> face_chips;
extract_image_chips(numpy_rgb_image(img), dets, face_chips);
extract_image_chips(img, dets, face_chips);
std::vector<matrix<double,0,1>> face_descriptors;
face_descriptors.reserve(face_chips.size());
......@@ -161,22 +159,20 @@ py::list chinese_whispers_clustering(py::list descriptors, float threshold)
}
void save_face_chips (
py::object img,
numpy_image<rgb_pixel> img,
const std::vector<full_object_detection>& faces,
const std::string& chip_filename,
size_t size = 150,
float padding = 0.25
)
{
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
int num_faces = faces.size();
std::vector<chip_details> dets;
for (auto& f : faces)
dets.push_back(get_face_chip_details(f, size, padding));
dlib::array<matrix<rgb_pixel>> face_chips;
extract_image_chips(numpy_rgb_image(img), dets, face_chips);
extract_image_chips(numpy_image<rgb_pixel>(img), dets, face_chips);
int i=0;
for (auto& chip : face_chips)
{
......@@ -195,7 +191,7 @@ void save_face_chips (
}
void save_face_chip (
py::object img,
numpy_image<rgb_pixel> img,
const full_object_detection& face,
const std::string& chip_filename,
size_t size = 150,
......@@ -204,7 +200,6 @@ void save_face_chip (
{
std::vector<full_object_detection> faces(1, face);
save_face_chips(img, faces, chip_filename, size, padding);
return;
}
void bind_face_recognition(py::module &m)
......
......@@ -35,13 +35,13 @@ void image_window_set_image_simple_detector_py (
void image_window_set_image (
image_window& win,
py::object img
py::array img
)
{
if (is_gray_python_image(img))
return win.set_image(numpy_gray_image(img));
else if (is_rgb_python_image(img))
return win.set_image(numpy_rgb_image(img));
if (is_image<unsigned char>(img))
return win.set_image(numpy_image<unsigned char>(img));
else if (is_image<rgb_pixel>(img))
return win.set_image(numpy_image<rgb_pixel>(img));
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
}
......@@ -74,14 +74,14 @@ void add_overlay_parts (
win.add_overlay(render_face_detections(detection, color));
}
std::shared_ptr<image_window> make_image_window_from_image(py::object img)
std::shared_ptr<image_window> make_image_window_from_image(py::array img)
{
auto win = std::make_shared<image_window>();
image_window_set_image(*win, img);
return win;
}
std::shared_ptr<image_window> make_image_window_from_image_and_title(py::object img, const string& title)
std::shared_ptr<image_window> make_image_window_from_image_and_title(py::array img, const string& title)
{
auto win = std::make_shared<image_window>();
image_window_set_image(*win, img);
......
......@@ -10,36 +10,14 @@ using namespace std;
namespace py = pybind11;
py::array_t<uint8_t> convert_to_numpy(matrix<rgb_pixel>&& rgb_image)
{
const size_t dtype_size = sizeof(uint8_t);
const auto rows = static_cast<const size_t>(num_rows(rgb_image));
const auto cols = static_cast<const size_t>(num_columns(rgb_image));
const size_t channels = 3;
const size_t image_size = dtype_size * rows * cols * channels;
unique_ptr<rgb_pixel[]> arr_ptr = rgb_image.steal_memory();
uint8_t* arr = (uint8_t *) arr_ptr.release();
return pybind11::array_t<uint8_t>(
{rows, cols, channels}, // shape
{dtype_size * cols * channels, dtype_size * channels, dtype_size}, // strides
arr, // pointer
pybind11::capsule{
arr, [](void *arr_p) {
delete[] reinterpret_cast<uint8_t *>(arr_p);
}
}
);
}
// -------------------------------- Basic Image IO ----------------------------------------
py::array_t<uint8_t> load_rgb_image (const std::string &path)
numpy_image<rgb_pixel> load_rgb_image (const std::string &path)
{
matrix<rgb_pixel> img;
numpy_image<rgb_pixel> img;
load_image(img, path);
return convert_to_numpy(std::move(img));
return img;
}
bool has_ending (std::string const full_string, std::string const &ending) {
......@@ -50,22 +28,21 @@ bool has_ending (std::string const full_string, std::string const &ending) {
}
}
void save_rgb_image(py::object img, const std::string &path)
{
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
// ----------------------------------------------------------------------------------------
void save_rgb_image(numpy_image<rgb_pixel> img, const std::string &path)
{
std::string lowered_path = path;
std::transform(lowered_path.begin(), lowered_path.end(), lowered_path.begin(), ::tolower);
if(has_ending(lowered_path, ".bmp")) {
save_bmp(numpy_rgb_image(img), path);
save_bmp(img, path);
} else if(has_ending(lowered_path, ".dng")) {
save_dng(numpy_rgb_image(img), path);
save_dng(img, path);
} else if(has_ending(lowered_path, ".png")) {
save_png(numpy_rgb_image(img), path);
save_png(img, path);
} else if(has_ending(lowered_path, ".jpg") || has_ending(lowered_path, ".jpeg")) {
save_jpeg(numpy_rgb_image(img), path);
save_jpeg(img, path);
} else {
throw dlib::error("Unsupported image type, image path must end with one of [.bmp, .png, .dng, .jpg, .jpeg]");
}
......@@ -74,31 +51,22 @@ void save_rgb_image(py::object img, const std::string &path)
// ----------------------------------------------------------------------------------------
py::list get_jitter_images(py::object img, size_t num_jitters = 1, bool disturb_colors = false)
py::list get_jitter_images(numpy_image<rgb_pixel> img, size_t num_jitters = 1, bool disturb_colors = false)
{
static dlib::rand rnd_jitter;
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
// Convert the image to matrix<rgb_pixel> for processing
matrix<rgb_pixel> img_mat;
assign_image(img_mat, numpy_rgb_image(img));
// The top level list (containing 1 or more images) to return to python
py::list jitter_list;
for (int i = 0; i < num_jitters; ++i) {
// Get a jittered crop
matrix<rgb_pixel> crop = dlib::jitter_image(img_mat, rnd_jitter);
numpy_image<rgb_pixel> crop = dlib::jitter_image(img, rnd_jitter);
// If required disturb colors of the image
if(disturb_colors)
dlib::disturb_colors(crop, rnd_jitter);
// Convert image to Numpy array
py::array_t<uint8_t> arr = convert_to_numpy(std::move(crop));
// Append image to jittered image list
jitter_list.append(arr);
jitter_list.append(crop);
}
return jitter_list;
......@@ -107,14 +75,12 @@ py::list get_jitter_images(py::object img, size_t num_jitters = 1, bool disturb_
// ----------------------------------------------------------------------------------------
py::list get_face_chips (
py::object img,
numpy_image<rgb_pixel> img,
const std::vector<full_object_detection>& faces,
size_t size = 150,
float padding = 0.25
)
{
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
if (faces.size() < 1) {
throw dlib::error("No face were specified in the faces array.");
......@@ -125,33 +91,27 @@ py::list get_face_chips (
std::vector<chip_details> dets;
for (auto& f : faces)
dets.push_back(get_face_chip_details(f, size, padding));
dlib::array<matrix<rgb_pixel>> face_chips;
extract_image_chips(numpy_rgb_image(img), dets, face_chips);
dlib::array<numpy_image<rgb_pixel>> face_chips;
extract_image_chips(img, dets, face_chips);
for (auto& chip : face_chips)
{
// Convert image to Numpy array
py::array_t<uint8_t> arr = convert_to_numpy(std::move(chip));
// Append image to chips list
chips_list.append(arr);
chips_list.append(chip);
}
return chips_list;
}
py::array_t<uint8_t> get_face_chip (
py::object img,
numpy_image<rgb_pixel> get_face_chip (
numpy_image<rgb_pixel> img,
const full_object_detection& face,
size_t size = 150,
float padding = 0.25
)
{
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
matrix<rgb_pixel> chip;
extract_image_chip(numpy_rgb_image(img), get_face_chip_details(face, size, padding), chip);
return convert_to_numpy(std::move(chip));
numpy_image<rgb_pixel> chip;
extract_image_chip(img, get_face_chip_details(face, size, padding), chip);
return chip;
}
// ----------------------------------------------------------------------------------------
......
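Dropping convert_to_numpy() works because numpy_image<T> presumably converts to and from py::array in both directions, so these functions can take and return it directly and pybind11 handles the numpy conversion. A hedged sketch of what registration code built on the functions above might look like; the real bindings live elsewhere in dlib/python and may differ, and the docstrings here are illustrative:

// Hypothetical registration sketch for the image IO functions shown above.
// Argument defaults mirror the C++ signatures; docstrings are illustrative.
void bind_image_io_example(py::module& m)
{
    m.def("load_rgb_image", &load_rgb_image, py::arg("path"),
          "Load an image file and return it as a numpy array of RGB pixels.");
    m.def("save_rgb_image", &save_rgb_image, py::arg("img"), py::arg("path"),
          "Save an RGB numpy array to the given path (.bmp/.png/.dng/.jpg/.jpeg).");
    m.def("get_face_chip", &get_face_chip,
          py::arg("img"), py::arg("face"), py::arg("size")=150, py::arg("padding")=0.25f,
          "Extract an aligned face chip and return it as a numpy array.");
}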
......@@ -37,8 +37,8 @@ inline simple_object_detector_py train_simple_object_detector_on_images_py (
throw dlib::error("The length of the boxes list must match the length of the images list.");
// We never have any ignore boxes for this version of the API.
std::vector<std::vector<rectangle> > ignore(num_images), boxes(num_images);
dlib::array<array2d<rgb_pixel> > images(num_images);
std::vector<std::vector<rectangle>> ignore(num_images), boxes(num_images);
dlib::array<numpy_image<rgb_pixel>> images(num_images);
images_and_nested_params_to_dlib(pyimages, pyboxes, images, boxes);
return train_simple_object_detector_on_images("", images, boxes, ignore, options);
......@@ -56,8 +56,8 @@ inline simple_test_results test_simple_object_detector_with_images_py (
throw dlib::error("The length of the boxes list must match the length of the images list.");
// We never have any ignore boxes for this version of the API.
std::vector<std::vector<rectangle> > ignore(num_images), boxes(num_images);
dlib::array<array2d<rgb_pixel> > images(num_images);
std::vector<std::vector<rectangle>> ignore(num_images), boxes(num_images);
dlib::array<numpy_image<rgb_pixel>> images(num_images);
images_and_nested_params_to_dlib(pyimages, pyboxes, images, boxes);
return test_simple_object_detector_with_images(images, upsampling_amount, boxes, ignore, detector);
......@@ -86,22 +86,13 @@ inline simple_test_results test_simple_object_detector_py_with_images_py (
// ----------------------------------------------------------------------------------------
inline void find_candidate_object_locations_py (
py::object pyimage,
py::array pyimage,
py::list& pyboxes,
py::tuple pykvals,
unsigned long min_size,
unsigned long max_merging_iterations
)
{
// Copy the data into dlib based objects
array2d<rgb_pixel> image;
if (is_gray_python_image(pyimage))
assign_image(image, numpy_gray_image(pyimage));
else if (is_rgb_python_image(pyimage))
assign_image(image, numpy_rgb_image(pyimage));
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
if (py::len(pykvals) != 3)
throw dlib::error("kvals must be a tuple with three elements for start, end, num.");
......@@ -117,7 +108,12 @@ inline void find_candidate_object_locations_py (
for (long i = 0; i < count; ++i)
rects.push_back(pyboxes[i].cast<rectangle>());
// Find candidate objects
find_candidate_object_locations(image, rects, kvals, min_size, max_merging_iterations);
if (is_image<unsigned char>(pyimage))
find_candidate_object_locations(numpy_image<unsigned char>(pyimage), rects, kvals, min_size, max_merging_iterations);
else if (is_image<rgb_pixel>(pyimage))
find_candidate_object_locations(numpy_image<rgb_pixel>(pyimage), rects, kvals, min_size, max_merging_iterations);
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
// Collect boxes containing candidate objects
std::vector<rectangle>::iterator iter;
......
......@@ -17,18 +17,18 @@ namespace py = pybind11;
full_object_detection run_predictor (
shape_predictor& predictor,
py::object img,
py::array img,
py::object rect
)
{
rectangle box = rect.cast<rectangle>();
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
return predictor(numpy_gray_image(img), box);
return predictor(numpy_image<unsigned char>(img), box);
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
return predictor(numpy_rgb_image(img), box);
return predictor(numpy_image<rgb_pixel>(img), box);
}
else
{
......@@ -97,7 +97,7 @@ inline shape_predictor train_shape_predictor_on_images_py (
throw dlib::error("The length of the detections list must match the length of the images list.");
std::vector<std::vector<full_object_detection> > detections(num_images);
dlib::array<array2d<unsigned char> > images(num_images);
dlib::array<numpy_image<unsigned char>> images(num_images);
images_and_nested_params_to_dlib(pyimages, pydetections, images, detections);
return train_shape_predictor_on_images(images, detections, options);
......@@ -123,7 +123,7 @@ inline double test_shape_predictor_with_images_py (
std::vector<std::vector<double> > scales;
if (num_scales > 0)
scales.resize(num_scales);
dlib::array<array2d<unsigned char> > images(num_images);
dlib::array<numpy_image<unsigned char>> images(num_images);
// Now copy the data into dlib based objects so we can call the testing routine.
for (unsigned long i = 0; i < num_images; ++i)
......@@ -134,14 +134,12 @@ inline double test_shape_predictor_with_images_py (
++det_it)
detections[i].push_back(det_it->cast<full_object_detection>());
pyimage_to_dlib_image(pyimages[i], images[i]);
images[i] = pyimages[i];
if (num_scales > 0)
{
if (num_boxes != py::len(pyscales[i]))
throw dlib::error("The length of the scales list must match the length of the detections list.");
for (py::iterator scale_it = pyscales[i].begin();
scale_it != pyscales[i].end();
++scale_it)
for (py::iterator scale_it = pyscales[i].begin(); scale_it != pyscales[i].end(); ++scale_it)
scales[i].push_back(scale_it->cast<double>());
}
}
......
......@@ -37,7 +37,7 @@ namespace dlib
inline std::vector<dlib::rectangle> run_detector_with_upscale1 (
dlib::simple_object_detector& detector,
py::object img,
py::array img,
const unsigned int upsampling_amount,
const double adjust_threshold,
std::vector<double>& detection_confidences,
......@@ -49,19 +49,19 @@ namespace dlib
std::vector<rectangle> rectangles;
std::vector<rect_detection> rect_detections;
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
array2d<unsigned char> temp;
if (upsampling_amount == 0)
{
detector(numpy_gray_image(img), rect_detections, adjust_threshold);
detector(numpy_image<unsigned char>(img), rect_detections, adjust_threshold);
split_rect_detections(rect_detections, rectangles,
detection_confidences, weight_indices);
return rectangles;
}
else
{
pyramid_up(numpy_gray_image(img), temp, pyr);
pyramid_up(numpy_image<unsigned char>(img), temp, pyr);
unsigned int levels = upsampling_amount-1;
while (levels > 0)
{
......@@ -79,19 +79,19 @@ namespace dlib
return rectangles;
}
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
array2d<rgb_pixel> temp;
if (upsampling_amount == 0)
{
detector(numpy_rgb_image(img), rect_detections, adjust_threshold);
detector(numpy_image<rgb_pixel>(img), rect_detections, adjust_threshold);
split_rect_detections(rect_detections, rectangles,
detection_confidences, weight_indices);
return rectangles;
}
else
{
pyramid_up(numpy_rgb_image(img), temp, pyr);
pyramid_up(numpy_image<rgb_pixel>(img), temp, pyr);
unsigned int levels = upsampling_amount-1;
while (levels > 0)
{
......@@ -117,7 +117,7 @@ namespace dlib
inline std::vector<dlib::rectangle> run_detectors_with_upscale1 (
std::vector<simple_object_detector >& detectors,
py::object img,
py::array img,
const unsigned int upsampling_amount,
const double adjust_threshold,
std::vector<double>& detection_confidences,
......@@ -129,19 +129,19 @@ namespace dlib
std::vector<rectangle> rectangles;
std::vector<rect_detection> rect_detections;
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
array2d<unsigned char> temp;
if (upsampling_amount == 0)
{
evaluate_detectors(detectors, numpy_gray_image(img), rect_detections, adjust_threshold);
evaluate_detectors(detectors, numpy_image<unsigned char>(img), rect_detections, adjust_threshold);
split_rect_detections(rect_detections, rectangles,
detection_confidences, weight_indices);
return rectangles;
}
else
{
pyramid_up(numpy_gray_image(img), temp, pyr);
pyramid_up(numpy_image<unsigned char>(img), temp, pyr);
unsigned int levels = upsampling_amount-1;
while (levels > 0)
{
......@@ -159,19 +159,19 @@ namespace dlib
return rectangles;
}
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
array2d<rgb_pixel> temp;
if (upsampling_amount == 0)
{
evaluate_detectors(detectors, numpy_rgb_image(img), rect_detections, adjust_threshold);
evaluate_detectors(detectors, numpy_image<rgb_pixel>(img), rect_detections, adjust_threshold);
split_rect_detections(rect_detections, rectangles,
detection_confidences, weight_indices);
return rectangles;
}
else
{
pyramid_up(numpy_rgb_image(img), temp, pyr);
pyramid_up(numpy_image<rgb_pixel>(img), temp, pyr);
unsigned int levels = upsampling_amount-1;
while (levels > 0)
{
......@@ -197,7 +197,7 @@ namespace dlib
inline std::vector<dlib::rectangle> run_detector_with_upscale2 (
dlib::simple_object_detector& detector,
py::object img,
py::array img,
const unsigned int upsampling_amount
)
......@@ -213,7 +213,7 @@ namespace dlib
inline py::tuple run_rect_detector (
dlib::simple_object_detector& detector,
py::object img,
py::array img,
const unsigned int upsampling_amount,
const double adjust_threshold)
{
......@@ -234,7 +234,7 @@ namespace dlib
inline py::tuple run_multiple_rect_detectors (
py::list& detectors,
py::object img,
py::array img,
const unsigned int upsampling_amount,
const double adjust_threshold)
{
......@@ -272,13 +272,13 @@ namespace dlib
simple_object_detector_py(simple_object_detector& _detector, unsigned int _upsampling_amount) :
detector(_detector), upsampling_amount(_upsampling_amount) {}
std::vector<dlib::rectangle> run_detector1 (py::object img,
std::vector<dlib::rectangle> run_detector1 (py::array img,
const unsigned int upsampling_amount_)
{
return run_detector_with_upscale2(detector, img, upsampling_amount_);
}
std::vector<dlib::rectangle> run_detector2 (py::object img)
std::vector<dlib::rectangle> run_detector2 (py::array img)
{
return run_detector_with_upscale2(detector, img, upsampling_amount);
}
......