Commit 43e5f42e authored by Davis King

Added a numpy_image templated class that makes a numpy array conform to dlib's generic image interface. This makes dealing with numpy images in pybind11 modules much nicer.
parent f6651c2c
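For orientation, the sketch below shows the kind of pybind11 binding this commit enables: a bound function can take a dlib::numpy_image<pixel_type> argument directly, and because the class implements dlib's generic image interface the usual helpers (num_rows, image_view, and so on) operate on the underlying numpy array with no copying. This is an illustrative example added for this write-up, not code from the commit; the module name example_module and the function invert_pixels are made up, and it assumes dlib is built with its Python tooling so that <dlib/python/numpy_image.h> is on the include path.

#include <pybind11/pybind11.h>
#include <dlib/image_processing/generic_image.h>
#include <dlib/python/numpy_image.h>

namespace py = pybind11;

// Invert an 8-bit grayscale image in place through dlib's generic image API.
// The numpy_image holds a reference to the caller's numpy.ndarray, so the
// modification is visible on the Python side without any copy.
void invert_pixels(dlib::numpy_image<unsigned char> img)
{
    dlib::image_view<dlib::numpy_image<unsigned char>> v(img);
    for (long r = 0; r < v.nr(); ++r)
        for (long c = 0; c < v.nc(); ++c)
            v[r][c] = 255 - v[r][c];
}

PYBIND11_MODULE(example_module, m)   // hypothetical module name
{
    m.def("invert_pixels", &invert_pixels, py::arg("img"));
}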
......@@ -6,7 +6,6 @@
#include "python/pybind_utils.h"
#include "python/pyassert.h"
#include "python/serialize_pickle.h"
#include "python/numpy.h"
#include "python/numpy_image.h"
#endif // DLIB_PYTHoN_TOP_
......
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_PYTHON_NuMPY_Hh_
#define DLIB_PYTHON_NuMPY_Hh_
#include <pybind11/pybind11.h>
#include <dlib/error.h>
#include <dlib/algs.h>
#include <dlib/string.h>
#include <dlib/array.h>
#include <dlib/pixel.h>
namespace py = pybind11;
// ----------------------------------------------------------------------------------------
template <typename TT>
void validate_numpy_array_type (
const py::object& obj
)
{
const char ch = obj.attr("dtype").attr("char").cast<char>();
using T = typename dlib::pixel_traits<TT>::basic_pixel_type;
if (dlib::is_same_type<T,double>::value)
{
if (ch != 'd')
throw dlib::error("Expected numpy.ndarray of float64");
}
else if (dlib::is_same_type<T,float>::value)
{
if (ch != 'f')
throw dlib::error("Expected numpy.ndarray of float32");
}
else if (dlib::is_same_type<T,dlib::int16>::value)
{
if (ch != 'h')
throw dlib::error("Expected numpy.ndarray of int16");
}
else if (dlib::is_same_type<T,dlib::uint16>::value)
{
if (ch != 'H')
throw dlib::error("Expected numpy.ndarray of uint16");
}
else if (dlib::is_same_type<T,dlib::int32>::value)
{
if (ch != 'i')
throw dlib::error("Expected numpy.ndarray of int32");
}
else if (dlib::is_same_type<T,dlib::uint32>::value)
{
if (ch != 'I')
throw dlib::error("Expected numpy.ndarray of uint32");
}
else if (dlib::is_same_type<T,unsigned char>::value)
{
if (ch != 'B')
throw dlib::error("Expected numpy.ndarray of uint8");
}
else if (dlib::is_same_type<T,signed char>::value)
{
if (ch != 'b')
throw dlib::error("Expected numpy.ndarray of int8");
}
else
{
throw dlib::error("validate_numpy_array_type() called with unsupported type.");
}
}
// ----------------------------------------------------------------------------------------
template <int dims>
void get_numpy_ndarray_shape (
const py::object& obj,
long (&shape)[dims]
)
/*!
ensures
- stores the shape of the array into #shape.
- the dimension of the given numpy array is not greater than #dims.
!*/
{
Py_buffer pybuf;
if (PyObject_GetBuffer(obj.ptr(), &pybuf, PyBUF_STRIDES ))
throw dlib::error("Expected numpy.ndarray with shape set.");
try
{
if (pybuf.ndim > dims)
throw dlib::error("Expected array with " + dlib::cast_to_string(dims) + " dimensions.");
for (int i = 0; i < dims; ++i)
{
if (i < pybuf.ndim)
shape[i] = pybuf.shape[i];
else
shape[i] = 1;
}
}
catch(...)
{
PyBuffer_Release(&pybuf);
throw;
}
PyBuffer_Release(&pybuf);
}
// ----------------------------------------------------------------------------------------
template <typename T, int dims>
void get_numpy_ndarray_parts (
py::object& obj,
T*& data,
dlib::array<T>& contig_buf,
long (&shape)[dims]
)
/*!
ensures
- extracts the pointer to the data from the given numpy ndarray. Stores the shape
of the array into #shape.
- the dimension of the given numpy array is not greater than #dims.
- #shape[#dims-1] == pixel_traits<T>::num when #dims is greater than 2
!*/
{
Py_buffer pybuf;
if (PyObject_GetBuffer(obj.ptr(), &pybuf, PyBUF_STRIDES | PyBUF_WRITABLE ))
throw dlib::error("Expected writable numpy.ndarray with shape set.");
try
{
validate_numpy_array_type<T>(obj);
if (pybuf.ndim > dims)
throw dlib::error("Expected array with " + dlib::cast_to_string(dims) + " dimensions.");
get_numpy_ndarray_shape(obj, shape);
if (dlib::pixel_traits<T>::num > 1 && dlib::pixel_traits<T>::num != shape[dims-1])
throw dlib::error("Expected numpy.ndarray with " + dlib::cast_to_string(dlib::pixel_traits<T>::num) + " channels.");
if (PyBuffer_IsContiguous(&pybuf, 'C'))
data = (T*)pybuf.buf;
else
{
contig_buf.resize(pybuf.len);
if (PyBuffer_ToContiguous(&contig_buf[0], &pybuf, pybuf.len, 'C'))
throw dlib::error("Can't copy numpy.ndarray to a contiguous buffer.");
data = &contig_buf[0];
}
}
catch(...)
{
PyBuffer_Release(&pybuf);
throw;
}
PyBuffer_Release(&pybuf);
}
// ----------------------------------------------------------------------------------------
template <typename T, int dims>
void get_numpy_ndarray_parts (
const py::object& obj,
const T*& data,
dlib::array<T>& contig_buf,
long (&shape)[dims]
)
/*!
ensures
- extracts the pointer to the data from the given numpy ndarray. Stores the shape
of the array into #shape.
- the dimension of the given numpy array is not greater than #dims.
- #shape[#dims-1] == pixel_traits<T>::num when #dims is greater than 2
!*/
{
Py_buffer pybuf;
if (PyObject_GetBuffer(obj.ptr(), &pybuf, PyBUF_STRIDES ))
throw dlib::error("Expected numpy.ndarray with shape set.");
try
{
validate_numpy_array_type<T>(obj);
if (pybuf.ndim > dims)
throw dlib::error("Expected array with " + dlib::cast_to_string(dims) + " dimensions.");
get_numpy_ndarray_shape(obj, shape);
if (dlib::pixel_traits<T>::num > 1 && dlib::pixel_traits<T>::num != shape[dims-1])
throw dlib::error("Expected numpy.ndarray with " + dlib::cast_to_string(dlib::pixel_traits<T>::num) + " channels.");
if (PyBuffer_IsContiguous(&pybuf, 'C'))
data = (const T*)pybuf.buf;
else
{
contig_buf.resize(pybuf.len);
if (PyBuffer_ToContiguous(&contig_buf[0], &pybuf, pybuf.len, 'C'))
throw dlib::error("Can't copy numpy.ndarray to a contiguous buffer.");
data = &contig_buf[0];
}
}
catch(...)
{
PyBuffer_Release(&pybuf);
throw;
}
PyBuffer_Release(&pybuf);
}
// ----------------------------------------------------------------------------------------
#endif // DLIB_PYTHON_NuMPY_Hh_
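To make the contract of these helpers concrete, here is a small illustrative sketch (not part of the commit) of how they were typically used before numpy_image existed. The function name mean_intensity is made up, and the snippet assumes the includes already pulled in by this header.

// Illustrative only: unpack a 2-D uint8 numpy array into a raw pointer plus shape.
double mean_intensity (const py::object& pyimg)
{
    const unsigned char* data = nullptr;
    dlib::array<unsigned char> contig_buf;   // filled only if the array is not C-contiguous
    long shape[2];                           // shape[0] = rows, shape[1] = columns

    // Throws dlib::error if the dtype is not uint8 or the array has more than 2 dimensions.
    get_numpy_ndarray_parts(pyimg, data, contig_buf, shape);

    double sum = 0;
    for (long i = 0; i < shape[0]*shape[1]; ++i)
        sum += data[i];
    return shape[0]*shape[1] ? sum/(shape[0]*shape[1]) : 0;
}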
......@@ -3,126 +3,326 @@
#ifndef DLIB_PYTHON_NuMPY_IMAGE_Hh_
#define DLIB_PYTHON_NuMPY_IMAGE_Hh_
#include "numpy.h"
#include <dlib/pixel.h>
#include <dlib/algs.h>
#include <dlib/error.h>
#include <dlib/matrix.h>
#include <dlib/array.h>
#include <dlib/pixel.h>
#include <string>
#include <memory>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
namespace py = pybind11;
// ----------------------------------------------------------------------------------------
class numpy_gray_image
namespace dlib
{
public:
numpy_gray_image() : _data(0), _nr(0), _nc(0) {}
numpy_gray_image (py::object& img)
// ----------------------------------------------------------------------------------------
template <
typename pixel_type
>
bool is_image (
const py::array& obj
)
/*!
ensures
- returns true if and only if the given python numpy array can reasonably be
interpreted as an image containing pixel_type pixels.
!*/
{
long shape[2];
get_numpy_ndarray_parts(img, _data, _contig_buf, shape);
_nr = shape[0];
_nc = shape[1];
using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type;
constexpr size_t channels = pixel_traits<pixel_type>::num;
return obj.dtype().kind() == py::dtype::of<basic_pixel_type>().kind() &&
obj.itemsize() == sizeof(basic_pixel_type) &&
((channels == 1 && obj.ndim() == 2) || (obj.ndim() == 3 && obj.shape(2) == channels));
}
friend inline long num_rows(const numpy_gray_image& img) { return img._nr; }
friend inline long num_columns(const numpy_gray_image& img) { return img._nc; }
friend inline void* image_data(numpy_gray_image& img) { return img._data; }
friend inline const void* image_data(const numpy_gray_image& img) { return img._data; }
friend inline long width_step(const numpy_gray_image& img) { return img._nc*sizeof(unsigned char); }
// ----------------------------------------------------------------------------------------
private:
template <
typename pixel_type
>
void assert_correct_num_channels_in_image (
const py::array& img
)
{
const size_t expected_channels = pixel_traits<pixel_type>::num;
if (expected_channels == 1)
{
if (img.ndim() != 2)
throw dlib::error("Expected a 2D numpy array, but instead got one with " + std::to_string(img.ndim()) + " dimensions.");
}
else
{
if (img.ndim() != 3)
{
throw dlib::error("Expected a numpy array with 3 dimensions, but instead got one with " + std::to_string(img.ndim()) + " dimensions.");
}
else if (img.shape(2) != expected_channels)
{
if (pixel_traits<pixel_type>::rgb)
throw dlib::error("Expected a RGB image with " + std::to_string(expected_channels) + " channels but got an image with " + std::to_string(img.shape(2)) + " channels.");
else
throw dlib::error("Expected an image with " + std::to_string(expected_channels) + " channels but got an image with " + std::to_string(img.shape(2)) + " channels.");
}
}
}
unsigned char* _data;
dlib::array<unsigned char> _contig_buf;
long _nr;
long _nc;
};
// ----------------------------------------------------------------------------------------
namespace dlib
{
template <>
struct image_traits<numpy_gray_image >
template <
typename pixel_type
>
void assert_is_image (
const py::array& obj
)
{
typedef unsigned char pixel_type;
if (!is_image<pixel_type>(obj))
{
assert_correct_num_channels_in_image<pixel_type>(obj);
using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type;
const char expected_type = py::dtype::of<basic_pixel_type>().kind();
const char got_type = obj.dtype().kind();
const size_t expected_size = sizeof(basic_pixel_type);
const size_t got_size = obj.itemsize();
auto toname = [](char type, size_t size) {
if (type == 'i' && size == 1) return "int8";
else if (type == 'i' && size == 2) return "int16";
else if (type == 'i' && size == 4) return "int32";
else if (type == 'i' && size == 8) return "int64";
else if (type == 'u' && size == 1) return "uint8";
else if (type == 'u' && size == 2) return "uint16";
else if (type == 'u' && size == 4) return "uint32";
else if (type == 'u' && size == 8) return "uint64";
else if (type == 'f' && size == 4) return "float32";
else if (type == 'f' && size == 8) return "float64";
else DLIB_CASSERT(false, "unknown type");
};
throw dlib::error("Expected numpy array with elements of type " + std::string(toname(expected_type,expected_size)) + " but got " + toname(got_type, got_size) + ".");
}
}
// ----------------------------------------------------------------------------------------
template <
typename pixel_type
>
class numpy_image : public py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type>
{
/*!
REQUIREMENTS ON pixel_type
- is a dlib pixel type, this just means that dlib::pixel_traits<pixel_type>
is defined.
WHAT THIS OBJECT REPRESENTS
This is an image object that implements dlib's generic image interface and
is backed by a numpy array. It therefore is easily interchanged with
python since there is no copying. It is functionally just a pybind11
array_t object with the additional routines needed to conform to dlib's
generic image API. It also includes appropriate runtime checks to make
sure that the numpy array is always typed and sized appropriately relative
to the supplied pixel_type.
!*/
public:
numpy_image() = default;
numpy_image(
py::array& img
) : py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type>(img)
{
assert_is_image<pixel_type>(img);
}
numpy_image& operator= (
const py::object& rhs
)
{
*this = rhs.cast<py::array>();
return *this;
}
numpy_image& operator= (
const py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type>& rhs
)
{
assert_is_image<pixel_type>(rhs);
py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type>::operator=(rhs);
return *this;
}
numpy_image& operator= (
matrix<pixel_type>&& rhs
)
{
*this = convert_to_numpy(std::move(rhs));
return *this;
}
void set_size(size_t rows, size_t cols)
{
using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type;
constexpr size_t channels = pixel_traits<pixel_type>::num;
*this = py::array_t<basic_pixel_type>({rows, cols, channels});
}
private:
static py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type> convert_to_numpy(matrix<pixel_type>&& img)
{
using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type;
const size_t dtype_size = sizeof(basic_pixel_type);
const auto rows = static_cast<const size_t>(num_rows(img));
const auto cols = static_cast<const size_t>(num_columns(img));
const size_t channels = pixel_traits<pixel_type>::num;
const size_t image_size = dtype_size * rows * cols * channels;
std::unique_ptr<pixel_type[]> arr_ptr = img.steal_memory();
basic_pixel_type* arr = (basic_pixel_type *) arr_ptr.release();
return pybind11::template array_t<basic_pixel_type>(
{rows, cols, channels}, // shape
{dtype_size * cols * channels, dtype_size * channels, dtype_size}, // strides
arr, // pointer
pybind11::capsule{ arr, [](void *arr_p) { delete[] reinterpret_cast<basic_pixel_type*>(arr_p); } }
);
}
};
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// BORING IMPLEMENTATION STUFF
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
inline bool is_gray_python_image (py::object& img)
{
try
template <typename pixel_type>
long num_rows(const numpy_image<pixel_type>& img)
{
if (img.size()==0)
return 0;
assert_correct_num_channels_in_image<pixel_type>(img);
return img.shape(0);
}
template <typename pixel_type>
long num_columns(const numpy_image<pixel_type>& img)
{
long shape[2];
get_numpy_ndarray_shape(img, shape);
return true;
if (img.size()==0)
return 0;
assert_correct_num_channels_in_image<pixel_type>(img);
return img.shape(1);
}
catch (dlib::error&)
template <typename pixel_type>
void set_image_size(numpy_image<pixel_type>& img, size_t rows, size_t cols)
{
return false;
img.set_size(rows, cols);
}
}
// ----------------------------------------------------------------------------------------
template <typename pixel_type>
void* image_data(numpy_image<pixel_type>& img)
{
if (img.size()==0)
return 0;
class numpy_rgb_image
{
public:
assert_is_image<pixel_type>(img);
return img.mutable_data(0);
}
numpy_rgb_image() : _data(0), _nr(0), _nc(0) {}
numpy_rgb_image (py::object& img)
template <typename pixel_type>
const void* image_data (const numpy_image<pixel_type>& img)
{
long shape[3];
get_numpy_ndarray_parts(img, _data, _contig_buf, shape);
_nr = shape[0];
_nc = shape[1];
if (shape[2] != 3)
throw dlib::error("Error, python object is not a three band image and therefore can't be a RGB image.");
if (img.size()==0)
return 0;
assert_is_image<pixel_type>(img);
return img.data(0);
}
friend inline long num_rows(const numpy_rgb_image& img) { return img._nr; }
friend inline long num_columns(const numpy_rgb_image& img) { return img._nc; }
friend inline void* image_data(numpy_rgb_image& img) { return img._data; }
friend inline const void* image_data(const numpy_rgb_image& img) { return img._data; }
friend inline long width_step(const numpy_rgb_image& img) { return img._nc*sizeof(dlib::rgb_pixel); }
template <typename pixel_type>
long width_step (const numpy_image<pixel_type>& img)
{
if (img.size()==0)
return 0;
assert_correct_num_channels_in_image<pixel_type>(img);
using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type;
if (img.ndim() == 3 && img.strides(2) != sizeof(basic_pixel_type))
throw dlib::error("The stride of the 3rd dimension (the channel dimension) of the numpy array must be " + std::to_string(sizeof(basic_pixel_type)));
if (img.strides(1) != sizeof(pixel_type))
throw dlib::error("The stride of the 2nd dimension (the columns dimension) of the numpy array must be " + std::to_string(sizeof(pixel_type)));
return img.strides(0);
}
private:
template <typename pixel_type>
void swap(numpy_image<pixel_type>& a, numpy_image<pixel_type>& b)
{
std::swap(a,b);
}
dlib::rgb_pixel* _data;
dlib::array<dlib::rgb_pixel> _contig_buf;
long _nr;
long _nc;
};
namespace dlib
{
template <>
struct image_traits<numpy_rgb_image >
template <typename T>
struct image_traits<numpy_image<T>>
{
typedef rgb_pixel pixel_type;
typedef T pixel_type;
};
}
// ----------------------------------------------------------------------------------------
inline bool is_rgb_python_image (py::object& img)
namespace pybind11
{
try
{
long shape[3];
get_numpy_ndarray_shape(img, shape);
if (shape[2] == 3)
return true;
return false;
}
catch (dlib::error&)
namespace detail
{
return false;
template <typename pixel_type> struct handle_type_name<dlib::numpy_image<pixel_type>>
{
using basic_pixel_type = typename dlib::pixel_traits<pixel_type>::basic_pixel_type;
static PYBIND11_DESCR name() {
constexpr size_t channels = dlib::pixel_traits<pixel_type>::num;
if (channels == 1)
return _("numpy.ndarray[(rows,cols),") + npy_format_descriptor<basic_pixel_type>::name() + _("]");
else if (channels == 2)
return _("numpy.ndarray[(rows,cols,2),") + npy_format_descriptor<basic_pixel_type>::name() + _("]");
else if (channels == 3)
return _("numpy.ndarray[(rows,cols,3),") + npy_format_descriptor<basic_pixel_type>::name() + _("]");
else if (channels == 4)
return _("numpy.ndarray[(rows,cols,4),") + npy_format_descriptor<basic_pixel_type>::name() + _("]");
else
DLIB_CASSERT(false,"unsupported pixel type");
}
};
template <typename pixel_type>
struct pyobject_caster<dlib::numpy_image<pixel_type>> {
using type = dlib::numpy_image<pixel_type>;
bool load(handle src, bool convert) {
if (!convert && !type::check_(src))
return false;
value = type::ensure(src);
return static_cast<bool>(value);
}
static handle cast(const handle &src, return_value_policy /* policy */, handle /* parent */) {
return src.inc_ref();
}
PYBIND11_TYPE_CASTER(type, handle_type_name<type>::name());
};
}
}
// ----------------------------------------------------------------------------------------
#endif // DLIB_PYTHON_NuMPY_IMAGE_Hh_
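Putting the pieces together, a numpy_image<T> can be handed to any routine written against dlib's generic image interface, and a matrix<T> can be moved into one to return a freshly computed image to Python without copying (that is what the operator=(matrix&&) above is for). The following is an illustrative sketch, not code from the commit; make_thumbnail is a made-up name.

#include <utility>
#include <dlib/image_transforms.h>
#include <dlib/python/numpy_image.h>

// Illustrative only: shrink an RGB numpy image to 100x100 and hand it back to Python.
dlib::numpy_image<dlib::rgb_pixel> make_thumbnail (dlib::numpy_image<dlib::rgb_pixel> img)
{
    dlib::matrix<dlib::rgb_pixel> thumb(100, 100);
    dlib::resize_image(img, thumb);   // img is read through the generic image interface
    dlib::numpy_image<dlib::rgb_pixel> out;
    out = std::move(thumb);           // steals the matrix's memory and wraps it in a numpy array
    return out;                       // returned to Python as a numpy.ndarray via the pyobject_caster above
}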
......
......@@ -6,7 +6,7 @@ set(USE_SSE4_INSTRUCTIONS ON CACHE BOOL "Use SSE4 instructions")
# Set this to disable link time optimization. The only reason for
# doing this is to make the compile faster, which is nice when developing
# new modules.
#set(PYBIND11_LTO_CXX_FLAGS "")
set(PYBIND11_LTO_CXX_FLAGS "")
# Avoid cmake warnings about changes in behavior of some Mac OS X path
......
......@@ -26,7 +26,7 @@ public:
}
std::vector<mmod_rect> detect (
py::object pyimage,
py::array pyimage,
const int upsample_num_times
)
{
......@@ -35,10 +35,10 @@ public:
// Copy the data into dlib based objects
matrix<rgb_pixel> image;
if (is_gray_python_image(pyimage))
assign_image(image, numpy_gray_image(pyimage));
else if (is_rgb_python_image(pyimage))
assign_image(image, numpy_rgb_image(pyimage));
if (is_image<unsigned char>(pyimage))
assign_image(image, numpy_image<unsigned char>(pyimage));
else if (is_image<rgb_pixel>(pyimage))
assign_image(image, numpy_image<rgb_pixel>(pyimage));
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
......@@ -63,25 +63,25 @@ public:
return rects;
}
std::vector<std::vector<mmod_rect> > detect_mult (
std::vector<std::vector<mmod_rect>> detect_mult (
py::list imgs,
const int upsample_num_times,
const int batch_size = 128
)
{
pyramid_down<2> pyr;
std::vector<matrix<rgb_pixel> > dimgs;
std::vector<matrix<rgb_pixel>> dimgs;
dimgs.reserve(len(imgs));
for(int i = 0; i < len(imgs); i++)
{
// Copy the data into dlib based objects
matrix<rgb_pixel> image;
py::object tmp = imgs[i].cast<py::object>();
if (is_gray_python_image(tmp))
assign_image(image, numpy_gray_image(tmp));
else if (is_rgb_python_image(tmp))
assign_image(image, numpy_rgb_image(tmp));
py::array tmp = imgs[i].cast<py::array>();
if (is_image<unsigned char>(tmp))
assign_image(image, numpy_image<unsigned char>(tmp));
else if (is_image<rgb_pixel>(tmp))
assign_image(image, numpy_image<rgb_pixel>(tmp));
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
......@@ -89,16 +89,12 @@ public:
{
pyramid_up(image);
}
dimgs.push_back(image);
dimgs.emplace_back(std::move(image));
}
for(int i = 1; i < dimgs.size(); i++)
{
if
(
dimgs[i - 1].nc() != dimgs[i].nc() ||
dimgs[i - 1].nr() != dimgs[i].nr()
)
if (dimgs[i - 1].nc() != dimgs[i].nc() || dimgs[i - 1].nr() != dimgs[i].nr())
throw dlib::error("Images in list must all have the same dimensions.");
}
......
......@@ -6,46 +6,32 @@
#include "opaque_types.h"
#include <dlib/python.h>
#include <dlib/pixel.h>
#include <dlib/python/numpy_image.h>
using namespace dlib;
using namespace std;
namespace py = pybind11;
template <typename dest_image_type>
void pyimage_to_dlib_image(py::object img, dest_image_type& image)
{
if (is_gray_python_image(img))
assign_image(image, numpy_gray_image(img));
else if (is_rgb_python_image(img))
assign_image(image, numpy_rgb_image(img));
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
}
template <typename image_array, typename param_type>
void images_and_nested_params_to_dlib(
const py::object& pyimages,
const py::object& pyparams,
image_array& images,
std::vector<std::vector<param_type> >& params
std::vector<std::vector<param_type>>& params
)
{
// Now copy the data into dlib based objects.
py::iterator image_it = pyimages.begin();
py::iterator params_it = pyparams.begin();
for (unsigned long image_idx = 0;
image_it != pyimages.end()
&& params_it != pyparams.end();
++image_it, ++params_it, ++image_idx)
for (unsigned long image_idx = 0; image_it != pyimages.end() && params_it != pyparams.end(); ++image_it, ++params_it, ++image_idx)
{
for (py::iterator param_it = params_it->begin();
param_it != params_it->end();
++param_it)
params[image_idx].push_back(param_it->cast<param_type>());
for (py::iterator param_it = params_it->begin(); param_it != params_it->end(); ++param_it)
params[image_idx].push_back(param_it->cast<param_type>());
pyimage_to_dlib_image(image_it->cast<py::object>(), images[image_idx]);
images[image_idx] = image_it->cast<py::object>();
}
}
......
......@@ -15,17 +15,17 @@ namespace py = pybind11;
void start_track (
correlation_tracker& tracker,
py::object img,
py::array img,
const drectangle& bounding_box
)
{
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
tracker.start_track(numpy_gray_image(img), bounding_box);
tracker.start_track(numpy_image<unsigned char>(img), bounding_box);
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
tracker.start_track(numpy_rgb_image(img), bounding_box);
tracker.start_track(numpy_image<rgb_pixel>(img), bounding_box);
}
else
{
......@@ -35,7 +35,7 @@ void start_track (
void start_track_rec (
correlation_tracker& tracker,
py::object img,
py::array img,
const rectangle& bounding_box
)
{
......@@ -45,16 +45,16 @@ void start_track_rec (
double update (
correlation_tracker& tracker,
py::object img
py::array img
)
{
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
return tracker.update(numpy_gray_image(img));
return tracker.update(numpy_image<unsigned char>(img));
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
return tracker.update(numpy_rgb_image(img));
return tracker.update(numpy_image<rgb_pixel>(img));
}
else
{
......@@ -64,17 +64,17 @@ double update (
double update_guess (
correlation_tracker& tracker,
py::object img,
py::array img,
const drectangle& bounding_box
)
{
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
return tracker.update(numpy_gray_image(img), bounding_box);
return tracker.update(numpy_image<unsigned char>(img), bounding_box);
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
return tracker.update(numpy_rgb_image(img), bounding_box);
return tracker.update(numpy_image<rgb_pixel>(img), bounding_box);
}
else
{
......@@ -84,7 +84,7 @@ double update_guess (
double update_guess_rec (
correlation_tracker& tracker,
py::object img,
py::array img,
const rectangle& bounding_box
)
{
......
......@@ -32,7 +32,7 @@ public:
}
matrix<double,0,1> compute_face_descriptor (
py::object img,
numpy_image<rgb_pixel> img,
const full_object_detection& face,
const int num_jitters
)
......@@ -42,13 +42,11 @@ public:
}
std::vector<matrix<double,0,1>> compute_face_descriptors (
py::object img,
numpy_image<rgb_pixel> img,
const std::vector<full_object_detection>& faces,
const int num_jitters
)
{
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
for (auto& f : faces)
{
......@@ -61,7 +59,7 @@ public:
for (auto& f : faces)
dets.push_back(get_face_chip_details(f, 150, 0.25));
dlib::array<matrix<rgb_pixel>> face_chips;
extract_image_chips(numpy_rgb_image(img), dets, face_chips);
extract_image_chips(img, dets, face_chips);
std::vector<matrix<double,0,1>> face_descriptors;
face_descriptors.reserve(face_chips.size());
......@@ -161,22 +159,20 @@ py::list chinese_whispers_clustering(py::list descriptors, float threshold)
}
void save_face_chips (
py::object img,
numpy_image<rgb_pixel> img,
const std::vector<full_object_detection>& faces,
const std::string& chip_filename,
size_t size = 150,
float padding = 0.25
)
{
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
int num_faces = faces.size();
std::vector<chip_details> dets;
for (auto& f : faces)
dets.push_back(get_face_chip_details(f, size, padding));
dlib::array<matrix<rgb_pixel>> face_chips;
extract_image_chips(numpy_rgb_image(img), dets, face_chips);
extract_image_chips(numpy_image<rgb_pixel>(img), dets, face_chips);
int i=0;
for (auto& chip : face_chips)
{
......@@ -195,7 +191,7 @@ void save_face_chips (
}
void save_face_chip (
py::object img,
numpy_image<rgb_pixel> img,
const full_object_detection& face,
const std::string& chip_filename,
size_t size = 150,
......@@ -204,7 +200,6 @@ void save_face_chip (
{
std::vector<full_object_detection> faces(1, face);
save_face_chips(img, faces, chip_filename, size, padding);
return;
}
void bind_face_recognition(py::module &m)
......
......@@ -35,13 +35,13 @@ void image_window_set_image_simple_detector_py (
void image_window_set_image (
image_window& win,
py::object img
py::array img
)
{
if (is_gray_python_image(img))
return win.set_image(numpy_gray_image(img));
else if (is_rgb_python_image(img))
return win.set_image(numpy_rgb_image(img));
if (is_image<unsigned char>(img))
return win.set_image(numpy_image<unsigned char>(img));
else if (is_image<rgb_pixel>(img))
return win.set_image(numpy_image<rgb_pixel>(img));
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
}
......@@ -74,14 +74,14 @@ void add_overlay_parts (
win.add_overlay(render_face_detections(detection, color));
}
std::shared_ptr<image_window> make_image_window_from_image(py::object img)
std::shared_ptr<image_window> make_image_window_from_image(py::array img)
{
auto win = std::make_shared<image_window>();
image_window_set_image(*win, img);
return win;
}
std::shared_ptr<image_window> make_image_window_from_image_and_title(py::object img, const string& title)
std::shared_ptr<image_window> make_image_window_from_image_and_title(py::array img, const string& title)
{
auto win = std::make_shared<image_window>();
image_window_set_image(*win, img);
......
......@@ -10,36 +10,14 @@ using namespace std;
namespace py = pybind11;
py::array_t<uint8_t> convert_to_numpy(matrix<rgb_pixel>&& rgb_image)
{
const size_t dtype_size = sizeof(uint8_t);
const auto rows = static_cast<const size_t>(num_rows(rgb_image));
const auto cols = static_cast<const size_t>(num_columns(rgb_image));
const size_t channels = 3;
const size_t image_size = dtype_size * rows * cols * channels;
unique_ptr<rgb_pixel[]> arr_ptr = rgb_image.steal_memory();
uint8_t* arr = (uint8_t *) arr_ptr.release();
return pybind11::array_t<uint8_t>(
{rows, cols, channels}, // shape
{dtype_size * cols * channels, dtype_size * channels, dtype_size}, // strides
arr, // pointer
pybind11::capsule{
arr, [](void *arr_p) {
delete[] reinterpret_cast<uint8_t *>(arr_p);
}
}
);
}
// -------------------------------- Basic Image IO ----------------------------------------
py::array_t<uint8_t> load_rgb_image (const std::string &path)
numpy_image<rgb_pixel> load_rgb_image (const std::string &path)
{
matrix<rgb_pixel> img;
numpy_image<rgb_pixel> img;
load_image(img, path);
return convert_to_numpy(std::move(img));
return img;
}
bool has_ending (std::string const full_string, std::string const &ending) {
......@@ -50,22 +28,21 @@ bool has_ending (std::string const full_string, std::string const &ending) {
}
}
void save_rgb_image(py::object img, const std::string &path)
{
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
// ----------------------------------------------------------------------------------------
void save_rgb_image(numpy_image<rgb_pixel> img, const std::string &path)
{
std::string lowered_path = path;
std::transform(lowered_path.begin(), lowered_path.end(), lowered_path.begin(), ::tolower);
if(has_ending(lowered_path, ".bmp")) {
save_bmp(numpy_rgb_image(img), path);
save_bmp(img, path);
} else if(has_ending(lowered_path, ".dng")) {
save_dng(numpy_rgb_image(img), path);
save_dng(img, path);
} else if(has_ending(lowered_path, ".png")) {
save_png(numpy_rgb_image(img), path);
save_png(img, path);
} else if(has_ending(lowered_path, ".jpg") || has_ending(lowered_path, ".jpeg")) {
save_jpeg(numpy_rgb_image(img), path);
save_jpeg(img, path);
} else {
throw dlib::error("Unsupported image type, image path must end with one of [.bmp, .png, .dng, .jpg, .jpeg]");
}
......@@ -74,31 +51,22 @@ void save_rgb_image(py::object img, const std::string &path)
// ----------------------------------------------------------------------------------------
py::list get_jitter_images(py::object img, size_t num_jitters = 1, bool disturb_colors = false)
py::list get_jitter_images(numpy_image<rgb_pixel> img, size_t num_jitters = 1, bool disturb_colors = false)
{
static dlib::rand rnd_jitter;
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
// Convert the image to matrix<rgb_pixel> for processing
matrix<rgb_pixel> img_mat;
assign_image(img_mat, numpy_rgb_image(img));
// The top level list (containing 1 or more images) to return to python
py::list jitter_list;
for (int i = 0; i < num_jitters; ++i) {
// Get a jittered crop
matrix<rgb_pixel> crop = dlib::jitter_image(img_mat, rnd_jitter);
numpy_image<rgb_pixel> crop = dlib::jitter_image(img, rnd_jitter);
// If required disturb colors of the image
if(disturb_colors)
dlib::disturb_colors(crop, rnd_jitter);
// Convert image to Numpy array
py::array_t<uint8_t> arr = convert_to_numpy(std::move(crop));
// Append image to jittered image list
jitter_list.append(arr);
jitter_list.append(crop);
}
return jitter_list;
......@@ -107,14 +75,12 @@ py::list get_jitter_images(py::object img, size_t num_jitters = 1, bool disturb_
// ----------------------------------------------------------------------------------------
py::list get_face_chips (
py::object img,
numpy_image<rgb_pixel> img,
const std::vector<full_object_detection>& faces,
size_t size = 150,
float padding = 0.25
)
{
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
if (faces.size() < 1) {
throw dlib::error("No face were specified in the faces array.");
......@@ -125,33 +91,27 @@ py::list get_face_chips (
std::vector<chip_details> dets;
for (auto& f : faces)
dets.push_back(get_face_chip_details(f, size, padding));
dlib::array<matrix<rgb_pixel>> face_chips;
extract_image_chips(numpy_rgb_image(img), dets, face_chips);
dlib::array<numpy_image<rgb_pixel>> face_chips;
extract_image_chips(img, dets, face_chips);
for (auto& chip : face_chips)
{
// Convert image to Numpy array
py::array_t<uint8_t> arr = convert_to_numpy(std::move(chip));
// Append image to chips list
chips_list.append(arr);
chips_list.append(chip);
}
return chips_list;
}
py::array_t<uint8_t> get_face_chip (
py::object img,
numpy_image<rgb_pixel> get_face_chip (
numpy_image<rgb_pixel> img,
const full_object_detection& face,
size_t size = 150,
float padding = 0.25
)
{
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
matrix<rgb_pixel> chip;
extract_image_chip(numpy_rgb_image(img), get_face_chip_details(face, size, padding), chip);
return convert_to_numpy(std::move(chip));
numpy_image<rgb_pixel> chip;
extract_image_chip(img, get_face_chip_details(face, size, padding), chip);
return chip;
}
// ----------------------------------------------------------------------------------------
......
......@@ -37,8 +37,8 @@ inline simple_object_detector_py train_simple_object_detector_on_images_py (
throw dlib::error("The length of the boxes list must match the length of the images list.");
// We never have any ignore boxes for this version of the API.
std::vector<std::vector<rectangle> > ignore(num_images), boxes(num_images);
dlib::array<array2d<rgb_pixel> > images(num_images);
std::vector<std::vector<rectangle>> ignore(num_images), boxes(num_images);
dlib::array<numpy_image<rgb_pixel>> images(num_images);
images_and_nested_params_to_dlib(pyimages, pyboxes, images, boxes);
return train_simple_object_detector_on_images("", images, boxes, ignore, options);
......@@ -56,8 +56,8 @@ inline simple_test_results test_simple_object_detector_with_images_py (
throw dlib::error("The length of the boxes list must match the length of the images list.");
// We never have any ignore boxes for this version of the API.
std::vector<std::vector<rectangle> > ignore(num_images), boxes(num_images);
dlib::array<array2d<rgb_pixel> > images(num_images);
std::vector<std::vector<rectangle>> ignore(num_images), boxes(num_images);
dlib::array<numpy_image<rgb_pixel>> images(num_images);
images_and_nested_params_to_dlib(pyimages, pyboxes, images, boxes);
return test_simple_object_detector_with_images(images, upsampling_amount, boxes, ignore, detector);
......@@ -86,22 +86,13 @@ inline simple_test_results test_simple_object_detector_py_with_images_py (
// ----------------------------------------------------------------------------------------
inline void find_candidate_object_locations_py (
py::object pyimage,
py::array pyimage,
py::list& pyboxes,
py::tuple pykvals,
unsigned long min_size,
unsigned long max_merging_iterations
)
{
// Copy the data into dlib based objects
array2d<rgb_pixel> image;
if (is_gray_python_image(pyimage))
assign_image(image, numpy_gray_image(pyimage));
else if (is_rgb_python_image(pyimage))
assign_image(image, numpy_rgb_image(pyimage));
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
if (py::len(pykvals) != 3)
throw dlib::error("kvals must be a tuple with three elements for start, end, num.");
......@@ -117,7 +108,12 @@ inline void find_candidate_object_locations_py (
for (long i = 0; i < count; ++i)
rects.push_back(pyboxes[i].cast<rectangle>());
// Find candidate objects
find_candidate_object_locations(image, rects, kvals, min_size, max_merging_iterations);
if (is_image<unsigned char>(pyimage))
find_candidate_object_locations(numpy_image<unsigned char>(pyimage), rects, kvals, min_size, max_merging_iterations);
else if (is_image<rgb_pixel>(pyimage))
find_candidate_object_locations(numpy_image<rgb_pixel>(pyimage), rects, kvals, min_size, max_merging_iterations);
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
// Collect boxes containing candidate objects
std::vector<rectangle>::iterator iter;
......
......@@ -17,18 +17,18 @@ namespace py = pybind11;
full_object_detection run_predictor (
shape_predictor& predictor,
py::object img,
py::array img,
py::object rect
)
{
rectangle box = rect.cast<rectangle>();
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
return predictor(numpy_gray_image(img), box);
return predictor(numpy_image<unsigned char>(img), box);
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
return predictor(numpy_rgb_image(img), box);
return predictor(numpy_image<rgb_pixel>(img), box);
}
else
{
......@@ -97,7 +97,7 @@ inline shape_predictor train_shape_predictor_on_images_py (
throw dlib::error("The length of the detections list must match the length of the images list.");
std::vector<std::vector<full_object_detection> > detections(num_images);
dlib::array<array2d<unsigned char> > images(num_images);
dlib::array<numpy_image<unsigned char>> images(num_images);
images_and_nested_params_to_dlib(pyimages, pydetections, images, detections);
return train_shape_predictor_on_images(images, detections, options);
......@@ -123,7 +123,7 @@ inline double test_shape_predictor_with_images_py (
std::vector<std::vector<double> > scales;
if (num_scales > 0)
scales.resize(num_scales);
dlib::array<array2d<unsigned char> > images(num_images);
dlib::array<numpy_image<unsigned char>> images(num_images);
// Now copy the data into dlib based objects so we can call the testing routine.
for (unsigned long i = 0; i < num_images; ++i)
......@@ -134,14 +134,12 @@ inline double test_shape_predictor_with_images_py (
++det_it)
detections[i].push_back(det_it->cast<full_object_detection>());
pyimage_to_dlib_image(pyimages[i], images[i]);
images[i] = pyimages[i];
if (num_scales > 0)
{
if (num_boxes != py::len(pyscales[i]))
throw dlib::error("The length of the scales list must match the length of the detections list.");
for (py::iterator scale_it = pyscales[i].begin();
scale_it != pyscales[i].end();
++scale_it)
for (py::iterator scale_it = pyscales[i].begin(); scale_it != pyscales[i].end(); ++scale_it)
scales[i].push_back(scale_it->cast<double>());
}
}
......
......@@ -37,7 +37,7 @@ namespace dlib
inline std::vector<dlib::rectangle> run_detector_with_upscale1 (
dlib::simple_object_detector& detector,
py::object img,
py::array img,
const unsigned int upsampling_amount,
const double adjust_threshold,
std::vector<double>& detection_confidences,
......@@ -49,19 +49,19 @@ namespace dlib
std::vector<rectangle> rectangles;
std::vector<rect_detection> rect_detections;
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
array2d<unsigned char> temp;
if (upsampling_amount == 0)
{
detector(numpy_gray_image(img), rect_detections, adjust_threshold);
detector(numpy_image<unsigned char>(img), rect_detections, adjust_threshold);
split_rect_detections(rect_detections, rectangles,
detection_confidences, weight_indices);
return rectangles;
}
else
{
pyramid_up(numpy_gray_image(img), temp, pyr);
pyramid_up(numpy_image<unsigned char>(img), temp, pyr);
unsigned int levels = upsampling_amount-1;
while (levels > 0)
{
......@@ -79,19 +79,19 @@ namespace dlib
return rectangles;
}
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
array2d<rgb_pixel> temp;
if (upsampling_amount == 0)
{
detector(numpy_rgb_image(img), rect_detections, adjust_threshold);
detector(numpy_image<rgb_pixel>(img), rect_detections, adjust_threshold);
split_rect_detections(rect_detections, rectangles,
detection_confidences, weight_indices);
return rectangles;
}
else
{
pyramid_up(numpy_rgb_image(img), temp, pyr);
pyramid_up(numpy_image<rgb_pixel>(img), temp, pyr);
unsigned int levels = upsampling_amount-1;
while (levels > 0)
{
......@@ -117,7 +117,7 @@ namespace dlib
inline std::vector<dlib::rectangle> run_detectors_with_upscale1 (
std::vector<simple_object_detector >& detectors,
py::object img,
py::array img,
const unsigned int upsampling_amount,
const double adjust_threshold,
std::vector<double>& detection_confidences,
......@@ -129,19 +129,19 @@ namespace dlib
std::vector<rectangle> rectangles;
std::vector<rect_detection> rect_detections;
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
array2d<unsigned char> temp;
if (upsampling_amount == 0)
{
evaluate_detectors(detectors, numpy_gray_image(img), rect_detections, adjust_threshold);
evaluate_detectors(detectors, numpy_image<unsigned char>(img), rect_detections, adjust_threshold);
split_rect_detections(rect_detections, rectangles,
detection_confidences, weight_indices);
return rectangles;
}
else
{
pyramid_up(numpy_gray_image(img), temp, pyr);
pyramid_up(numpy_image<unsigned char>(img), temp, pyr);
unsigned int levels = upsampling_amount-1;
while (levels > 0)
{
......@@ -159,19 +159,19 @@ namespace dlib
return rectangles;
}
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
array2d<rgb_pixel> temp;
if (upsampling_amount == 0)
{
evaluate_detectors(detectors, numpy_rgb_image(img), rect_detections, adjust_threshold);
evaluate_detectors(detectors, numpy_image<rgb_pixel>(img), rect_detections, adjust_threshold);
split_rect_detections(rect_detections, rectangles,
detection_confidences, weight_indices);
return rectangles;
}
else
{
pyramid_up(numpy_rgb_image(img), temp, pyr);
pyramid_up(numpy_image<rgb_pixel>(img), temp, pyr);
unsigned int levels = upsampling_amount-1;
while (levels > 0)
{
......@@ -197,7 +197,7 @@ namespace dlib
inline std::vector<dlib::rectangle> run_detector_with_upscale2 (
dlib::simple_object_detector& detector,
py::object img,
py::array img,
const unsigned int upsampling_amount
)
......@@ -213,7 +213,7 @@ namespace dlib
inline py::tuple run_rect_detector (
dlib::simple_object_detector& detector,
py::object img,
py::array img,
const unsigned int upsampling_amount,
const double adjust_threshold)
{
......@@ -234,7 +234,7 @@ namespace dlib
inline py::tuple run_multiple_rect_detectors (
py::list& detectors,
py::object img,
py::array img,
const unsigned int upsampling_amount,
const double adjust_threshold)
{
......@@ -272,13 +272,13 @@ namespace dlib
simple_object_detector_py(simple_object_detector& _detector, unsigned int _upsampling_amount) :
detector(_detector), upsampling_amount(_upsampling_amount) {}
std::vector<dlib::rectangle> run_detector1 (py::object img,
std::vector<dlib::rectangle> run_detector1 (py::array img,
const unsigned int upsampling_amount_)
{
return run_detector_with_upscale2(detector, img, upsampling_amount_);
}
std::vector<dlib::rectangle> run_detector2 (py::object img)
std::vector<dlib::rectangle> run_detector2 (py::array img)
{
return run_detector_with_upscale2(detector, img, upsampling_amount);
}
......