Commit 43e5f42e authored by Davis King

Added a numpy_image templated class that makes a numpy array conform to dlib's generic image interface. This makes dealing with numpy images in pybind11 modules much nicer.
parent f6651c2c
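For orientation, the sketch below shows the kind of pybind11 binding this commit enables: a bound function can take a dlib::numpy_image<pixel_type> argument directly, and because the class implements dlib's generic image interface the usual helpers (num_rows, image_view, and so on) operate on the underlying numpy array with no copying. This is an illustrative example added for this write-up, not code from the commit; the module name example_module and the function invert_pixels are made up, and it assumes dlib is built with its Python tooling so that <dlib/python/numpy_image.h> is on the include path.

#include <pybind11/pybind11.h>
#include <dlib/image_processing/generic_image.h>
#include <dlib/python/numpy_image.h>

namespace py = pybind11;

// Invert an 8-bit grayscale image in place through dlib's generic image API.
// The numpy_image holds a reference to the caller's numpy.ndarray, so the
// modification is visible on the Python side without any copy.
void invert_pixels(dlib::numpy_image<unsigned char> img)
{
    dlib::image_view<dlib::numpy_image<unsigned char>> v(img);
    for (long r = 0; r < v.nr(); ++r)
        for (long c = 0; c < v.nc(); ++c)
            v[r][c] = 255 - v[r][c];
}

PYBIND11_MODULE(example_module, m)   // hypothetical module name
{
    m.def("invert_pixels", &invert_pixels, py::arg("img"));
}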
......@@ -6,7 +6,6 @@
#include "python/pybind_utils.h"
#include "python/pyassert.h"
#include "python/serialize_pickle.h"
#include "python/numpy.h"
#include "python/numpy_image.h"
#endif // DLIB_PYTHoN_TOP_
......
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_PYTHON_NuMPY_Hh_
#define DLIB_PYTHON_NuMPY_Hh_
#include <pybind11/pybind11.h>
#include <dlib/error.h>
#include <dlib/algs.h>
#include <dlib/string.h>
#include <dlib/array.h>
#include <dlib/pixel.h>
namespace py = pybind11;
// ----------------------------------------------------------------------------------------
template <typename TT>
void validate_numpy_array_type (
const py::object& obj
)
{
const char ch = obj.attr("dtype").attr("char").cast<char>();
using T = typename dlib::pixel_traits<TT>::basic_pixel_type;
if (dlib::is_same_type<T,double>::value)
{
if (ch != 'd')
throw dlib::error("Expected numpy.ndarray of float64");
}
else if (dlib::is_same_type<T,float>::value)
{
if (ch != 'f')
throw dlib::error("Expected numpy.ndarray of float32");
}
else if (dlib::is_same_type<T,dlib::int16>::value)
{
if (ch != 'h')
throw dlib::error("Expected numpy.ndarray of int16");
}
else if (dlib::is_same_type<T,dlib::uint16>::value)
{
if (ch != 'H')
throw dlib::error("Expected numpy.ndarray of uint16");
}
else if (dlib::is_same_type<T,dlib::int32>::value)
{
if (ch != 'i')
throw dlib::error("Expected numpy.ndarray of int32");
}
else if (dlib::is_same_type<T,dlib::uint32>::value)
{
if (ch != 'I')
throw dlib::error("Expected numpy.ndarray of uint32");
}
else if (dlib::is_same_type<T,unsigned char>::value)
{
if (ch != 'B')
throw dlib::error("Expected numpy.ndarray of uint8");
}
else if (dlib::is_same_type<T,signed char>::value)
{
if (ch != 'b')
throw dlib::error("Expected numpy.ndarray of int8");
}
else
{
throw dlib::error("validate_numpy_array_type() called with unsupported type.");
}
}
// ----------------------------------------------------------------------------------------
template <int dims>
void get_numpy_ndarray_shape (
const py::object& obj,
long (&shape)[dims]
)
/*!
ensures
- stores the shape of the array into #shape.
- the dimension of the given numpy array is not greater than #dims.
!*/
{
Py_buffer pybuf;
if (PyObject_GetBuffer(obj.ptr(), &pybuf, PyBUF_STRIDES ))
throw dlib::error("Expected numpy.ndarray with shape set.");
try
{
if (pybuf.ndim > dims)
throw dlib::error("Expected array with " + dlib::cast_to_string(dims) + " dimensions.");
for (int i = 0; i < dims; ++i)
{
if (i < pybuf.ndim)
shape[i] = pybuf.shape[i];
else
shape[i] = 1;
}
}
catch(...)
{
PyBuffer_Release(&pybuf);
throw;
}
PyBuffer_Release(&pybuf);
}
// ----------------------------------------------------------------------------------------
template <typename T, int dims>
void get_numpy_ndarray_parts (
py::object& obj,
T*& data,
dlib::array<T>& contig_buf,
long (&shape)[dims]
)
/*!
ensures
- extracts the pointer to the data from the given numpy ndarray. Stores the shape
of the array into #shape.
- the dimension of the given numpy array is not greater than #dims.
- #shape[#dims-1] == pixel_traits<T>::num when #dims is greater than 2
!*/
{
Py_buffer pybuf;
if (PyObject_GetBuffer(obj.ptr(), &pybuf, PyBUF_STRIDES | PyBUF_WRITABLE ))
throw dlib::error("Expected writable numpy.ndarray with shape set.");
try
{
validate_numpy_array_type<T>(obj);
if (pybuf.ndim > dims)
throw dlib::error("Expected array with " + dlib::cast_to_string(dims) + " dimensions.");
get_numpy_ndarray_shape(obj, shape);
if (dlib::pixel_traits<T>::num > 1 && dlib::pixel_traits<T>::num != shape[dims-1])
throw dlib::error("Expected numpy.ndarray with " + dlib::cast_to_string(dlib::pixel_traits<T>::num) + " channels.");
if (PyBuffer_IsContiguous(&pybuf, 'C'))
data = (T*)pybuf.buf;
else
{
contig_buf.resize(pybuf.len);
if (PyBuffer_ToContiguous(&contig_buf[0], &pybuf, pybuf.len, 'C'))
throw dlib::error("Can't copy numpy.ndarray to a contiguous buffer.");
data = &contig_buf[0];
}
}
catch(...)
{
PyBuffer_Release(&pybuf);
throw;
}
PyBuffer_Release(&pybuf);
}
// ----------------------------------------------------------------------------------------
template <typename T, int dims>
void get_numpy_ndarray_parts (
const py::object& obj,
const T*& data,
dlib::array<T>& contig_buf,
long (&shape)[dims]
)
/*!
ensures
- extracts the pointer to the data from the given numpy ndarray. Stores the shape
of the array into #shape.
- the dimension of the given numpy array is not greater than #dims.
- #shape[#dims-1] == pixel_traits<T>::num when #dims is greater than 2
!*/
{
Py_buffer pybuf;
if (PyObject_GetBuffer(obj.ptr(), &pybuf, PyBUF_STRIDES ))
throw dlib::error("Expected numpy.ndarray with shape set.");
try
{
validate_numpy_array_type<T>(obj);
if (pybuf.ndim > dims)
throw dlib::error("Expected array with " + dlib::cast_to_string(dims) + " dimensions.");
get_numpy_ndarray_shape(obj, shape);
if (dlib::pixel_traits<T>::num > 1 && dlib::pixel_traits<T>::num != shape[dims-1])
throw dlib::error("Expected numpy.ndarray with " + dlib::cast_to_string(dlib::pixel_traits<T>::num) + " channels.");
if (PyBuffer_IsContiguous(&pybuf, 'C'))
data = (const T*)pybuf.buf;
else
{
contig_buf.resize(pybuf.len);
if (PyBuffer_ToContiguous(&contig_buf[0], &pybuf, pybuf.len, 'C'))
throw dlib::error("Can't copy numpy.ndarray to a contiguous buffer.");
data = &contig_buf[0];
}
}
catch(...)
{
PyBuffer_Release(&pybuf);
throw;
}
PyBuffer_Release(&pybuf);
}
// ----------------------------------------------------------------------------------------
#endif // DLIB_PYTHON_NuMPY_Hh_
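To make the contract of these helpers concrete, here is a small illustrative sketch (not part of the commit) of how they were typically used before numpy_image existed. The function name mean_intensity is made up, and the snippet assumes the includes already pulled in by this header.

// Illustrative only: unpack a 2-D uint8 numpy array into a raw pointer plus shape.
double mean_intensity (const py::object& pyimg)
{
    const unsigned char* data = nullptr;
    dlib::array<unsigned char> contig_buf;   // filled only if the array is not C-contiguous
    long shape[2];                           // shape[0] = rows, shape[1] = columns

    // Throws dlib::error if the dtype is not uint8 or the array has more than 2 dimensions.
    get_numpy_ndarray_parts(pyimg, data, contig_buf, shape);

    double sum = 0;
    for (long i = 0; i < shape[0]*shape[1]; ++i)
        sum += data[i];
    return shape[0]*shape[1] ? sum/(shape[0]*shape[1]) : 0;
}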
......@@ -3,126 +3,326 @@
#ifndef DLIB_PYTHON_NuMPY_IMAGE_Hh_
#define DLIB_PYTHON_NuMPY_IMAGE_Hh_
#include "numpy.h"
#include <dlib/pixel.h>
#include <dlib/algs.h>
#include <dlib/error.h>
#include <dlib/matrix.h>
#include <dlib/array.h>
#include <dlib/pixel.h>
#include <string>
#include <memory>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
namespace py = pybind11;
// ----------------------------------------------------------------------------------------
class numpy_gray_image
namespace dlib
{
public:
numpy_gray_image() : _data(0), _nr(0), _nc(0) {}
numpy_gray_image (py::object& img)
// ----------------------------------------------------------------------------------------
template <
typename pixel_type
>
bool is_image (
const py::array& obj
)
/*!
ensures
- returns true if and only if the given python numpy array can reasonably be
interpreted as an image containing pixel_type pixels.
!*/
{
long shape[2];
get_numpy_ndarray_parts(img, _data, _contig_buf, shape);
_nr = shape[0];
_nc = shape[1];
using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type;
constexpr size_t channels = pixel_traits<pixel_type>::num;
return obj.dtype().kind() == py::dtype::of<basic_pixel_type>().kind() &&
obj.itemsize() == sizeof(basic_pixel_type) &&
((channels == 1 && obj.ndim() == 2) || (obj.ndim() == 3 && obj.shape(2) == channels));
}
friend inline long num_rows(const numpy_gray_image& img) { return img._nr; }
friend inline long num_columns(const numpy_gray_image& img) { return img._nc; }
friend inline void* image_data(numpy_gray_image& img) { return img._data; }
friend inline const void* image_data(const numpy_gray_image& img) { return img._data; }
friend inline long width_step(const numpy_gray_image& img) { return img._nc*sizeof(unsigned char); }
// ----------------------------------------------------------------------------------------
private:
template <
typename pixel_type
>
void assert_correct_num_channels_in_image (
const py::array& img
)
{
const size_t expected_channels = pixel_traits<pixel_type>::num;
if (expected_channels == 1)
{
if (img.ndim() != 2)
throw dlib::error("Expected a 2D numpy array, but instead got one with " + std::to_string(img.ndim()) + " dimensions.");
}
else
{
if (img.ndim() != 3)
{
throw dlib::error("Expected a numpy array with 3 dimensions, but instead got one with " + std::to_string(img.ndim()) + " dimensions.");
}
else if (img.shape(2) != expected_channels)
{
if (pixel_traits<pixel_type>::rgb)
throw dlib::error("Expected a RGB image with " + std::to_string(expected_channels) + " channels but got an image with " + std::to_string(img.shape(2)) + " channels.");
else
throw dlib::error("Expected an image with " + std::to_string(expected_channels) + " channels but got an image with " + std::to_string(img.shape(2)) + " channels.");
}
}
}
unsigned char* _data;
dlib::array<unsigned char> _contig_buf;
long _nr;
long _nc;
};
// ----------------------------------------------------------------------------------------
namespace dlib
{
template <>
struct image_traits<numpy_gray_image >
template <
typename pixel_type
>
void assert_is_image (
const py::array& obj
)
{
typedef unsigned char pixel_type;
if (!is_image<pixel_type>(obj))
{
assert_correct_num_channels_in_image<pixel_type>(obj);
using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type;
const char expected_type = py::dtype::of<basic_pixel_type>().kind();
const char got_type = obj.dtype().kind();
const size_t expected_size = sizeof(basic_pixel_type);
const size_t got_size = obj.itemsize();
auto toname = [](char type, size_t size) {
if (type == 'i' && size == 1) return "int8";
else if (type == 'i' && size == 2) return "int16";
else if (type == 'i' && size == 4) return "int32";
else if (type == 'i' && size == 8) return "int64";
else if (type == 'u' && size == 1) return "uint8";
else if (type == 'u' && size == 2) return "uint16";
else if (type == 'u' && size == 4) return "uint32";
else if (type == 'u' && size == 8) return "uint64";
else if (type == 'f' && size == 4) return "float32";
else if (type == 'f' && size == 8) return "float64";
else DLIB_CASSERT(false, "unknown type");
};
throw dlib::error("Expected numpy array with elements of type " + std::string(toname(expected_type,expected_size)) + " but got " + toname(got_type, got_size) + ".");
}
}
// ----------------------------------------------------------------------------------------
template <
typename pixel_type
>
class numpy_image : public py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type>
{
/*!
REQUIREMENTS ON pixel_type
- is a dlib pixel type, this just means that dlib::pixel_traits<pixel_type>
is defined.
WHAT THIS OBJECT REPRESENTS
This is an image object that implements dlib's generic image interface and
is backed by a numpy array. It therefore is easily interchanged with
python since there is no copying. It is functionally just a pybind11
array_t object with the additional routines needed to conform to dlib's
generic image API. It also includes appropriate runtime checks to make
sure that the numpy array is always typed and sized appropriately relative
to the supplied pixel_type.
!*/
public:
numpy_image() = default;
numpy_image(
py::array& img
) : py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type>(img)
{
assert_is_image<pixel_type>(img);
}
numpy_image& operator= (
const py::object& rhs
)
{
*this = rhs.cast<py::array>();
return *this;
}
numpy_image& operator= (
const py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type>& rhs
)
{
assert_is_image<pixel_type>(rhs);
py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type>::operator=(rhs);
return *this;
}
numpy_image& operator= (
matrix<pixel_type>&& rhs
)
{
*this = convert_to_numpy(std::move(rhs));
return *this;
}
void set_size(size_t rows, size_t cols)
{
using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type;
constexpr size_t channels = pixel_traits<pixel_type>::num;
*this = py::array_t<basic_pixel_type>({rows, cols, channels});
}
private:
static py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type> convert_to_numpy(matrix<pixel_type>&& img)
{
using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type;
const size_t dtype_size = sizeof(basic_pixel_type);
const auto rows = static_cast<const size_t>(num_rows(img));
const auto cols = static_cast<const size_t>(num_columns(img));
const size_t channels = pixel_traits<pixel_type>::num;
const size_t image_size = dtype_size * rows * cols * channels;
std::unique_ptr<pixel_type[]> arr_ptr = img.steal_memory();
basic_pixel_type* arr = (basic_pixel_type *) arr_ptr.release();
return pybind11::template array_t<basic_pixel_type>(
{rows, cols, channels}, // shape
{dtype_size * cols * channels, dtype_size * channels, dtype_size}, // strides
arr, // pointer
pybind11::capsule{ arr, [](void *arr_p) { delete[] reinterpret_cast<basic_pixel_type*>(arr_p); } }
);
}
};
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// BORING IMPLEMENTATION STUFF
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
inline bool is_gray_python_image (py::object& img)
{
try
template <typename pixel_type>
long num_rows(const numpy_image<pixel_type>& img)
{
if (img.size()==0)
return 0;
assert_correct_num_channels_in_image<pixel_type>(img);
return img.shape(0);
}
template <typename pixel_type>
long num_columns(const numpy_image<pixel_type>& img)
{
long shape[2];
get_numpy_ndarray_shape(img, shape);
return true;
if (img.size()==0)
return 0;
assert_correct_num_channels_in_image<pixel_type>(img);
return img.shape(1);
}
catch (dlib::error&)
template <typename pixel_type>
void set_image_size(numpy_image<pixel_type>& img, size_t rows, size_t cols)
{
return false;
img.set_size(rows, cols);
}
}
// ----------------------------------------------------------------------------------------
template <typename pixel_type>
void* image_data(numpy_image<pixel_type>& img)
{
if (img.size()==0)
return 0;
class numpy_rgb_image
{
public:
assert_is_image<pixel_type>(img);
return img.mutable_data(0);
}
numpy_rgb_image() : _data(0), _nr(0), _nc(0) {}
numpy_rgb_image (py::object& img)
template <typename pixel_type>
const void* image_data (const numpy_image<pixel_type>& img)
{
long shape[3];
get_numpy_ndarray_parts(img, _data, _contig_buf, shape);
_nr = shape[0];
_nc = shape[1];
if (shape[2] != 3)
throw dlib::error("Error, python object is not a three band image and therefore can't be a RGB image.");
if (img.size()==0)
return 0;
assert_is_image<pixel_type>(img);
return img.data(0);
}
friend inline long num_rows(const numpy_rgb_image& img) { return img._nr; }
friend inline long num_columns(const numpy_rgb_image& img) { return img._nc; }
friend inline void* image_data(numpy_rgb_image& img) { return img._data; }
friend inline const void* image_data(const numpy_rgb_image& img) { return img._data; }
friend inline long width_step(const numpy_rgb_image& img) { return img._nc*sizeof(dlib::rgb_pixel); }
template <typename pixel_type>
long width_step (const numpy_image<pixel_type>& img)
{
if (img.size()==0)
return 0;
assert_correct_num_channels_in_image<pixel_type>(img);
using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type;
if (img.ndim() == 3 && img.strides(2) != sizeof(basic_pixel_type))
throw dlib::error("The stride of the 3rd dimension (the channel dimension) of the numpy array must be " + std::to_string(sizeof(basic_pixel_type)));
if (img.strides(1) != sizeof(pixel_type))
throw dlib::error("The stride of the 2nd dimension (the columns dimension) of the numpy array must be " + std::to_string(sizeof(pixel_type)));
return img.strides(0);
}
private:
template <typename pixel_type>
void swap(numpy_image<pixel_type>& a, numpy_image<pixel_type>& b)
{
std::swap(a,b);
}
dlib::rgb_pixel* _data;
dlib::array<dlib::rgb_pixel> _contig_buf;
long _nr;
long _nc;
};
namespace dlib
{
template <>
struct image_traits<numpy_rgb_image >
template <typename T>
struct image_traits<numpy_image<T>>
{
typedef rgb_pixel pixel_type;
typedef T pixel_type;
};
}
// ----------------------------------------------------------------------------------------
inline bool is_rgb_python_image (py::object& img)
namespace pybind11
{
try
{
long shape[3];
get_numpy_ndarray_shape(img, shape);
if (shape[2] == 3)
return true;
return false;
}
catch (dlib::error&)
namespace detail
{
return false;
template <typename pixel_type> struct handle_type_name<dlib::numpy_image<pixel_type>>
{
using basic_pixel_type = typename dlib::pixel_traits<pixel_type>::basic_pixel_type;
static PYBIND11_DESCR name() {
constexpr size_t channels = dlib::pixel_traits<pixel_type>::num;
if (channels == 1)
return _("numpy.ndarray[(rows,cols),") + npy_format_descriptor<basic_pixel_type>::name() + _("]");
else if (channels == 2)
return _("numpy.ndarray[(rows,cols,2),") + npy_format_descriptor<basic_pixel_type>::name() + _("]");
else if (channels == 3)
return _("numpy.ndarray[(rows,cols,3),") + npy_format_descriptor<basic_pixel_type>::name() + _("]");
else if (channels == 4)
return _("numpy.ndarray[(rows,cols,4),") + npy_format_descriptor<basic_pixel_type>::name() + _("]");
else
DLIB_CASSERT(false,"unsupported pixel type");
}
};
template <typename pixel_type>
struct pyobject_caster<dlib::numpy_image<pixel_type>> {
using type = dlib::numpy_image<pixel_type>;
bool load(handle src, bool convert) {
if (!convert && !type::check_(src))
return false;
value = type::ensure(src);
return static_cast<bool>(value);
}
static handle cast(const handle &src, return_value_policy /* policy */, handle /* parent */) {
return src.inc_ref();
}
PYBIND11_TYPE_CASTER(type, handle_type_name<type>::name());
};
}
}
// ----------------------------------------------------------------------------------------
#endif // DLIB_PYTHON_NuMPY_IMAGE_Hh_
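Putting the pieces together, a numpy_image<T> can be handed to any routine written against dlib's generic image interface, and a matrix<T> can be moved into one to return a freshly computed image to Python without copying (that is what the operator=(matrix&&) above is for). The following is an illustrative sketch, not code from the commit; make_thumbnail is a made-up name.

#include <utility>
#include <dlib/image_transforms.h>
#include <dlib/python/numpy_image.h>

// Illustrative only: shrink an RGB numpy image to 100x100 and hand it back to Python.
dlib::numpy_image<dlib::rgb_pixel> make_thumbnail (dlib::numpy_image<dlib::rgb_pixel> img)
{
    dlib::matrix<dlib::rgb_pixel> thumb(100, 100);
    dlib::resize_image(img, thumb);   // img is read through the generic image interface
    dlib::numpy_image<dlib::rgb_pixel> out;
    out = std::move(thumb);           // steals the matrix's memory and wraps it in a numpy array
    return out;                       // returned to Python as a numpy.ndarray via the pyobject_caster above
}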
......
......@@ -6,7 +6,7 @@ set(USE_SSE4_INSTRUCTIONS ON CACHE BOOL "Use SSE4 instructions")
# Set this to disable link time optimization. The only reason for
# doing this is to make the compile faster, which is nice when developing
# new modules.
#set(PYBIND11_LTO_CXX_FLAGS "")
set(PYBIND11_LTO_CXX_FLAGS "")
# Avoid cmake warnings about changes in behavior of some Mac OS X path
......
......@@ -26,7 +26,7 @@ public:
}
std::vector<mmod_rect> detect (
py::object pyimage,
py::array pyimage,
const int upsample_num_times
)
{
......@@ -35,10 +35,10 @@ public:
// Copy the data into dlib based objects
matrix<rgb_pixel> image;
if (is_gray_python_image(pyimage))
assign_image(image, numpy_gray_image(pyimage));
else if (is_rgb_python_image(pyimage))
assign_image(image, numpy_rgb_image(pyimage));
if (is_image<unsigned char>(pyimage))
assign_image(image, numpy_image<unsigned char>(pyimage));
else if (is_image<rgb_pixel>(pyimage))
assign_image(image, numpy_image<rgb_pixel>(pyimage));
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
......@@ -63,25 +63,25 @@ public:
return rects;
}
std::vector<std::vector<mmod_rect> > detect_mult (
std::vector<std::vector<mmod_rect>> detect_mult (
py::list imgs,
const int upsample_num_times,
const int batch_size = 128
)
{
pyramid_down<2> pyr;
std::vector<matrix<rgb_pixel> > dimgs;
std::vector<matrix<rgb_pixel>> dimgs;
dimgs.reserve(len(imgs));
for(int i = 0; i < len(imgs); i++)
{
// Copy the data into dlib based objects
matrix<rgb_pixel> image;
py::object tmp = imgs[i].cast<py::object>();
if (is_gray_python_image(tmp))
assign_image(image, numpy_gray_image(tmp));
else if (is_rgb_python_image(tmp))
assign_image(image, numpy_rgb_image(tmp));
py::array tmp = imgs[i].cast<py::array>();
if (is_image<unsigned char>(tmp))
assign_image(image, numpy_image<unsigned char>(tmp));
else if (is_image<rgb_pixel>(tmp))
assign_image(image, numpy_image<rgb_pixel>(tmp));
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
......@@ -89,16 +89,12 @@ public:
{
pyramid_up(image);
}
dimgs.push_back(image);
dimgs.emplace_back(std::move(image));
}
for(int i = 1; i < dimgs.size(); i++)
{
if
(
dimgs[i - 1].nc() != dimgs[i].nc() ||
dimgs[i - 1].nr() != dimgs[i].nr()
)
if (dimgs[i - 1].nc() != dimgs[i].nc() || dimgs[i - 1].nr() != dimgs[i].nr())
throw dlib::error("Images in list must all have the same dimensions.");
}
......
......@@ -6,46 +6,32 @@
#include "opaque_types.h"
#include <dlib/python.h>
#include <dlib/pixel.h>
#include <dlib/python/numpy_image.h>
using namespace dlib;
using namespace std;
namespace py = pybind11;
template <typename dest_image_type>
void pyimage_to_dlib_image(py::object img, dest_image_type& image)
{
if (is_gray_python_image(img))
assign_image(image, numpy_gray_image(img));
else if (is_rgb_python_image(img))
assign_image(image, numpy_rgb_image(img));
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
}
template <typename image_array, typename param_type>
void images_and_nested_params_to_dlib(
const py::object& pyimages,
const py::object& pyparams,
image_array& images,
std::vector<std::vector<param_type> >& params
std::vector<std::vector<param_type>>& params
)
{
// Now copy the data into dlib based objects.
py::iterator image_it = pyimages.begin();
py::iterator params_it = pyparams.begin();
for (unsigned long image_idx = 0;
image_it != pyimages.end()
&& params_it != pyparams.end();
++image_it, ++params_it, ++image_idx)
for (unsigned long image_idx = 0; image_it != pyimages.end() && params_it != pyparams.end(); ++image_it, ++params_it, ++image_idx)
{
for (py::iterator param_it = params_it->begin();
param_it != params_it->end();
++param_it)
params[image_idx].push_back(param_it->cast<param_type>());
for (py::iterator param_it = params_it->begin(); param_it != params_it->end(); ++param_it)
params[image_idx].push_back(param_it->cast<param_type>());
pyimage_to_dlib_image(image_it->cast<py::object>(), images[image_idx]);
images[image_idx] = image_it->cast<py::object>();
}
}
......
......@@ -15,17 +15,17 @@ namespace py = pybind11;
void start_track (
correlation_tracker& tracker,
py::object img,
py::array img,
const drectangle& bounding_box
)
{
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
tracker.start_track(numpy_gray_image(img), bounding_box);
tracker.start_track(numpy_image<unsigned char>(img), bounding_box);
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
tracker.start_track(numpy_rgb_image(img), bounding_box);
tracker.start_track(numpy_image<rgb_pixel>(img), bounding_box);
}
else
{
......@@ -35,7 +35,7 @@ void start_track (
void start_track_rec (
correlation_tracker& tracker,
py::object img,
py::array img,
const rectangle& bounding_box
)
{
......@@ -45,16 +45,16 @@ void start_track_rec (
double update (
correlation_tracker& tracker,
py::object img
py::array img
)
{
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
return tracker.update(numpy_gray_image(img));
return tracker.update(numpy_image<unsigned char>(img));
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
return tracker.update(numpy_rgb_image(img));
return tracker.update(numpy_image<rgb_pixel>(img));
}
else
{
......@@ -64,17 +64,17 @@ double update (
double update_guess (
correlation_tracker& tracker,
py::object img,
py::array img,
const drectangle& bounding_box
)
{
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
return tracker.update(numpy_gray_image(img), bounding_box);
return tracker.update(numpy_image<unsigned char>(img), bounding_box);
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
return tracker.update(numpy_rgb_image(img), bounding_box);
return tracker.update(numpy_image<rgb_pixel>(img), bounding_box);
}
else
{
......@@ -84,7 +84,7 @@ double update_guess (
double update_guess_rec (
correlation_tracker& tracker,
py::object img,
py::array img,
const rectangle& bounding_box
)
{
......
......@@ -32,7 +32,7 @@ public:
}
matrix<double,0,1> compute_face_descriptor (
py::object img,
numpy_image<rgb_pixel> img,
const full_object_detection& face,
const int num_jitters
)
......@@ -42,13 +42,11 @@ public:
}
std::vector<matrix<double,0,1>> compute_face_descriptors (
py::object img,
numpy_image<rgb_pixel> img,
const std::vector<full_object_detection>& faces,
const int num_jitters
)
{
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
for (auto& f : faces)
{
......@@ -61,7 +59,7 @@ public:
for (auto& f : faces)
dets.push_back(get_face_chip_details(f, 150, 0.25));
dlib::array<matrix<rgb_pixel>> face_chips;
extract_image_chips(numpy_rgb_image(img), dets, face_chips);
extract_image_chips(img, dets, face_chips);
std::vector<matrix<double,0,1>> face_descriptors;
face_descriptors.reserve(face_chips.size());
......@@ -161,22 +159,20 @@ py::list chinese_whispers_clustering(py::list descriptors, float threshold)
}
void save_face_chips (
py::object img,
numpy_image<rgb_pixel> img,
const std::vector<full_object_detection>& faces,
const std::string& chip_filename,
size_t size = 150,
float padding = 0.25
)
{
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
int num_faces = faces.size();
std::vector<chip_details> dets;
for (auto& f : faces)
dets.push_back(get_face_chip_details(f, size, padding));
dlib::array<matrix<rgb_pixel>> face_chips;
extract_image_chips(numpy_rgb_image(img), dets, face_chips);
extract_image_chips(numpy_image<rgb_pixel>(img), dets, face_chips);
int i=0;
for (auto& chip : face_chips)
{
......@@ -195,7 +191,7 @@ void save_face_chips (
}
void save_face_chip (
py::object img,
numpy_image<rgb_pixel> img,
const full_object_detection& face,
const std::string& chip_filename,
size_t size = 150,
......@@ -204,7 +200,6 @@ void save_face_chip (
{
std::vector<full_object_detection> faces(1, face);
save_face_chips(img, faces, chip_filename, size, padding);
return;
}
void bind_face_recognition(py::module &m)
......
......@@ -35,13 +35,13 @@ void image_window_set_image_simple_detector_py (
void image_window_set_image (
image_window& win,
py::object img
py::array img
)
{
if (is_gray_python_image(img))
return win.set_image(numpy_gray_image(img));
else if (is_rgb_python_image(img))
return win.set_image(numpy_rgb_image(img));
if (is_image<unsigned char>(img))
return win.set_image(numpy_image<unsigned char>(img));
else if (is_image<rgb_pixel>(img))
return win.set_image(numpy_image<rgb_pixel>(img));
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
}
......@@ -74,14 +74,14 @@ void add_overlay_parts (
win.add_overlay(render_face_detections(detection, color));
}
std::shared_ptr<image_window> make_image_window_from_image(py::object img)
std::shared_ptr<image_window> make_image_window_from_image(py::array img)
{
auto win = std::make_shared<image_window>();
image_window_set_image(*win, img);
return win;
}
std::shared_ptr<image_window> make_image_window_from_image_and_title(py::object img, const string& title)
std::shared_ptr<image_window> make_image_window_from_image_and_title(py::array img, const string& title)
{
auto win = std::make_shared<image_window>();
image_window_set_image(*win, img);
......
......@@ -10,36 +10,14 @@ using namespace std;
namespace py = pybind11;
py::array_t<uint8_t> convert_to_numpy(matrix<rgb_pixel>&& rgb_image)
{
const size_t dtype_size = sizeof(uint8_t);
const auto rows = static_cast<const size_t>(num_rows(rgb_image));
const auto cols = static_cast<const size_t>(num_columns(rgb_image));
const size_t channels = 3;
const size_t image_size = dtype_size * rows * cols * channels;
unique_ptr<rgb_pixel[]> arr_ptr = rgb_image.steal_memory();
uint8_t* arr = (uint8_t *) arr_ptr.release();
return pybind11::array_t<uint8_t>(
{rows, cols, channels}, // shape
{dtype_size * cols * channels, dtype_size * channels, dtype_size}, // strides
arr, // pointer
pybind11::capsule{
arr, [](void *arr_p) {
delete[] reinterpret_cast<uint8_t *>(arr_p);
}
}
);
}
// -------------------------------- Basic Image IO ----------------------------------------
py::array_t<uint8_t> load_rgb_image (const std::string &path)
numpy_image<rgb_pixel> load_rgb_image (const std::string &path)
{
matrix<rgb_pixel> img;
numpy_image<rgb_pixel> img;
load_image(img, path);
return convert_to_numpy(std::move(img));
return img;
}
bool has_ending (std::string const full_string, std::string const &ending) {
......@@ -50,22 +28,21 @@ bool has_ending (std::string const full_string, std::string const &ending) {
}
}
void save_rgb_image(py::object img, const std::string &path)
{
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
// ----------------------------------------------------------------------------------------
void save_rgb_image(numpy_image<rgb_pixel> img, const std::string &path)
{
std::string lowered_path = path;
std::transform(lowered_path.begin(), lowered_path.end(), lowered_path.begin(), ::tolower);
if(has_ending(lowered_path, ".bmp")) {
save_bmp(numpy_rgb_image(img), path);
save_bmp(img, path);
} else if(has_ending(lowered_path, ".dng")) {
save_dng(numpy_rgb_image(img), path);
save_dng(img, path);
} else if(has_ending(lowered_path, ".png")) {
save_png(numpy_rgb_image(img), path);
save_png(img, path);
} else if(has_ending(lowered_path, ".jpg") || has_ending(lowered_path, ".jpeg")) {
save_jpeg(numpy_rgb_image(img), path);
save_jpeg(img, path);
} else {
throw dlib::error("Unsupported image type, image path must end with one of [.bmp, .png, .dng, .jpg, .jpeg]");
}
......@@ -74,31 +51,22 @@ void save_rgb_image(py::object img, const std::string &path)
// ----------------------------------------------------------------------------------------
py::list get_jitter_images(py::object img, size_t num_jitters = 1, bool disturb_colors = false)
py::list get_jitter_images(numpy_image<rgb_pixel> img, size_t num_jitters = 1, bool disturb_colors = false)
{
static dlib::rand rnd_jitter;
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
// Convert the image to matrix<rgb_pixel> for processing
matrix<rgb_pixel> img_mat;
assign_image(img_mat, numpy_rgb_image(img));
// The top level list (containing 1 or more images) to return to python
py::list jitter_list;
for (int i = 0; i < num_jitters; ++i) {
// Get a jittered crop
matrix<rgb_pixel> crop = dlib::jitter_image(img_mat, rnd_jitter);
numpy_image<rgb_pixel> crop = dlib::jitter_image(img, rnd_jitter);
// If required disturb colors of the image
if(disturb_colors)
dlib::disturb_colors(crop, rnd_jitter);
// Convert image to Numpy array
py::array_t<uint8_t> arr = convert_to_numpy(std::move(crop));
// Append image to jittered image list
jitter_list.append(arr);
jitter_list.append(crop);
}
return jitter_list;
......@@ -107,14 +75,12 @@ py::list get_jitter_images(py::object img, size_t num_jitters = 1, bool disturb_
// ----------------------------------------------------------------------------------------
py::list get_face_chips (
py::object img,
numpy_image<rgb_pixel> img,
const std::vector<full_object_detection>& faces,
size_t size = 150,
float padding = 0.25
)
{
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
if (faces.size() < 1) {
throw dlib::error("No face were specified in the faces array.");
......@@ -125,33 +91,27 @@ py::list get_face_chips (
std::vector<chip_details> dets;
for (auto& f : faces)
dets.push_back(get_face_chip_details(f, size, padding));
dlib::array<matrix<rgb_pixel>> face_chips;
extract_image_chips(numpy_rgb_image(img), dets, face_chips);
dlib::array<numpy_image<rgb_pixel>> face_chips;
extract_image_chips(img, dets, face_chips);
for (auto& chip : face_chips)
{
// Convert image to Numpy array
py::array_t<uint8_t> arr = convert_to_numpy(std::move(chip));
// Append image to chips list
chips_list.append(arr);
chips_list.append(chip);
}
return chips_list;
}
py::array_t<uint8_t> get_face_chip (
py::object img,
numpy_image<rgb_pixel> get_face_chip (
numpy_image<rgb_pixel> img,
const full_object_detection& face,
size_t size = 150,
float padding = 0.25
)
{
if (!is_rgb_python_image(img))
throw dlib::error("Unsupported image type, must be RGB image.");
matrix<rgb_pixel> chip;
extract_image_chip(numpy_rgb_image(img), get_face_chip_details(face, size, padding), chip);
return convert_to_numpy(std::move(chip));
numpy_image<rgb_pixel> chip;
extract_image_chip(img, get_face_chip_details(face, size, padding), chip);
return chip;
}
// ----------------------------------------------------------------------------------------
......
......@@ -37,8 +37,8 @@ inline simple_object_detector_py train_simple_object_detector_on_images_py (
throw dlib::error("The length of the boxes list must match the length of the images list.");
// We never have any ignore boxes for this version of the API.
std::vector<std::vector<rectangle> > ignore(num_images), boxes(num_images);
dlib::array<array2d<rgb_pixel> > images(num_images);
std::vector<std::vector<rectangle>> ignore(num_images), boxes(num_images);
dlib::array<numpy_image<rgb_pixel>> images(num_images);
images_and_nested_params_to_dlib(pyimages, pyboxes, images, boxes);
return train_simple_object_detector_on_images("", images, boxes, ignore, options);
......@@ -56,8 +56,8 @@ inline simple_test_results test_simple_object_detector_with_images_py (
throw dlib::error("The length of the boxes list must match the length of the images list.");
// We never have any ignore boxes for this version of the API.
std::vector<std::vector<rectangle> > ignore(num_images), boxes(num_images);
dlib::array<array2d<rgb_pixel> > images(num_images);
std::vector<std::vector<rectangle>> ignore(num_images), boxes(num_images);
dlib::array<numpy_image<rgb_pixel>> images(num_images);
images_and_nested_params_to_dlib(pyimages, pyboxes, images, boxes);
return test_simple_object_detector_with_images(images, upsampling_amount, boxes, ignore, detector);
......@@ -86,22 +86,13 @@ inline simple_test_results test_simple_object_detector_py_with_images_py (
// ----------------------------------------------------------------------------------------
inline void find_candidate_object_locations_py (
py::object pyimage,
py::array pyimage,
py::list& pyboxes,
py::tuple pykvals,
unsigned long min_size,
unsigned long max_merging_iterations
)
{
// Copy the data into dlib based objects
array2d<rgb_pixel> image;
if (is_gray_python_image(pyimage))
assign_image(image, numpy_gray_image(pyimage));
else if (is_rgb_python_image(pyimage))
assign_image(image, numpy_rgb_image(pyimage));
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
if (py::len(pykvals) != 3)
throw dlib::error("kvals must be a tuple with three elements for start, end, num.");
......@@ -117,7 +108,12 @@ inline void find_candidate_object_locations_py (
for (long i = 0; i < count; ++i)
rects.push_back(pyboxes[i].cast<rectangle>());
// Find candidate objects
find_candidate_object_locations(image, rects, kvals, min_size, max_merging_iterations);
if (is_image<unsigned char>(pyimage))
find_candidate_object_locations(numpy_image<unsigned char>(pyimage), rects, kvals, min_size, max_merging_iterations);
else if (is_image<rgb_pixel>(pyimage))
find_candidate_object_locations(numpy_image<rgb_pixel>(pyimage), rects, kvals, min_size, max_merging_iterations);
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
// Collect boxes containing candidate objects
std::vector<rectangle>::iterator iter;
......
......@@ -17,18 +17,18 @@ namespace py = pybind11;
full_object_detection run_predictor (
shape_predictor& predictor,
py::object img,
py::array img,
py::object rect
)
{
rectangle box = rect.cast<rectangle>();
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
return predictor(numpy_gray_image(img), box);
return predictor(numpy_image<unsigned char>(img), box);
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
return predictor(numpy_rgb_image(img), box);
return predictor(numpy_image<rgb_pixel>(img), box);
}
else
{
......@@ -97,7 +97,7 @@ inline shape_predictor train_shape_predictor_on_images_py (
throw dlib::error("The length of the detections list must match the length of the images list.");
std::vector<std::vector<full_object_detection> > detections(num_images);
dlib::array<array2d<unsigned char> > images(num_images);
dlib::array<numpy_image<unsigned char>> images(num_images);
images_and_nested_params_to_dlib(pyimages, pydetections, images, detections);
return train_shape_predictor_on_images(images, detections, options);
......@@ -123,7 +123,7 @@ inline double test_shape_predictor_with_images_py (
std::vector<std::vector<double> > scales;
if (num_scales > 0)
scales.resize(num_scales);
dlib::array<array2d<unsigned char> > images(num_images);
dlib::array<numpy_image<unsigned char>> images(num_images);
// Now copy the data into dlib based objects so we can call the testing routine.
for (unsigned long i = 0; i < num_images; ++i)
......@@ -134,14 +134,12 @@ inline double test_shape_predictor_with_images_py (
++det_it)
detections[i].push_back(det_it->cast<full_object_detection>());
pyimage_to_dlib_image(pyimages[i], images[i]);
images[i] = pyimages[i];
if (num_scales > 0)
{
if (num_boxes != py::len(pyscales[i]))
throw dlib::error("The length of the scales list must match the length of the detections list.");
for (py::iterator scale_it = pyscales[i].begin();
scale_it != pyscales[i].end();
++scale_it)
for (py::iterator scale_it = pyscales[i].begin(); scale_it != pyscales[i].end(); ++scale_it)
scales[i].push_back(scale_it->cast<double>());
}
}
......
......@@ -37,7 +37,7 @@ namespace dlib
inline std::vector<dlib::rectangle> run_detector_with_upscale1 (
dlib::simple_object_detector& detector,
py::object img,
py::array img,
const unsigned int upsampling_amount,
const double adjust_threshold,
std::vector<double>& detection_confidences,
......@@ -49,19 +49,19 @@ namespace dlib
std::vector<rectangle> rectangles;
std::vector<rect_detection> rect_detections;
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
array2d<unsigned char> temp;
if (upsampling_amount == 0)
{
detector(numpy_gray_image(img), rect_detections, adjust_threshold);
detector(numpy_image<unsigned char>(img), rect_detections, adjust_threshold);
split_rect_detections(rect_detections, rectangles,
detection_confidences, weight_indices);
return rectangles;
}
else
{
pyramid_up(numpy_gray_image(img), temp, pyr);
pyramid_up(numpy_image<unsigned char>(img), temp, pyr);
unsigned int levels = upsampling_amount-1;
while (levels > 0)
{
......@@ -79,19 +79,19 @@ namespace dlib
return rectangles;
}
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
array2d<rgb_pixel> temp;
if (upsampling_amount == 0)
{
detector(numpy_rgb_image(img), rect_detections, adjust_threshold);
detector(numpy_image<rgb_pixel>(img), rect_detections, adjust_threshold);
split_rect_detections(rect_detections, rectangles,
detection_confidences, weight_indices);
return rectangles;
}
else
{
pyramid_up(numpy_rgb_image(img), temp, pyr);
pyramid_up(numpy_image<rgb_pixel>(img), temp, pyr);
unsigned int levels = upsampling_amount-1;
while (levels > 0)
{
......@@ -117,7 +117,7 @@ namespace dlib
inline std::vector<dlib::rectangle> run_detectors_with_upscale1 (
std::vector<simple_object_detector >& detectors,
py::object img,
py::array img,
const unsigned int upsampling_amount,
const double adjust_threshold,
std::vector<double>& detection_confidences,
......@@ -129,19 +129,19 @@ namespace dlib
std::vector<rectangle> rectangles;
std::vector<rect_detection> rect_detections;
if (is_gray_python_image(img))
if (is_image<unsigned char>(img))
{
array2d<unsigned char> temp;
if (upsampling_amount == 0)
{
evaluate_detectors(detectors, numpy_gray_image(img), rect_detections, adjust_threshold);
evaluate_detectors(detectors, numpy_image<unsigned char>(img), rect_detections, adjust_threshold);
split_rect_detections(rect_detections, rectangles,
detection_confidences, weight_indices);
return rectangles;
}
else
{
pyramid_up(numpy_gray_image(img), temp, pyr);
pyramid_up(numpy_image<unsigned char>(img), temp, pyr);
unsigned int levels = upsampling_amount-1;
while (levels > 0)
{
......@@ -159,19 +159,19 @@ namespace dlib
return rectangles;
}
}
else if (is_rgb_python_image(img))
else if (is_image<rgb_pixel>(img))
{
array2d<rgb_pixel> temp;
if (upsampling_amount == 0)
{
evaluate_detectors(detectors, numpy_rgb_image(img), rect_detections, adjust_threshold);
evaluate_detectors(detectors, numpy_image<rgb_pixel>(img), rect_detections, adjust_threshold);
split_rect_detections(rect_detections, rectangles,
detection_confidences, weight_indices);
return rectangles;
}
else
{
pyramid_up(numpy_rgb_image(img), temp, pyr);
pyramid_up(numpy_image<rgb_pixel>(img), temp, pyr);
unsigned int levels = upsampling_amount-1;
while (levels > 0)
{
......@@ -197,7 +197,7 @@ namespace dlib
inline std::vector<dlib::rectangle> run_detector_with_upscale2 (
dlib::simple_object_detector& detector,
py::object img,
py::array img,
const unsigned int upsampling_amount
)
......@@ -213,7 +213,7 @@ namespace dlib
inline py::tuple run_rect_detector (
dlib::simple_object_detector& detector,
py::object img,
py::array img,
const unsigned int upsampling_amount,
const double adjust_threshold)
{
......@@ -234,7 +234,7 @@ namespace dlib
inline py::tuple run_multiple_rect_detectors (
py::list& detectors,
py::object img,
py::array img,
const unsigned int upsampling_amount,
const double adjust_threshold)
{
......@@ -272,13 +272,13 @@ namespace dlib
simple_object_detector_py(simple_object_detector& _detector, unsigned int _upsampling_amount) :
detector(_detector), upsampling_amount(_upsampling_amount) {}
std::vector<dlib::rectangle> run_detector1 (py::object img,
std::vector<dlib::rectangle> run_detector1 (py::array img,
const unsigned int upsampling_amount_)
{
return run_detector_with_upscale2(detector, img, upsampling_amount_);
}
std::vector<dlib::rectangle> run_detector2 (py::object img)
std::vector<dlib::rectangle> run_detector2 (py::array img)
{
return run_detector_with_upscale2(detector, img, upsampling_amount);
}
......