Commit 03cfc68a authored by Davis E. King's avatar Davis E. King

Merge pull request #13 from patricksnape/py_corr_tracker

Add a Python interface for the Correlation Tracker
parents d3a3288e dd922c66
#!/usr/bin/python
# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
#
# This example shows how to use the correlation_tracker from the dlib Python
# library. This object lets you track the position of an object as it moves
# from frame to frame in a video sequence. To use it, you give the
# correlation_tracker the bounding box of the object you want to track in the
# current video frame. Then it will identify the location of the object in
# subsequent frames.
#
# In this particular example, we are going to run on the
# video sequence that comes with dlib, which can be found in the
# examples/video_frames folder. This video shows a juice box sitting on a table
# and someone is waving the camera around. The task is to track the position of
# the juice box as the camera moves around.
#
# COMPILING THE DLIB PYTHON INTERFACE
# Dlib comes with a compiled python interface for python 2.7 on MS Windows. If
# you are using another python version or operating system then you need to
# compile the dlib python interface before you can use this file. To do this,
# run compile_dlib_python_module.bat. This should work on any operating
# system so long as you have CMake and boost-python installed.
# On Ubuntu, this can be done easily by running the command:
# sudo apt-get install libboost-python-dev cmake
#
# Also note that this example requires scikit-image which can be installed
# via the command:
# pip install -U scikit-image
# Or downloaded from http://scikit-image.org/download.html.
import os
import glob

import dlib
from skimage import io

# Path to the folder containing the example video frames.
video_folder = os.path.join("..", "examples", "video_frames")

# Create the correlation tracker - the object needs to be initialized
# before it can be used.
tracker = dlib.correlation_tracker()

win = dlib.image_window()
# We will track the frames as we load them off of disk.  Sorting the glob
# result guarantees the frames are processed in temporal order.
for k, f in enumerate(sorted(glob.glob(os.path.join(video_folder, "*.jpg")))):
    print("Processing Frame {}".format(k))
    img = io.imread(f)

    # We need to initialize the tracker on the first frame.
    if k == 0:
        # Start a track on the juice box. If you look at the first frame you
        # will see that the juice box is contained within the bounding
        # box (74, 67, 112, 153).
        tracker.start_track(img, dlib.rectangle(74, 67, 112, 153))
    else:
        # Otherwise we just attempt to track from the previous frame.
        tracker.update(img)

    # Show the current frame with the tracker's predicted position overlaid.
    win.clear_overlay()
    win.set_image(img)
    win.add_overlay(tracker.get_position())
    dlib.hit_enter_to_continue()
...@@ -18,8 +18,10 @@ set(python_srcs ...@@ -18,8 +18,10 @@ set(python_srcs
src/sequence_segmenter.cpp src/sequence_segmenter.cpp
src/svm_struct.cpp src/svm_struct.cpp
src/image.cpp src/image.cpp
src/rectangles.cpp
src/object_detection.cpp src/object_detection.cpp
src/shape_predictor.cpp src/shape_predictor.cpp
src/correlation_tracker.cpp
) )
# Only add the GUI module if requested # Only add the GUI module if requested
......
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#include <dlib/python.h>
#include <dlib/geometry.h>
#include <boost/python/args.hpp>
#include <dlib/image_processing.h>
using namespace dlib;
using namespace std;
using namespace boost::python;
// ----------------------------------------------------------------------------------------
// Begin tracking the object inside bounding_box in the given frame.
// The numpy image must be either an 8bit grayscale or an RGB image;
// any other array type is rejected with a dlib::error.
void start_track (
    correlation_tracker& tracker,
    object img,
    const drectangle& bounding_box
)
{
    if (is_gray_python_image(img))
        tracker.start_track(numpy_gray_image(img), bounding_box);
    else if (is_rgb_python_image(img))
        tracker.start_track(numpy_rgb_image(img), bounding_box);
    else
        throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
}
// Overload of start_track() taking an integer rectangle.  The box is widened
// to a drectangle and forwarded to the floating point version above.
void start_track_rec (
    correlation_tracker& tracker,
    object img,
    const rectangle& bounding_box
)
{
    start_track(tracker, img, drectangle(bounding_box));
}
// Advance the tracker to the next frame, searching around the previous
// position.  Returns the peak-to-side-lobe ratio (tracking confidence).
// Throws dlib::error for images that are neither 8bit gray nor RGB.
double update (
    correlation_tracker& tracker,
    object img
)
{
    if (is_gray_python_image(img))
        return tracker.update(numpy_gray_image(img));
    if (is_rgb_python_image(img))
        return tracker.update(numpy_rgb_image(img));
    throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
}
// Advance the tracker to the next frame, searching around the caller-supplied
// guess instead of the previous position.  Returns the peak-to-side-lobe
// ratio (tracking confidence).  Throws dlib::error for unsupported images.
double update_guess (
    correlation_tracker& tracker,
    object img,
    const drectangle& bounding_box
)
{
    if (is_gray_python_image(img))
        return tracker.update(numpy_gray_image(img), bounding_box);
    if (is_rgb_python_image(img))
        return tracker.update(numpy_rgb_image(img), bounding_box);
    throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
}
// Overload of update_guess() taking an integer rectangle; the box is widened
// to a drectangle and forwarded to the floating point version above.
double update_guess_rec (
    correlation_tracker& tracker,
    object img,
    const rectangle& bounding_box
)
{
    return update_guess(tracker, img, drectangle(bounding_box));
}
drectangle get_position (const correlation_tracker& tracker) { return tracker.get_position(); }
// ----------------------------------------------------------------------------------------
// Register the Python correlation_tracker class.  Each overload of
// start_track()/update() defined above becomes one .def() entry so Python
// callers can pass either an integer rectangle or a drectangle.
// Called from the module init in dlib.cpp.
void bind_correlation_tracker()
{
    using boost::python::arg;
    {
        typedef correlation_tracker type;
        // The long string arguments below are the Python-visible docstrings.
        // They use backslash line continuations, so they must not be touched
        // or re-indented (the leading whitespace is part of the string).
        class_<type>("correlation_tracker", "This is a tool for tracking moving objects in a video stream. You give it \n\
the bounding box of an object in the first frame and it attempts to track the \n\
object in the box from frame to frame. \n\
This tool is an implementation of the method described in the following paper: \n\
Danelljan, Martin, et al. 'Accurate scale estimation for robust visual \n\
tracking.' Proceedings of the British Machine Vision Conference BMVC. 2014.")
            // start_track(image, bounding_box) with a drectangle box.
            .def("start_track", &::start_track, (arg("image"), arg("bounding_box")), "\
requires \n\
- image is a numpy ndarray containing either an 8bit grayscale or RGB image. \n\
- bounding_box.is_empty() == false \n\
ensures \n\
- This object will start tracking the thing inside the bounding box in the \n\
given image. That is, if you call update() with subsequent video frames \n\
then it will try to keep track of the position of the object inside bounding_box. \n\
- #get_position() == bounding_box")
            // start_track(image, bounding_box) with an integer rectangle box.
            .def("start_track", &::start_track_rec, (arg("image"), arg("bounding_box")), "\
requires \n\
- image is a numpy ndarray containing either an 8bit grayscale or RGB image. \n\
- bounding_box.is_empty() == false \n\
ensures \n\
- This object will start tracking the thing inside the bounding box in the \n\
given image. That is, if you call update() with subsequent video frames \n\
then it will try to keep track of the position of the object inside bounding_box. \n\
- #get_position() == bounding_box")
            // update(image): search around the previous position.
            .def("update", &::update, arg("image"), "\
requires \n\
- image is a numpy ndarray containing either an 8bit grayscale or RGB image. \n\
- get_position().is_empty() == false \n\
(i.e. you must have started tracking by calling start_track()) \n\
ensures \n\
- performs: return update(img, get_position())")
            // update(image, guess) with a drectangle guess.
            .def("update", &::update_guess, (arg("image"), arg("guess")), "\
requires \n\
- image is a numpy ndarray containing either an 8bit grayscale or RGB image. \n\
- get_position().is_empty() == false \n\
(i.e. you must have started tracking by calling start_track()) \n\
ensures \n\
- When searching for the object in img, we search in the area around the \n\
provided guess. \n\
- #get_position() == the new predicted location of the object in img. This \n\
location will be a copy of guess that has been translated and scaled \n\
appropriately based on the content of img so that it, hopefully, bounds \n\
the object in img. \n\
- Returns the peak to side-lobe ratio. This is a number that measures how \n\
confident the tracker is that the object is inside #get_position(). \n\
Larger values indicate higher confidence.")
            // update(image, guess) with an integer rectangle guess.
            .def("update", &::update_guess_rec, (arg("image"), arg("guess")), "\
requires \n\
- image is a numpy ndarray containing either an 8bit grayscale or RGB image. \n\
- get_position().is_empty() == false \n\
(i.e. you must have started tracking by calling start_track()) \n\
ensures \n\
- When searching for the object in img, we search in the area around the \n\
provided guess. \n\
- #get_position() == the new predicted location of the object in img. This \n\
location will be a copy of guess that has been translated and scaled \n\
appropriately based on the content of img so that it, hopefully, bounds \n\
the object in img. \n\
- Returns the peak to side-lobe ratio. This is a number that measures how \n\
confident the tracker is that the object is inside #get_position(). \n\
Larger values indicate higher confidence.")
            .def("get_position", &::get_position, "returns the predicted position of the object under track.");
    }
}
// Copyright (C) 2013 Davis E. King (davis@dlib.net) // Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license. // License: Boost Software License See LICENSE.txt for the full license.
#include <boost/python.hpp> #include <boost/python.hpp>
...@@ -14,8 +14,10 @@ void bind_cca(); ...@@ -14,8 +14,10 @@ void bind_cca();
void bind_sequence_segmenter(); void bind_sequence_segmenter();
void bind_svm_struct(); void bind_svm_struct();
void bind_image_classes(); void bind_image_classes();
void bind_rectangles();
void bind_object_detection(); void bind_object_detection();
void bind_shape_predictors(); void bind_shape_predictors();
void bind_correlation_tracker();
#ifndef DLIB_NO_GUI_SUPPORT #ifndef DLIB_NO_GUI_SUPPORT
void bind_gui(); void bind_gui();
...@@ -38,8 +40,10 @@ BOOST_PYTHON_MODULE(dlib) ...@@ -38,8 +40,10 @@ BOOST_PYTHON_MODULE(dlib)
bind_sequence_segmenter(); bind_sequence_segmenter();
bind_svm_struct(); bind_svm_struct();
bind_image_classes(); bind_image_classes();
bind_rectangles();
bind_object_detection(); bind_object_detection();
bind_shape_predictors(); bind_shape_predictors();
bind_correlation_tracker();
#ifndef DLIB_NO_GUI_SUPPORT #ifndef DLIB_NO_GUI_SUPPORT
bind_gui(); bind_gui();
#endif #endif
......
...@@ -54,6 +54,16 @@ void add_overlay_rect ( ...@@ -54,6 +54,16 @@ void add_overlay_rect (
win.add_overlay(rect, color); win.add_overlay(rect, color);
} }
// Overlay a floating point rectangle (drectangle) on the window by first
// converting it to an integer rectangle, then reusing the regular
// rectangle overlay path.
// NOTE(review): the double coordinates are narrowed to long here, which
// truncates rather than rounds -- confirm this matches the intended
// drectangle->rectangle conversion semantics.
void add_overlay_drect (
    image_window& win,
    const drectangle& drect,
    const rgb_pixel& color
)
{
    rectangle rect(drect.left(), drect.top(), drect.right(), drect.bottom());
    win.add_overlay(rect, color);
}
void add_overlay_parts ( void add_overlay_parts (
image_window& win, image_window& win,
const full_object_detection& detection, const full_object_detection& detection,
...@@ -106,6 +116,8 @@ void bind_gui() ...@@ -106,6 +116,8 @@ void bind_gui()
"Add a list of rectangles to the image_window. They will be displayed as red boxes by default, but the color can be passed.") "Add a list of rectangles to the image_window. They will be displayed as red boxes by default, but the color can be passed.")
.def("add_overlay", add_overlay_rect, (arg("rectangle"), arg("color")=rgb_pixel(255, 0, 0)), .def("add_overlay", add_overlay_rect, (arg("rectangle"), arg("color")=rgb_pixel(255, 0, 0)),
"Add a rectangle to the image_window. It will be displayed as a red box by default, but the color can be passed.") "Add a rectangle to the image_window. It will be displayed as a red box by default, but the color can be passed.")
.def("add_overlay", add_overlay_drect, (arg("rectangle"), arg("color")=rgb_pixel(255, 0, 0)),
"Add a rectangle to the image_window. It will be displayed as a red box by default, but the color can be passed.")
.def("add_overlay", add_overlay_parts, (arg("detection"), arg("color")=rgb_pixel(0, 0, 255)), .def("add_overlay", add_overlay_parts, (arg("detection"), arg("color")=rgb_pixel(0, 0, 255)),
"Add full_object_detection parts to the image window. They will be displayed as blue lines by default, but the color can be passed.") "Add full_object_detection parts to the image window. They will be displayed as blue lines by default, but the color can be passed.")
.def("wait_until_closed", &type::wait_until_closed, .def("wait_until_closed", &type::wait_until_closed,
......
// Copyright (C) 2014 Davis E. King (davis@dlib.net) // Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license. // License: Boost Software License See LICENSE.txt for the full license.
#include <dlib/python.h> #include <dlib/python.h>
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
#include <boost/python/args.hpp> #include <boost/python/args.hpp>
#include <dlib/geometry.h> #include <dlib/geometry.h>
#include <dlib/image_processing/frontal_face_detector.h> #include <dlib/image_processing/frontal_face_detector.h>
#include "indexing.h"
#include "simple_object_detector.h" #include "simple_object_detector.h"
#include "simple_object_detector_py.h" #include "simple_object_detector_py.h"
#include "conversion.h" #include "conversion.h"
...@@ -26,29 +25,6 @@ string print_simple_test_results(const simple_test_results& r) ...@@ -26,29 +25,6 @@ string print_simple_test_results(const simple_test_results& r)
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
long left(const rectangle& r) { return r.left(); }
long top(const rectangle& r) { return r.top(); }
long right(const rectangle& r) { return r.right(); }
long bottom(const rectangle& r) { return r.bottom(); }
long width(const rectangle& r) { return r.width(); }
long height(const rectangle& r) { return r.height(); }
string print_rectangle_str(const rectangle& r)
{
std::ostringstream sout;
sout << r;
return sout.str();
}
string print_rectangle_repr(const rectangle& r)
{
std::ostringstream sout;
sout << "rectangle(" << r.left() << "," << r.top() << "," << r.right() << "," << r.bottom() << ")";
return sout.str();
}
// ----------------------------------------------------------------------------------------
inline simple_object_detector_py train_simple_object_detector_on_images_py ( inline simple_object_detector_py train_simple_object_detector_on_images_py (
const boost::python::list& pyimages, const boost::python::list& pyimages,
const boost::python::list& pyboxes, const boost::python::list& pyboxes,
...@@ -153,55 +129,45 @@ inline void find_candidate_object_locations_py ( ...@@ -153,55 +129,45 @@ inline void find_candidate_object_locations_py (
void bind_object_detection() void bind_object_detection()
{ {
using boost::python::arg; using boost::python::arg;
{
class_<simple_object_detector_training_options>("simple_object_detector_training_options", typedef simple_object_detector_training_options type;
class_<type>("simple_object_detector_training_options",
"This object is a container for the options to the train_simple_object_detector() routine.") "This object is a container for the options to the train_simple_object_detector() routine.")
.add_property("be_verbose", &simple_object_detector_training_options::be_verbose, .add_property("be_verbose", &type::be_verbose,
&simple_object_detector_training_options::be_verbose, &type::be_verbose,
"If true, train_simple_object_detector() will print out a lot of information to the screen while training.") "If true, train_simple_object_detector() will print out a lot of information to the screen while training.")
.add_property("add_left_right_image_flips", &simple_object_detector_training_options::add_left_right_image_flips, .add_property("add_left_right_image_flips", &type::add_left_right_image_flips,
&simple_object_detector_training_options::add_left_right_image_flips, &type::add_left_right_image_flips,
"if true, train_simple_object_detector() will assume the objects are \n\ "if true, train_simple_object_detector() will assume the objects are \n\
left/right symmetric and add in left right flips of the training \n\ left/right symmetric and add in left right flips of the training \n\
images. This doubles the size of the training dataset.") images. This doubles the size of the training dataset.")
.add_property("detection_window_size", &simple_object_detector_training_options::detection_window_size, .add_property("detection_window_size", &type::detection_window_size,
&simple_object_detector_training_options::detection_window_size, &type::detection_window_size,
"The sliding window used will have about this many pixels inside it.") "The sliding window used will have about this many pixels inside it.")
.add_property("C", &simple_object_detector_training_options::C, .add_property("C", &type::C,
&simple_object_detector_training_options::C, &type::C,
"C is the usual SVM C regularization parameter. So it is passed to \n\ "C is the usual SVM C regularization parameter. So it is passed to \n\
structural_object_detection_trainer::set_c(). Larger values of C \n\ structural_object_detection_trainer::set_c(). Larger values of C \n\
will encourage the trainer to fit the data better but might lead to \n\ will encourage the trainer to fit the data better but might lead to \n\
overfitting. Therefore, you must determine the proper setting of \n\ overfitting. Therefore, you must determine the proper setting of \n\
this parameter experimentally.") this parameter experimentally.")
.add_property("epsilon", &simple_object_detector_training_options::epsilon, .add_property("epsilon", &type::epsilon,
&simple_object_detector_training_options::epsilon, &type::epsilon,
"epsilon is the stopping epsilon. Smaller values make the trainer's \n\ "epsilon is the stopping epsilon. Smaller values make the trainer's \n\
solver more accurate but might take longer to train.") solver more accurate but might take longer to train.")
.add_property("num_threads", &simple_object_detector_training_options::num_threads, .add_property("num_threads", &type::num_threads,
&simple_object_detector_training_options::num_threads, &type::num_threads,
"train_simple_object_detector() will use this many threads of \n\ "train_simple_object_detector() will use this many threads of \n\
execution. Set this to the number of CPU cores on your machine to \n\ execution. Set this to the number of CPU cores on your machine to \n\
obtain the fastest training speed."); obtain the fastest training speed.");
}
class_<simple_test_results>("simple_test_results")
.add_property("precision", &simple_test_results::precision)
.add_property("recall", &simple_test_results::recall)
.add_property("average_precision", &simple_test_results::average_precision)
.def("__str__", &::print_simple_test_results);
{ {
typedef rectangle type; typedef simple_test_results type;
class_<type>("rectangle", "This object represents a rectangular area of an image.") class_<type>("simple_test_results")
.def(init<long,long,long,long>( (arg("left"),arg("top"),arg("right"),arg("bottom")) )) .add_property("precision", &type::precision)
.def("left", &::left) .add_property("recall", &type::recall)
.def("top", &::top) .add_property("average_precision", &type::average_precision)
.def("right", &::right) .def("__str__", &::print_simple_test_results);
.def("bottom", &::bottom)
.def("width", &::width)
.def("height", &::height)
.def("__str__", &::print_rectangle_str)
.def("__repr__", &::print_rectangle_repr)
.def_pickle(serialize_pickle<type>());
} }
// Here, kvals is actually the result of linspace(start, end, num) and it is different from kvals used // Here, kvals is actually the result of linspace(start, end, num) and it is different from kvals used
...@@ -398,14 +364,6 @@ ensures \n\ ...@@ -398,14 +364,6 @@ ensures \n\
.def("save", save_simple_object_detector_py, (arg("detector_output_filename")), "Save a simple_object_detector to the provided path.") .def("save", save_simple_object_detector_py, (arg("detector_output_filename")), "Save a simple_object_detector to the provided path.")
.def_pickle(serialize_pickle<type>()); .def_pickle(serialize_pickle<type>());
} }
{
typedef std::vector<rectangle> type;
class_<type>("rectangles", "An array of rectangle objects.")
.def(vector_indexing_suite<type>())
.def("clear", &type::clear)
.def("resize", resize<type>)
.def_pickle(serialize_pickle<type>());
}
} }
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#include <dlib/python.h>
#include <boost/python/args.hpp>
#include <dlib/geometry.h>
#include "indexing.h"
using namespace dlib;
using namespace std;
using namespace boost::python;
// ----------------------------------------------------------------------------------------
// Free-function wrappers around dlib::rectangle's accessors (integer
// coordinates); each simply forwards to the member of the same name.  They
// are bound as Python methods in bind_rectangles() below.
long left(const rectangle& r) { return r.left(); }
long top(const rectangle& r) { return r.top(); }
long right(const rectangle& r) { return r.right(); }
long bottom(const rectangle& r) { return r.bottom(); }
long width(const rectangle& r) { return r.width(); }
long height(const rectangle& r) { return r.height(); }
unsigned long area(const rectangle& r) { return r.area(); }

// Same wrappers for dlib::drectangle (floating point coordinates).
double dleft(const drectangle& r) { return r.left(); }
double dtop(const drectangle& r) { return r.top(); }
double dright(const drectangle& r) { return r.right(); }
double dbottom(const drectangle& r) { return r.bottom(); }
double dwidth(const drectangle& r) { return r.width(); }
double dheight(const drectangle& r) { return r.height(); }
double darea(const drectangle& r) { return r.area(); }
// Helpers shared by both rectangle types; rect_type is either
// dlib::rectangle or dlib::drectangle.

template <typename rect_type>
bool is_empty(const rect_type& r) { return r.is_empty(); }

// center()/dcenter() must call the dlib:: functions with explicit
// qualification: an unqualified center(r) also names this wrapper itself and
// only resolves to the dlib overloads through the file's using-directive --
// one refactor away from silent infinite recursion.
template <typename rect_type>
point center(const rect_type& r) { return dlib::center(r); }

template <typename rect_type>
point dcenter(const rect_type& r) { return dlib::dcenter(r); }

template <typename rect_type>
bool contains(const rect_type& r, const point& p) { return r.contains(p); }

template <typename rect_type>
bool contains_xy(const rect_type& r, const long x, const long y) { return r.contains(point(x, y)); }

template <typename rect_type>
bool contains_rec(const rect_type& r, const rect_type& r2) { return r.contains(r2); }

template <typename rect_type>
rect_type intersect(const rect_type& r, const rect_type& r2) { return r.intersect(r2); }
// Render a rectangle using dlib's stream output operator; bound as the
// Python __str__ for both rectangle types.
template <typename rect_type>
string print_rectangle_str(const rect_type& r)
{
    std::ostringstream out;
    out << r;
    return out.str();
}
template <typename rect_type>
string print_rectangle_repr(const rect_type& r)
{
std::ostringstream sout;
sout << "rectangle(" << r.left() << "," << r.top() << "," << r.right() << "," << r.bottom() << ")";
return sout.str();
}
// ----------------------------------------------------------------------------------------
// Register rectangle, drectangle, and the rectangles array type with
// boost::python.  Called from the module init in dlib.cpp.
void bind_rectangles()
{
    using boost::python::arg;
    {
        // Integer-coordinate rectangle.
        typedef rectangle type;
        class_<type>("rectangle", "This object represents a rectangular area of an image.")
            .def(init<long,long,long,long>( (arg("left"),arg("top"),arg("right"),arg("bottom")) ))
            .def("area", &::area)
            .def("left", &::left)
            .def("top", &::top)
            .def("right", &::right)
            .def("bottom", &::bottom)
            .def("width", &::width)
            .def("height", &::height)
            .def("is_empty", &::is_empty<type>)
            .def("center", &::center<type>)
            .def("dcenter", &::dcenter<type>)
            // contains() is overloaded on a point, an (x, y) pair, and a rectangle.
            .def("contains", &::contains<type>, arg("point"))
            .def("contains", &::contains_xy<type>, (arg("x"), arg("y")))
            .def("contains", &::contains_rec<type>, (arg("rectangle")))
            .def("intersect", &::intersect<type>, (arg("rectangle")))
            .def("__str__", &::print_rectangle_str<type>)
            .def("__repr__", &::print_rectangle_repr<type>)
            // def_pickle enables Python pickling and copy.deepcopy support.
            .def_pickle(serialize_pickle<type>());
    }
    {
        // Floating point rectangle; mirrors the integer rectangle API.
        typedef drectangle type;
        class_<type>("drectangle", "This object represents a rectangular area of an image with floating point coordinates.")
            .def(init<double,double,double,double>( (arg("left"),arg("top"),arg("right"),arg("bottom")) ))
            .def("area", &::darea)
            .def("left", &::dleft)
            .def("top", &::dtop)
            .def("right", &::dright)
            .def("bottom", &::dbottom)
            .def("width", &::dwidth)
            .def("height", &::dheight)
            .def("is_empty", &::is_empty<type>)
            .def("center", &::center<type>)
            .def("dcenter", &::dcenter<type>)
            .def("contains", &::contains<type>, arg("point"))
            .def("contains", &::contains_xy<type>, (arg("x"), arg("y")))
            .def("contains", &::contains_rec<type>, (arg("rectangle")))
            .def("intersect", &::intersect<type>, (arg("rectangle")))
            .def("__str__", &::print_rectangle_str<type>)
            .def("__repr__", &::print_rectangle_repr<type>)
            .def_pickle(serialize_pickle<type>());
    }
    {
        // A Python list-like container of rectangle objects.
        typedef std::vector<rectangle> type;
        class_<type>("rectangles", "An array of rectangle objects.")
            .def(vector_indexing_suite<type>())
            .def("clear", &type::clear)
            .def("resize", resize<type>)
            .def_pickle(serialize_pickle<type>());
    }
}
// ----------------------------------------------------------------------------------------
// Copyright (C) 2014 Davis E. King (davis@dlib.net) // Copyright (C) 2014 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license. // License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SHAPE_PREDICTOR_DETECTOR_H__ #ifndef DLIB_SHAPE_PREDICTOR_H__
#define DLIB_SHAPE_PREDICTOR_DETECTOR_H__ #define DLIB_SHAPE_PREDICTOR_H__
#include "dlib/string.h" #include "dlib/string.h"
#include "dlib/geometry.h" #include "dlib/geometry.h"
...@@ -179,5 +179,5 @@ namespace dlib ...@@ -179,5 +179,5 @@ namespace dlib
} }
#endif // DLIB_SHAPE_PREDICTOR_DETECTOR_H__ #endif // DLIB_SHAPE_PREDICTOR_H__
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment