Commit 7f2e28c4 authored by Davis King's avatar Davis King

Added the correlation_tracker.

parent 6adfdac9
......@@ -15,6 +15,7 @@
#include "image_processing/remove_unobtainable_rectangles.h"
#include "image_processing/scan_fhog_pyramid.h"
#include "image_processing/shape_predictor.h"
#include "image_processing/correlation_tracker.h"
#endif // DLIB_IMAGE_PROCESSInG_H_h_
......
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_CORRELATION_TrACKER_H_
#define DLIB_CORRELATION_TrACKER_H_
#include "correlation_tracker_abstract.h"
#include "../geometry.h"
#include "../matrix.h"
#include "../array2d.h"
#include "../image_transforms/assign_image.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class correlation_tracker
{
public:
correlation_tracker (
)
{
// Create the cosine mask used for space filtering.
mask = make_cosine_mask();
// Create the cosine mask used for the scale filtering.
scale_cos_mask.resize(get_num_scale_levels());
const long max_level = get_num_scale_levels()/2;
for (unsigned long k = 0; k < get_num_scale_levels(); ++k)
{
double dist = std::abs((double)k-max_level)/max_level*pi/2;
dist = std::min(dist, pi/2);
scale_cos_mask[k] = std::cos(dist);
}
}
template <typename image_type>
void start_track (
const image_type& img,
const drectangle& p
)
{
DLIB_CASSERT(p.is_empty() == false,
"\t void correlation_tracker::start_track()"
<< "\n\t You can't give an empty rectangle."
);
B.set_size(0,0);
point_transform_affine tform = inv(make_chip(img, p, F));
for (unsigned long i = 0; i < F.size(); ++i)
fft_inplace(F[i]);
make_target_location_image(tform(center(p)), G);
A.resize(F.size());
for (unsigned long i = 0; i < F.size(); ++i)
{
A[i] = pointwise_multiply(G, F[i]);
B += squared(real(F[i]))+squared(imag(F[i]));
}
position = p;
// now do the scale space stuff
make_scale_space(img, Fs);
for (unsigned long i = 0; i < Fs.size(); ++i)
fft_inplace(Fs[i]);
make_scale_target_location_image(get_num_scale_levels()/2, Gs);
Bs.set_size(0,0);
As.resize(Fs.size());
for (unsigned long i = 0; i < Fs.size(); ++i)
{
As[i] = pointwise_multiply(Gs, Fs[i]);
Bs += squared(real(Fs[i]))+squared(imag(Fs[i]));
}
}
unsigned long get_filter_size (
) const { return 128/2; } // must be power of 2
long get_num_scale_levels(
) const { return 32; } // must be power of 2
unsigned long get_scale_window_size (
) const { return 23; }
double get_regularizer_space (
) const { return 0.001; }
inline double get_nu_space (
) const { return 0.025;}
double get_regularizer_scale (
) const { return 0.001; }
double get_nu_scale (
) const { return 0.025;}
drectangle get_position (
) const
{
return position;
}
double get_scale_pyramid_alpha (
) const
{
return 1.020;
}
template <typename image_type>
double update (
const image_type& img,
const drectangle& guess
)
{
DLIB_CASSERT(get_position().is_empty() == false,
"\t double correlation_tracker::update()"
<< "\n\t You must call start_track() first before calling update()."
);
const point_transform_affine tform = make_chip(img, guess, F);
for (unsigned long i = 0; i < F.size(); ++i)
fft_inplace(F[i]);
// use the current filter to predict the object's location
G = 0;
for (unsigned long i = 0; i < F.size(); ++i)
G += pointwise_multiply(F[i],conj(A[i]));
G = pointwise_multiply(G, reciprocal(B+get_regularizer_space()));
ifft_inplace(G);
const dlib::vector<double,2> pp = max_point_interpolated(real(G));
// Compute the peak to side lobe ratio.
const point p = pp;
running_stats<double> rs;
const rectangle peak = centered_rect(p, 8,8);
for (long r = 0; r < G.nr(); ++r)
{
for (long c = 0; c < G.nc(); ++c)
{
if (!peak.contains(point(c,r)))
rs.add(G(r,c).real());
}
}
const double psr = (G(p.y(),p.x()).real()-rs.mean())/rs.stddev();
// update the position of the object
position = translate_rect(guess,tform(pp)-center(guess));
// now update the position filters
make_target_location_image(pp, G);
B *= (1-get_nu_space());
for (unsigned long i = 0; i < F.size(); ++i)
{
A[i] = get_nu_space()*pointwise_multiply(G, F[i]) + (1-get_nu_space())*A[i];
B += get_nu_space()*(squared(real(F[i]))+squared(imag(F[i])));
}
// Now predict the scale change
make_scale_space(img, Fs);
for (unsigned long i = 0; i < Fs.size(); ++i)
fft_inplace(Fs[i]);
Gs = 0;
for (unsigned long i = 0; i < Fs.size(); ++i)
Gs += pointwise_multiply(Fs[i],conj(As[i]));
Gs = pointwise_multiply(Gs, reciprocal(Bs+get_regularizer_scale()));
ifft_inplace(Gs);
const double pos = max_point_interpolated(real(Gs)).y();
// update the rectangle's scale
position *= std::pow(get_scale_pyramid_alpha(), pos-get_num_scale_levels()/2);
// Now update the scale filters
make_scale_target_location_image(pos, Gs);
Bs *= (1-get_nu_scale());
for (unsigned long i = 0; i < Fs.size(); ++i)
{
As[i] = get_nu_scale()*pointwise_multiply(Gs, Fs[i]) + (1-get_nu_scale())*As[i];
Bs += get_nu_scale()*(squared(real(Fs[i]))+squared(imag(Fs[i])));
}
return psr;
}
template <typename image_type>
double update (
const image_type& img
)
{
return update(img, get_position());
}
private:
template <typename image_type>
void make_scale_space(
const image_type& img,
std::vector<matrix<std::complex<double>,0,1> >& Fs
) const
{
typedef typename image_traits<image_type>::pixel_type pixel_type;
// Make an image pyramid and put it into the chips array.
const long chip_size = get_scale_window_size();
drectangle ppp = position*std::pow(get_scale_pyramid_alpha(), -get_num_scale_levels()/2);
dlib::array<array2d<pixel_type> > chips;
std::vector<dlib::vector<double,2> > from_points, to_points;
from_points.push_back(point(0,0));
from_points.push_back(point(chip_size-1,0));
from_points.push_back(point(chip_size-1,chip_size-1));
for (long i = 0; i < get_num_scale_levels(); ++i)
{
array2d<pixel_type> chip(chip_size,chip_size);
// pull box into chip
to_points.clear();
to_points.push_back(ppp.tl_corner());
to_points.push_back(ppp.tr_corner());
to_points.push_back(ppp.br_corner());
transform_image(img,chip,interpolate_bilinear(),find_affine_transform(from_points, to_points));
chips.push_back(chip);
ppp *= get_scale_pyramid_alpha();
}
// extract HOG for each chip
dlib::array<dlib::array<array2d<float> > > hogs(chips.size());
for (unsigned long i = 0; i < chips.size(); ++i)
{
extract_fhog_features(chips[i], hogs[i], 4);
hogs[i].resize(32);
assign_image(hogs[i][31], chips[i]);
assign_image(hogs[i][31], mat(hogs[i][31])/255.0);
}
// Now copy the hog features into the Fs outputs and also apply the cosine
// windowing.
Fs.resize(hogs[0].size()*hogs[0][0].size());
unsigned long i = 0;
for (long r = 0; r < hogs[0][0].nr(); ++r)
{
for (long c = 0; c < hogs[0][0].nc(); ++c)
{
for (long j = 0; j < hogs[0].size(); ++j)
{
Fs[i].set_size(hogs.size());
for (unsigned long k = 0; k < hogs.size(); ++k)
{
Fs[i](k) = hogs[k][j][r][c]*scale_cos_mask[k];
}
++i;
}
}
}
}
template <typename image_type>
point_transform_affine make_chip (
const image_type& img,
drectangle p,
std::vector<matrix<std::complex<double> > >& chip
) const
{
typedef typename image_traits<image_type>::pixel_type pixel_type;
array2d<pixel_type> temp;
const double padding = 1.4;
const chip_details details(p*padding, chip_dims(get_filter_size(), get_filter_size()));
extract_image_chip(img, details, temp);
chip.resize(32);
dlib::array<array2d<float> > hog;
extract_fhog_features(temp, hog, 1, 3,3 );
for (unsigned long i = 0; i < hog.size(); ++i)
assign_image(chip[i], pointwise_multiply(matrix_cast<double>(mat(hog[i])), mask));
assign_image(chip[31], temp);
assign_image(chip[31], pointwise_multiply(mat(chip[31]), mask)/255.0);
return inv(get_mapping_to_chip(details));
}
void make_target_location_image (
const dlib::vector<double,2>& p,
matrix<std::complex<double> >& g
) const
{
g.set_size(get_filter_size(), get_filter_size());
g = 0;
rectangle area = centered_rect(p, 21,21).intersect(get_rect(g));
for (long r = area.top(); r <= area.bottom(); ++r)
{
for (long c = area.left(); c <= area.right(); ++c)
{
double dist = length(point(c,r)-p);
g(r,c) = std::exp(-dist/3.0);
}
}
fft_inplace(g);
g = conj(g);
}
void make_scale_target_location_image (
const double scale,
matrix<std::complex<double>,0,1>& g
) const
{
g.set_size(get_num_scale_levels());
for (long i = 0; i < g.size(); ++i)
{
double dist = std::pow((i-scale),2.0);
g(i) = std::exp(-dist/1.000);
}
fft_inplace(g);
g = conj(g);
}
matrix<double> make_cosine_mask (
) const
{
const long size = get_filter_size();
matrix<double> temp(size,size);
point cent = center(get_rect(temp));
for (long r = 0; r < temp.nr(); ++r)
{
for (long c = 0; c < temp.nc(); ++c)
{
point delta = point(c,r)-cent;
double dist = length(delta)/(size/2.0)*(pi/2);
dist = std::min(dist*1.0, pi/2);
temp(r,c) = std::cos(dist);
}
}
return temp;
}
std::vector<matrix<std::complex<double> > > A, F;
matrix<double> B;
std::vector<matrix<std::complex<double>,0,1> > As, Fs;
matrix<double,0,1> Bs;
drectangle position;
matrix<double> mask;
std::vector<double> scale_cos_mask;
// G and Gs do not logically contribute to the state of this object. They are
// here just so we can void reallocating them over and over.
matrix<std::complex<double> > G;
matrix<std::complex<double>,0,1> Gs;
};
}
#endif // DLIB_CORRELATION_TrACKER_H_
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_CORRELATION_TrACKER_ABSTRACT_H_
#ifdef DLIB_CORRELATION_TrACKER_ABSTRACT_H_
#include "../geometry/drectangle_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class correlation_tracker
{
/*!
WHAT THIS OBJECT REPRESENTS
This is a tool for tracking moving objects in a video stream. You give it
the bounding box of an object in the first frame and it attempts to track the
object in the box from frame to frame.
This tool is an implementation of the method described in the following paper:
Danelljan, Martin, et al. "Accurate scale estimation for robust visual
tracking." Proceedings of the British Machine Vision Conference BMVC. 2014.
!*/
public:
correlation_tracker (
);
/*!
ensures
- #get_position().is_empty() == true
!*/
template <
typename image_type
>
void start_track (
const image_type& img,
const drectangle& p
);
/*!
requires
- image_type == an image object that implements the interface defined in
dlib/image_processing/generic_image.h
- p.is_empty() == false
ensures
- This object will start tracking the thing inside the bounding box in the
given image. That is, if you call update() with subsequent video frames
then it will try to keep track of the position of the object inside p.
- #get_position() == p
!*/
drectangle get_position (
) const;
/*!
ensures
- returns the predicted position of the object under track.
!*/
template <
typename image_type
>
double update (
const image_type& img,
const drectangle& guess
);
/*!
requires
- image_type == an image object that implements the interface defined in
dlib/image_processing/generic_image.h
- get_position().is_empty() == false
(i.e. you must have started tracking by calling start_track())
ensures
- When searching for the object in img, we search in the area around the
provided guess.
- #get_position() == the new predicted location of the object in img. This
location will be a copy of guess that has been translated and scaled
appropriately based on the content of img so that it, hopefully, bounds
the object in img.
- Returns the peak to side-lobe ratio. This is a number that measures how
confident the tracker is that the object is inside #get_position().
Larger values indicate higher confidence.
!*/
template <
typename image_type
>
double update (
const image_type& img
);
/*!
requires
- image_type == an image object that implements the interface defined in
dlib/image_processing/generic_image.h
- get_position().is_empty() == false
(i.e. you must have started tracking by calling start_track())
ensures
- performs: return update(img, get_position())
!*/
};
}
#endif // DLIB_CORRELATION_TrACKER_ABSTRACT_H_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment