Commit 226f5af1 authored by Davis King

Added the poly_image local feature extractor.

parent 2b4e363f
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include "image_keypoint/surf.h" #include "image_keypoint/surf.h"
#include "image_keypoint/hessian_pyramid.h" #include "image_keypoint/hessian_pyramid.h"
#include "image_keypoint/hog.h" #include "image_keypoint/hog.h"
#include "image_keypoint/poly_image.h"
#include "image_keypoint/hashed_feature_image.h" #include "image_keypoint/hashed_feature_image.h"
#include "image_keypoint/nearest_neighbor_feature_image.h" #include "image_keypoint/nearest_neighbor_feature_image.h"
......
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_BUILD_SEPARABLE_PoLY_FILTERS_H__
#define DLIB_BUILD_SEPARABLE_PoLY_FILTERS_H__
#include "../matrix.h"
#include "surf.h"
#include "../uintn.h"
#include <vector>
namespace dlib
{
// ----------------------------------------------------------------------------------------
// A separable 2D filter stored as a pair of column vectors of doubles. The
// builder functions in this file store the row filter in .first and the
// column filter in .second.
typedef std::pair<matrix<double,0,1>, matrix<double,0,1> > separable_filter_type;
// Same pair layout as separable_filter_type, but with int32 elements. This is
// the element type produced by build_separable_int32_poly_filters().
typedef std::pair<matrix<int32,0,1>, matrix<int32,0,1> > separable_int32_filter_type;
// ----------------------------------------------------------------------------------------
// NOTE: this function is defined in a header, so it must be inline.  Without
// it every translation unit that includes this file emits its own definition
// and the program fails to link (or violates the one definition rule).
inline std::vector<std::vector<separable_filter_type> > build_separable_poly_filters (
    const long window_size,
    const long order = 2
)
/*!
    requires
        - 1 <= order <= 6
        - window_size >= 3 && window_size is odd
    ensures
        - Builds the linear filters which compute the coefficients of a weighted
          least squares fit of a 2D polynomial of the given order to a
          window_size by window_size patch.  The weights come from
          gaussian(x,y,sigma) with sigma == window_size/4.0.
        - The returned vector has one entry per polynomial term, in the order
          1, x, y, x*y, x*x, y*y, x*x*x, y*x*x, y*y*x, y*y*y, ... (i.e. the
          column order of the design matrix built below).  So it has 3, 6, 10,
          15, 21, or 28 entries for order 1 through 6 respectively.
        - In each returned pair the "first" element is the row_filter, the
          second is the col_filter.
        - Some filters are not totally separable and that's why they are grouped
          into vectors of vectors.  Each inner vector holds all the separable
          parts of one (partially separable) filter.  Parts whose singular value
          is not larger than 1e-8 times the largest singular value are dropped.
!*/
{
    // Number of monomials in a 2D polynomial of the requested order.  An order
    // outside the documented range falls back to the order 2 value.
    long num_filters = 6;
    switch (order)
    {
        case 1: num_filters = 3; break;
        case 2: num_filters = 6; break;
        case 3: num_filters = 10; break;
        case 4: num_filters = 15; break;
        case 5: num_filters = 21; break;
        case 6: num_filters = 28; break;
    }

    // X is the design matrix: one row per window location, one column per
    // monomial.  G holds the square roots of the per-location weights.
    matrix<double> X(window_size*window_size,num_filters);
    matrix<double,0,1> G(window_size*window_size,1);

    const double sigma = window_size/4.0;
    // window_size is odd, so integer division gives the largest offset from
    // the window center.
    const long half_width = window_size/2;

    long cnt = 0;
    for (double x = -half_width; x <= half_width; ++x)
    {
        for (double y = -half_width; y <= half_width; ++y)
        {
            X(cnt, 0) = 1;
            X(cnt, 1) = x;
            X(cnt, 2) = y;

            if (X.nc() > 5)
            {
                X(cnt, 3) = x*y;
                X(cnt, 4) = x*x;
                X(cnt, 5) = y*y;
            }
            if (X.nc() > 9)
            {
                X(cnt, 6) = x*x*x;
                X(cnt, 7) = y*x*x;
                X(cnt, 8) = y*y*x;
                X(cnt, 9) = y*y*y;
            }
            if (X.nc() > 14)
            {
                X(cnt, 10) = x*x*x*x;
                X(cnt, 11) = y*x*x*x;
                X(cnt, 12) = y*y*x*x;
                X(cnt, 13) = y*y*y*x;
                X(cnt, 14) = y*y*y*y;
            }
            if (X.nc() > 20)
            {
                X(cnt, 15) = x*x*x*x*x;
                X(cnt, 16) = y*x*x*x*x;
                X(cnt, 17) = y*y*x*x*x;
                X(cnt, 18) = y*y*y*x*x;
                X(cnt, 19) = y*y*y*y*x;
                X(cnt, 20) = y*y*y*y*y;
            }
            if (X.nc() > 27)
            {
                X(cnt, 21) = x*x*x*x*x*x;
                X(cnt, 22) = y*x*x*x*x*x;
                X(cnt, 23) = y*y*x*x*x*x;
                X(cnt, 24) = y*y*y*x*x*x;
                X(cnt, 25) = y*y*y*y*x*x;
                X(cnt, 26) = y*y*y*y*y*x;
                X(cnt, 27) = y*y*y*y*y*y;
            }

            G(cnt) = std::sqrt(gaussian(x,y,sigma));
            ++cnt;
        }
    }

    // Apply the weights to the design matrix and form the weighted least
    // squares solution operator.  Row r of S maps a flattened window to the
    // coefficient of the r-th monomial.
    X = diagm(G)*X;
    const matrix<double> S = inv(trans(X)*X)*trans(X)*diagm(G);

    matrix<double,0,1> row_filter, col_filter;
    matrix<double> u,v, temp;
    matrix<double,0,1> w;

    std::vector<std::vector<separable_filter_type> > results(num_filters);

    for (long r = 0; r < S.nr(); ++r)
    {
        // View row r as a 2D filter and split it into rank one (separable)
        // pieces with the SVD.
        temp = reshape(rowm(S,r), window_size, window_size);
        svd3(temp,u,w,v);

        // Only keep the pieces that contribute meaningfully to the filter.
        const double thresh = max(w)*1e-8;
        for (long i = 0; i < w.size(); ++i)
        {
            if (w(i) > thresh)
            {
                // Split the singular value evenly between the two 1D filters.
                col_filter = std::sqrt(w(i))*colm(u,i);
                row_filter = std::sqrt(w(i))*colm(v,i);
                results[r].push_back(std::make_pair(row_filter, col_filter));
            }
        }
    }

    return results;
}
// ----------------------------------------------------------------------------------------
std::vector<std::vector<separable_int32_filter_type> > build_separable_int32_poly_filters (
const long window_size,
const long order = 2,
const double max_range = 300.0
)
/*!
requires
- 1 <= order <= 6
- window_size >= 3 && window_size is odd
- max_range > 1
ensures
- the "first" element is the row_filter, the second is the col_filter.
!*/
{
const std::vector<std::vector<separable_filter_type> >& filters = build_separable_poly_filters(window_size, order);
std::vector<std::vector<separable_int32_filter_type> > int_filters(filters.size());
for (unsigned long i = 0; i < filters.size(); ++i)
{
double max_val = 0;
for (unsigned long j = 0; j < filters[i].size(); ++j)
{
const separable_filter_type& filt = filters[i][j];
max_val = std::max(max_val, max(abs(filt.first)));
max_val = std::max(max_val, max(abs(filt.second)));
}
if (max_val == 0)
max_val = 1;
int_filters[i].resize(filters[i].size());
for (unsigned long j = 0; j < filters[i].size(); ++j)
{
const separable_filter_type& filt = filters[i][j];
int_filters[i][j].first = matrix_cast<int32>(round(filt.first*max_range/max_val));
int_filters[i][j].second = matrix_cast<int32>(round(filt.second*max_range/max_val));
}
}
return int_filters;
}
}
// ----------------------------------------------------------------------------------------
#endif // DLIB_BUILD_SEPARABLE_PoLY_FILTERS_H__
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_POLY_ImAGE_H__
#define DLIB_POLY_ImAGE_H__
#include "poly_image_abstract.h"
#include "build_separable_poly_filters.h"
#include "../algs.h"
#include "../matrix.h"
#include "../array2d.h"
#include "../geometry.h"
#include <cmath>
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
long downsample
>
class poly_image : noncopyable
{
COMPILE_TIME_ASSERT(downsample >= 1);
public:
typedef matrix<double, 0, 1> descriptor_type;
poly_image (
)
{
clear();
}
void clear (
)
{
poly_coef.clear();
order = 3;
window_size = 13;
border_size = (long)std::ceil(std::floor(window_size/2.0)/downsample);
num_rows = 0;
num_cols = 0;
filters = build_separable_poly_filters(window_size, order);
}
long get_order (
) const
{
return order;
}
long get_window_size (
) const
{
return window_size;
}
void setup (
long order_,
long window_size_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(1 <= order_ && order_ <= 6 &&
window_size_ >= 3 && (window_size_%2) == 1,
"\t descriptor_type poly_image::setup()"
<< "\n\t Invalid arguments were given to this function."
<< "\n\t order_: " << order_
<< "\n\t window_size_: " << window_size_
<< "\n\t this: " << this
);
poly_coef.clear();
order = order_;
window_size = window_size_;
border_size = (long)std::ceil(std::floor(window_size/2.0)/downsample);
num_rows = 0;
num_cols = 0;
filters = build_separable_poly_filters(window_size, order);
}
void copy_configuration (
const poly_image& item
)
{
if (order != item.order ||
window_size != item.window_size)
{
order = item.order;
window_size = item.window_size;
border_size = item.border_size;
filters = item.filters;
}
}
template <
typename image_type
>
inline void load (
const image_type& img
)
{
COMPILE_TIME_ASSERT( pixel_traits<typename image_type::type>::has_alpha == false );
poly_coef.resize(get_num_dimensions());
des.set_size(get_num_dimensions());
array2d<float> coef0;
rectangle rect = filter_image(img, coef0, filters[0]);
num_rows = rect.height();
num_cols = rect.width();
for (unsigned long i = 1; i < filters.size(); ++i)
{
filter_image(img, poly_coef[i-1], filters[i]);
// intensity normalize everything
for (long r = 0; r < coef0.nr(); ++r)
{
for (long c = 0; c < coef0.nc(); ++c)
{
if (coef0[r][c] >= 1)
poly_coef[i-1][r][c] /= coef0[r][c];
else
poly_coef[i-1][r][c] = 0;
}
}
}
}
void unload()
{
poly_coef.clear();
num_rows = 0;
num_cols = 0;
}
inline unsigned long size (
) const { return static_cast<unsigned long>(nr()*nc()); }
inline long nr (
) const { return num_rows; }
inline long nc (
) const { return num_cols; }
long get_num_dimensions (
) const
{
// -1 because we discard the constant term of the polynomial.
return filters.size()-1;
}
inline const descriptor_type& operator() (
long row,
long col
) const
{
// make sure requires clause is not broken
DLIB_ASSERT( 0 <= row && row < nr() &&
0 <= col && col < nc(),
"\t descriptor_type poly_image::operator()()"
<< "\n\t invalid row or col argument"
<< "\n\t row: " << row
<< "\n\t col: " << col
<< "\n\t nr(): " << nr()
<< "\n\t nc(): " << nc()
<< "\n\t this: " << this
);
// add because of the zero border around the poly_coef images
row += border_size;
col += border_size;
for (long i = 0; i < des.size(); ++i)
des(i) = poly_coef[i][row][col];
return des;
}
const rectangle get_block_rect (
long row,
long col
) const
{
return centered_rect(downsample*point(col+border_size, row+border_size),
window_size, window_size);
}
const point image_to_feat_space (
const point& p
) const
{
return p/downsample - point(border_size, border_size);
}
const rectangle image_to_feat_space (
const rectangle& rect
) const
{
return rectangle(image_to_feat_space(rect.tl_corner()), image_to_feat_space(rect.br_corner()));
}
const point feat_to_image_space (
const point& p
) const
{
return (p + point(border_size, border_size))*downsample;
}
const rectangle feat_to_image_space (
const rectangle& rect
) const
{
return rectangle(feat_to_image_space(rect.tl_corner()), feat_to_image_space(rect.br_corner()));
}
friend void serialize (const poly_image& item, std::ostream& out)
{
serialize(item.poly_coef, out);
serialize(item.order, out);
serialize(item.window_size, out);
serialize(item.border_size, out);
serialize(item.num_rows, out);
serialize(item.num_cols, out);
}
friend void deserialize (poly_image& item, std::istream& in )
{
deserialize(item.poly_coef, in);
deserialize(item.order, in);
deserialize(item.window_size, in);
deserialize(item.border_size, in);
deserialize(item.num_rows, in);
deserialize(item.num_cols, in);
// just rebuild the filters instead of loading them
item.filters = build_separable_poly_filters(item.window_size, item.order);
}
private:
template <typename image_type>
rectangle filter_image (
const image_type& img,
array2d<float>& out,
const std::vector<separable_filter_type>& filter
) const
{
rectangle rect = spatially_filter_image_separable_down(downsample, img, out, filter[0].first, filter[0].second);
for (unsigned long i = 1; i < filter.size(); ++i)
{
spatially_filter_image_separable_down(downsample, img, out, filter[i].first, filter[i].second, 1, false, true);
}
return rect;
}
std::vector<std::vector<separable_filter_type> > filters;
dlib::array<array2d<float> >::expand_1b poly_coef;
long order;
long window_size;
long border_size;
long num_rows;
long num_cols;
mutable descriptor_type des;
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_POLY_ImAGE_H__
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_POLY_ImAGE_ABSTRACT_H__
#ifdef DLIB_POLY_ImAGE_ABSTRACT_H__
#include "../algs.h"
#include "../matrix.h"
#include "../geometry/rectangle_abstract.h"
#include <cmath>
namespace dlib
{
template <
long downsample
>
class poly_image : noncopyable
{
/*!
    REQUIREMENTS ON TEMPLATE PARAMETERS
        - downsample >= 1

    INITIAL VALUE
        - size() == 0
        - get_order() == 3
        - get_window_size() == 13

    WHAT THIS OBJECT REPRESENTS
        This object is a tool for extracting local feature descriptors from an image.
        In particular, it fits a polynomial to every local pixel patch in an image and
        allows you to query the coefficients of this polynomial.  The coefficients
        are intensity normalized by dividing them by the constant term of the fitted
        polynomial and then the constant term is discarded.

        Additionally, the user can specify a downsampling rate.  If the template argument
        downsample is set to 1 then feature extraction is performed at every pixel of
        an input image (except for a small area around the image border).  However,
        if downsample is set to 2 then feature extraction is only performed at every
        other pixel location.  More generally, if downsample is set to N then feature
        extraction is performed only every N pixels.

    THREAD SAFETY
        Concurrent access to an instance of this object is not safe and should be protected
        by a mutex lock except for the case where you are copying the configuration
        (via copy_configuration()) of a poly_image object to many other threads.
        In this case, it is safe to copy the configuration of a shared object so long
        as no other operations are performed on it.
!*/
public:
typedef matrix<double, 0, 1> descriptor_type;
poly_image (
);
/*!
    ensures
        - this object is properly initialized
!*/
void clear (
);
/*!
    ensures
        - this object will have its initial value
!*/
void setup (
long order,
long window_size
);
/*!
    requires
        - 1 <= order <= 6
        - window_size >= 3 && window_size is odd
    ensures
        - #get_order() == order
        - #get_window_size() == window_size
!*/
long get_order (
) const;
/*!
    ensures
        - returns the order of the polynomial that will be fitted to
          each local pixel patch during feature extraction.
!*/
long get_window_size (
) const;
/*!
    ensures
        - returns the size of the window used for local feature extraction.
          This is the width and height of the window in pixels.
!*/
void copy_configuration (
const poly_image& item
);
/*!
    ensures
        - copies all the state information of item into *this, except for state
          information populated by load().  More precisely, given two poly_image
          objects H1 and H2, the following sequence of instructions should always
          result in both of them having the exact same state.
            H2.copy_configuration(H1);
            H1.load(img);
            H2.load(img);
!*/
template <
typename image_type
>
inline void load (
const image_type& img
);
/*!
    requires
        - image_type == an implementation of array2d/array2d_kernel_abstract.h
        - pixel_traits<typename image_type::type>::has_alpha == false
    ensures
        - Performs the feature extraction described in the WHAT THIS OBJECT REPRESENTS
          section above.  This means after load() finishes you can call (*this)(row,col)
          to obtain the polynomial coefficients for an order get_order() polynomial which
          was fitted to the image patch get_block_rect(row,col).
        - #size() > 0
!*/
void unload(
);
/*!
    ensures
        - #nr() == 0
        - #nc() == 0
        - clears only the state information which is populated by load().  For
          example, let H be a poly_image object.  Then consider the two sequences
          of instructions:
            Sequence 1:
                H.load(img);
                H.unload();
                H.load(img);
            Sequence 2:
                H.load(img);
          Both sequence 1 and sequence 2 should have the same effect on H.
!*/
inline unsigned long size (
) const;
/*!
    ensures
        - returns nr()*nc()
!*/
inline long nr (
) const;
/*!
    ensures
        - returns the number of rows in this polynomial feature image
!*/
inline long nc (
) const;
/*!
    ensures
        - returns the number of columns in this polynomial feature image
!*/
long get_num_dimensions (
) const;
/*!
    ensures
        - returns the number of dimensions in the feature vectors generated by
          this object.
        - In this case, this will be the number of coefficients in an order
          get_order() polynomial, except for the constant term of the polynomial.
!*/
inline const descriptor_type& operator() (
long row,
long col
) const;
/*!
    requires
        - 0 <= row < nr()
        - 0 <= col < nc()
    ensures
        - returns the descriptor for the polynomial filtering block at the given row and column.
          This vector will contain the polynomial coefficients for a polynomial fitted to the
          image patch located at get_block_rect(row,col) in the original image given to load().
        - The returned descriptor vector will have get_num_dimensions() elements.
        - The returned reference refers to storage owned by this object, and that
          storage is overwritten by the next call to operator().  Copy the
          descriptor if you need to keep it.
!*/
const rectangle get_block_rect (
long row,
long col
) const;
/*!
    ensures
        - returns a rectangle that tells you what part of the original image is associated
          with a particular polynomial filter block.  That is, what part of the input image
          is associated with (*this)(row,col).
        - The returned rectangle will be get_window_size() pixels wide and tall.
!*/
const point image_to_feat_space (
const point& p
) const;
/*!
    ensures
        - Each local feature is extracted from a certain point in the input image.
          This function returns the identity of the local feature corresponding
          to the image location p.  Or in other words, let P == image_to_feat_space(p),
          then (*this)(P.y(),P.x()) == the local feature closest to, or centered at,
          the point p in the input image.  Note that some image points might not have
          corresponding feature locations, e.g. border points or points outside the
          image.  In these cases the returned point will be outside get_rect(*this).
!*/
const rectangle image_to_feat_space (
const rectangle& rect
) const;
/*!
    ensures
        - returns rectangle(image_to_feat_space(rect.tl_corner()), image_to_feat_space(rect.br_corner()));
          (i.e. maps a rectangle from image space to feature space)
!*/
const point feat_to_image_space (
const point& p
) const;
/*!
    ensures
        - returns the location in the input image space corresponding to the center
          of the local feature at point p.  In other words, this function computes
          the inverse of image_to_feat_space().  Note that it may only do so approximately,
          since more than one image location might correspond to the same local feature.
          That is, image_to_feat_space() might not be invertible so this function gives
          the closest possible result.
!*/
const rectangle feat_to_image_space (
const rectangle& rect
) const;
/*!
    ensures
        - returns rectangle(feat_to_image_space(rect.tl_corner()), feat_to_image_space(rect.br_corner()));
          (i.e. maps a rectangle from feature space to image space)
!*/
};
// ----------------------------------------------------------------------------------------
template <
long downsample
>
void serialize (
const poly_image<downsample>& item,
std::ostream& out
);
/*!
    provides serialization support.  The implementation in poly_image.h writes
    the coefficient images produced by load() together with the order, window
    size, border size, and feature image dimensions.  The filters themselves
    are not written.
!*/
template <
long downsample
>
void deserialize (
poly_image<downsample>& item,
std::istream& in
);
/*!
    provides deserialization support.  The implementation in poly_image.h reads
    the fields written by serialize() and then rebuilds the filters from the
    restored window size and order instead of reading them from the stream.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_POLY_ImAGE_ABSTRACT_H__
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment