Commit 311075a7 authored by Davis King

Added initial version of scan_image_boxes object.

parent b6b3cf1f
......@@ -9,6 +9,7 @@
#include "image_processing/object_detector.h"
#include "image_processing/box_overlap_testing.h"
#include "image_processing/scan_image_pyramid_tools.h"
#include "image_processing/scan_image_boxes.h"
#endif // DLIB_IMAGE_PROCESSInG_H___
......
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SCAN_IMAGE_bOXES_H__
#define DLIB_SCAN_IMAGE_bOXES_H__
#include "scan_image_boxes_abstract.h"
#include "../matrix.h"
#include "../geometry.h"
#include "../image_processing.h"
#include "../array2d.h"
#include <vector>
#include "../image_processing/full_object_detection.h"
#include "../image_transforms.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
// Default Box_generator policy for scan_image_boxes.  It holds no state and
// simply forwards to find_candidate_object_locations().
class default_box_generator
{
public:

    // Empties rects and then hands it, together with img, to
    // find_candidate_object_locations() to be filled in.
    template <typename image_type>
    void operator() (
        const image_type& img,
        std::vector<rectangle>& rects
    ) const
    {
        rects.clear();
        find_candidate_object_locations(img, rects);
    }

    // This generator has no settings, so there is nothing to copy.
    void copy_configuration (
        const default_box_generator& /*item*/
    )
    {
    }
};
// default_box_generator has no data members, so nothing is written to the stream.
inline void serialize (
    const default_box_generator& /*item*/,
    std::ostream& /*out*/
)
{
}
// default_box_generator has no data members, so nothing is read from the stream.
inline void deserialize (
    default_box_generator& /*item*/,
    std::istream& /*in*/
)
{
}
// ----------------------------------------------------------------------------------------
template <
typename Feature_extractor_type,
typename Box_generator = default_box_generator
>
class scan_image_boxes : noncopyable
{
public:
typedef matrix<double,0,1> feature_vector_type;
typedef Feature_extractor_type feature_extractor_type;
typedef Box_generator box_generator;
scan_image_boxes (
);
template <
typename image_type
>
void load (
const image_type& img
);
inline bool is_loaded_with_image (
) const;
inline void copy_configuration(
const feature_extractor_type& fe
);
inline void copy_configuration(
const box_generator& bg
);
inline void copy_configuration (
const scan_image_boxes& item
);
inline long get_num_dimensions (
) const;
unsigned long get_num_spatial_pyramid_levels (
) const;
void set_num_spatial_pyramid_levels (
unsigned long levels
);
void detect (
const feature_vector_type& w,
std::vector<std::pair<double, rectangle> >& dets,
const double thresh
) const;
void get_feature_vector (
const full_object_detection& obj,
feature_vector_type& psi
) const;
full_object_detection get_full_object_detection (
const rectangle& rect,
const feature_vector_type& w
) const;
const rectangle get_best_matching_rect (
const rectangle& rect
) const;
/*!
requires
- is_loaded_with_image() == true
!*/
inline unsigned long get_num_detection_templates (
) const { return 1; }
inline unsigned long get_num_movable_components_per_detection_template (
) const { return 0; }
template <typename T, typename U>
friend void serialize (
const scan_image_boxes<T,U>& item,
std::ostream& out
);
template <typename T, typename U>
friend void deserialize (
scan_image_boxes<T,U>& item,
std::istream& in
);
private:
static bool compare_pair_rect (
const std::pair<double, rectangle>& a,
const std::pair<double, rectangle>& b
)
{
return a.first < b.first;
}
void test_coordinate_transforms()
{
for (long x = -10; x <= 10; x += 10)
{
for (long y = -10; y <= 10; y += 10)
{
const rectangle rect = centered_rect(x,y,5,6);
rectangle a;
a = feats.image_to_feat_space(rect);
if (a.width() > 10000000 || a.height() > 10000000 )
{
DLIB_CASSERT(false, "The image_to_feat_space() routine is outputting rectangles of an implausibly "
<< "\nlarge size. This means there is probably a bug in your feature extractor.");
}
a = feats.feat_to_image_space(rect);
if (a.width() > 10000000 || a.height() > 10000000 )
{
DLIB_CASSERT(false, "The feat_to_image_space() routine is outputting rectangles of an implausibly "
<< "\nlarge size. This means there is probably a bug in your feature extractor.");
}
}
}
}
static void add_grid_rects (
std::vector<rectangle>& rects,
const rectangle& object_box,
unsigned int cells_x,
unsigned int cells_y
)
{
// make sure requires clause is not broken
DLIB_ASSERT(cells_x > 0 && cells_y > 0,
"\t void add_grid_rects()"
<< "\n\t The number of cells along a dimension can't be zero. "
<< "\n\t cells_x: " << cells_x
<< "\n\t cells_y: " << cells_y
);
const matrix_range_exp<double>& x = linspace(object_box.left(), object_box.right(), cells_x+1);
const matrix_range_exp<double>& y = linspace(object_box.top(), object_box.bottom(), cells_y+1);
for (long j = 0; j+1 < y.size(); ++j)
{
for (long i = 0; i+1 < x.size(); ++i)
{
const dlib::vector<double,2> tl(x(i),y(j));
const dlib::vector<double,2> br(x(i+1),y(j+1));
rects.push_back(rectangle(tl,br));
}
}
}
void get_feature_extraction_regions (
const rectangle& rect,
std::vector<rectangle>& regions
) const
/*!
ensures
- #regions.size() is always the same number no matter what the input is. The
regions also have a consistent ordering.
- all the output rectangles are contained within rect.
!*/
{
regions.clear();
for (unsigned int l = 1; l <= num_spatial_pyramid_levels; ++l)
{
const int cells = (int)std::pow(2.0, l-1);
add_grid_rects(regions, rect, cells, cells);
}
}
unsigned int get_num_components_per_detection_template(
) const
{
return (unsigned int)(std::pow(4.0,num_spatial_pyramid_levels)-1)/3;
}
feature_extractor_type feats;
std::vector<rectangle> search_rects;
bool loaded_with_image;
unsigned int num_spatial_pyramid_levels;
box_generator detect_boxes;
};
// ----------------------------------------------------------------------------------------
// Writes a scan_image_boxes to out: a format version number (1) followed by
// each data member in declaration order.
template <typename T, typename U>
void serialize (
    const scan_image_boxes<T,U>& item,
    std::ostream& out
)
{
    const int format_version = 1;
    serialize(format_version, out);

    serialize(item.feats, out);
    serialize(item.search_rects, out);
    serialize(item.loaded_with_image, out);
    serialize(item.num_spatial_pyramid_levels, out);
    serialize(item.detect_boxes, out);
}
// ----------------------------------------------------------------------------------------
// Reads a scan_image_boxes from in.  Only format version 1 is understood; any
// other version number results in a serialization_error and item is left
// untouched.
template <typename T, typename U>
void deserialize (
    scan_image_boxes<T,U>& item,
    std::istream& in
)
{
    int format_version = 0;
    deserialize(format_version, in);

    const bool version_supported = (format_version == 1);
    if (!version_supported)
    {
        throw serialization_error("Unsupported version found when deserializing a scan_image_boxes object.");
    }

    // Same member order as used by serialize().
    deserialize(item.feats, in);
    deserialize(item.search_rects, in);
    deserialize(item.loaded_with_image, in);
    deserialize(item.num_spatial_pyramid_levels, in);
    deserialize(item.detect_boxes, in);
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// scan_image_boxes member functions
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Default state: no image loaded and a spatial pyramid with 3 levels.
template <typename Feature_extractor_type, typename Box_generator>
scan_image_boxes<Feature_extractor_type,Box_generator>::scan_image_boxes ()
    : loaded_with_image(false),
      num_spatial_pyramid_levels(3)
{
}
// ----------------------------------------------------------------------------------------
// Prepares the scanner for img: the feature extractor is loaded with the
// image, the box generator fills search_rects with candidate boxes, and the
// object is then flagged as holding an image.
template <typename Feature_extractor_type, typename Box_generator>
template <typename image_type>
void scan_image_boxes<Feature_extractor_type,Box_generator>::load (
    const image_type& img
)
{
    this->feats.load(img);
    this->detect_boxes(img, this->search_rects);
    this->loaded_with_image = true;
}
// ----------------------------------------------------------------------------------------
// True once load() has completed (or a loaded object has been deserialized).
template <typename Feature_extractor_type, typename Box_generator>
bool scan_image_boxes<Feature_extractor_type,Box_generator>::is_loaded_with_image () const
{
    return this->loaded_with_image;
}
// ----------------------------------------------------------------------------------------
// Sanity checks the feature extractor's coordinate transforms and then copies
// the configuration of fe into the internal feature extractor.
// NOTE(review): the check runs on the internal extractor *before* fe's
// configuration has been copied in, so it exercises the previous
// configuration -- confirm whether it was meant to run after the copy.
template <typename Feature_extractor_type, typename Box_generator>
void scan_image_boxes<Feature_extractor_type,Box_generator>::copy_configuration (
    const feature_extractor_type& fe
)
{
    this->test_coordinate_transforms();
    this->feats.copy_configuration(fe);
}
// ----------------------------------------------------------------------------------------
// Copies the configuration of bg into the internal box generator.
template <typename Feature_extractor_type, typename Box_generator>
void scan_image_boxes<Feature_extractor_type,Box_generator>::copy_configuration (
    const box_generator& bg
)
{
    this->detect_boxes.copy_configuration(bg);
}
// ----------------------------------------------------------------------------------------
// Makes this scanner use the same settings as item: feature extractor
// configuration, box generator configuration and pyramid depth.  The loaded
// image state (search_rects, loaded_with_image) is not touched.
template <typename Feature_extractor_type, typename Box_generator>
void scan_image_boxes<Feature_extractor_type,Box_generator>::copy_configuration (
    const scan_image_boxes& item
)
{
    this->feats.copy_configuration(item.feats);
    this->detect_boxes.copy_configuration(item.detect_boxes);
    this->num_spatial_pyramid_levels = item.num_spatial_pyramid_levels;
}
// ----------------------------------------------------------------------------------------
// Number of levels in the spatial pyramid used to describe each box.
template <typename Feature_extractor_type, typename Box_generator>
unsigned long scan_image_boxes<Feature_extractor_type,Box_generator>::get_num_spatial_pyramid_levels () const
{
    return this->num_spatial_pyramid_levels;
}
// ----------------------------------------------------------------------------------------
// Sets the depth of the spatial pyramid.  levels must be greater than zero
// (checked by DLIB_ASSERT).
template <typename Feature_extractor_type, typename Box_generator>
void scan_image_boxes<Feature_extractor_type,Box_generator>::set_num_spatial_pyramid_levels (
    unsigned long levels
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(levels > 0,
                "\t void scan_image_boxes::set_num_spatial_pyramid_levels()"
                << "\n\t Invalid inputs were given to this function "
                << "\n\t levels: " << levels
                << "\n\t this: " << this
    );

    // The member is an unsigned int, so the value is narrowed on storage.
    this->num_spatial_pyramid_levels = static_cast<unsigned int>(levels);
}
// ----------------------------------------------------------------------------------------
// Length of the feature vectors this scanner works with: one block of
// feats.get_num_dimensions() entries for every spatial pyramid cell.
template <typename Feature_extractor_type, typename Box_generator>
long scan_image_boxes<Feature_extractor_type,Box_generator>::get_num_dimensions () const
{
    return this->feats.get_num_dimensions()*this->get_num_components_per_detection_template();
}
// ----------------------------------------------------------------------------------------
// Scores every candidate box against the weight vector w and outputs the ones
// scoring at least thresh, best first.
//
// For each spatial pyramid cell a "saliency image" is built: the dot product
// between that cell's block of w and the descriptor at each feature location,
// stored as an integral image so the sum over any rectangle is cheap.  A box's
// score is the sum, over all its pyramid cells, of the matching saliency image
// summed over that cell's rectangle.
template <typename Feature_extractor_type, typename Box_generator>
void scan_image_boxes<Feature_extractor_type,Box_generator>::detect (
    const feature_vector_type& w,
    std::vector<std::pair<double, rectangle> >& dets,
    const double thresh
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(is_loaded_with_image() &&
                w.size() >= get_num_dimensions(),
                "\t void scan_image_boxes::detect()"
                << "\n\t Invalid inputs were given to this function "
                << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
                << "\n\t w.size(): " << w.size()
                << "\n\t get_num_dimensions(): " << get_num_dimensions()
                << "\n\t this: " << this
    );

    dets.clear();

    // One integral image per pyramid cell.
    array<integral_image_generic<double> > saliency_images;
    saliency_images.set_max_size(get_num_components_per_detection_template());
    saliency_images.set_size(get_num_components_per_detection_template());

    // Scratch buffer holding the raw (non-integral) responses for one cell.
    array2d<double> responses(feats.nr(), feats.nc());

    for (unsigned long cell = 0; cell < saliency_images.size(); ++cell)
    {
        // Start of this cell's block inside w.
        const unsigned long block_start = feats.get_num_dimensions()*cell;

        for (long row = 0; row < feats.nr(); ++row)
        {
            for (long col = 0; col < feats.nc(); ++col)
            {
                const typename feature_extractor_type::descriptor_type& desc = feats(row,col);

                // Sparse dot product between the descriptor and the cell's weights.
                double response = 0;
                for (unsigned long idx = 0; idx < desc.size(); ++idx)
                {
                    response += w(desc[idx].first + block_start)*desc[idx].second;
                }
                responses[row][col] = response;
            }
        }

        saliency_images[cell].load(responses);
    }

    // Score each candidate box using the integral images.
    std::vector<rectangle> cells;
    const rectangle feat_bounds = get_rect(feats);
    for (unsigned long box = 0; box < search_rects.size(); ++box)
    {
        const rectangle feat_rect = feats.image_to_feat_space(search_rects[box]).intersect(feat_bounds);
        // Boxes that fall completely outside the feature space are skipped.
        if (feat_rect.is_empty())
            continue;

        get_feature_extraction_regions(feat_rect, cells);
        DLIB_CASSERT(saliency_images.size() == cells.size(),"");

        double total = 0;
        for (unsigned long cell = 0; cell < cells.size(); ++cell)
        {
            DLIB_CASSERT(get_rect(saliency_images[cell]).contains(cells[cell]), search_rects[box]
                         << " getrect:" << get_rect(saliency_images[cell]) << " region:" << cells[cell] << " rect: "<< feat_rect);

            total += saliency_images[cell].get_sum_of_area(cells[cell]);
        }

        if (total >= thresh)
        {
            dets.push_back(std::make_pair(total, search_rects[box]));
        }
    }

    // Sorting the reversed range in ascending order leaves dets in descending
    // score order.
    std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
}
// ----------------------------------------------------------------------------------------
// Returns the candidate box that overlaps rect the most.  Overlap is measured
// as area(rect.intersect(candidate)) / area(rect + candidate).  Ties keep the
// earliest candidate, and a default constructed rectangle is returned when
// there are no candidates.
template <typename Feature_extractor_type, typename Box_generator>
const rectangle scan_image_boxes<Feature_extractor_type,Box_generator>::get_best_matching_rect (
    const rectangle& rect
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(is_loaded_with_image(),
                "\t const rectangle scan_image_boxes::get_best_matching_rect()"
                << "\n\t Invalid inputs were given to this function "
                << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
                << "\n\t this: " << this
    );

    rectangle winner;
    double winning_overlap = -1;

    for (unsigned long idx = 0; idx < search_rects.size(); ++idx)
    {
        const rectangle& candidate = search_rects[idx];

        const rectangle common = rect.intersect(candidate);
        const rectangle combined = rect + candidate;
        const double overlap = common.area()/static_cast<double>(combined.area());

        if (overlap > winning_overlap)
        {
            winning_overlap = overlap;
            winner = candidate;
        }
    }

    return winner;
}
// ----------------------------------------------------------------------------------------
// Wraps rect in a full_object_detection.  The weight vector plays no role in
// the result.
template <typename Feature_extractor_type, typename Box_generator>
full_object_detection scan_image_boxes<Feature_extractor_type,Box_generator>::get_full_object_detection (
    const rectangle& rect,
    const feature_vector_type& /*w*/
) const
{
    const full_object_detection detection(rect);
    return detection;
}
// ----------------------------------------------------------------------------------------
// Adds the feature vector for obj into psi.
//
// obj's rectangle is first snapped to the best matching candidate box, which
// is then mapped into feature space and clipped to the feature extractor's
// bounds.  For every spatial pyramid cell of that box, the descriptors at all
// locations inside the cell are accumulated into the cell's block of psi.
// Note that psi is added to, not overwritten.
//
// If the clipped box is empty (for example there are no candidate boxes, or
// the best one maps outside the feature space) psi is left unchanged.  This
// mirrors detect(), which skips such boxes, and avoids building pyramid cells
// from an empty rectangle.
template <
    typename Feature_extractor_type,
    typename Box_generator
    >
void scan_image_boxes<Feature_extractor_type,Box_generator>::
get_feature_vector (
    const full_object_detection& obj,
    feature_vector_type& psi
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(is_loaded_with_image() &&
                psi.size() >= get_num_dimensions() &&
                obj.num_parts() == 0,
                "\t void scan_image_boxes::get_feature_vector()"
                << "\n\t Invalid inputs were given to this function "
                << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
                << "\n\t psi.size(): " << psi.size()
                << "\n\t get_num_dimensions(): " << get_num_dimensions()
                << "\n\t obj.num_parts(): " << obj.num_parts()
                << "\n\t this: " << this
    );

    rectangle mapped_rect = get_best_matching_rect(obj.get_rect());
    mapped_rect = feats.image_to_feat_space(mapped_rect).intersect(get_rect(feats));

    // Nothing to extract from a box with no area inside the feature space.
    if (mapped_rect.is_empty())
        return;

    std::vector<rectangle> regions;
    get_feature_extraction_regions(mapped_rect, regions);

    // pull features out of all the boxes in regions.
    for (unsigned long j = 0; j < regions.size(); ++j)
    {
        const rectangle& rect = regions[j];
        DLIB_CASSERT(get_rect(feats).contains(regions[j]),"");

        // Each region owns its own block of feats.get_num_dimensions() entries in psi.
        const unsigned long template_region_id = j;
        const unsigned long offset = feats.get_num_dimensions()*template_region_id;

        for (long r = rect.top(); r <= rect.bottom(); ++r)
        {
            for (long c = rect.left(); c <= rect.right(); ++c)
            {
                const typename feature_extractor_type::descriptor_type& descriptor = feats(r,c);
                for (unsigned long k = 0; k < descriptor.size(); ++k)
                {
                    psi(descriptor[k].first + offset) += descriptor[k].second;
                }
            }
        }
    }
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_IMAGE_bOXES_H__
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment