Commit 4f275bd7 authored by Davis King

Added evaluate_detectors() to make it easy to run a bunch of HOG detectors

efficiently, even when their window sizes differ.
parent 09af3eb8
...@@ -226,7 +226,6 @@ namespace dlib ...@@ -226,7 +226,6 @@ namespace dlib
return num; return num;
} }
private:
std::vector<matrix<float> > filters; std::vector<matrix<float> > filters;
std::vector<std::vector<matrix<float,0,1> > > row_filters, col_filters; std::vector<std::vector<matrix<float,0,1> > > row_filters, col_filters;
}; };
...@@ -361,14 +360,6 @@ namespace dlib ...@@ -361,14 +360,6 @@ namespace dlib
height = temp.height(); height = temp.height();
} }
static bool compare_pair_rect (
const std::pair<double, rectangle>& a,
const std::pair<double, rectangle>& b
)
{
return a.first < b.first;
}
void get_mapped_rect_and_metadata ( void get_mapped_rect_and_metadata (
const unsigned long number_pyramid_levels, const unsigned long number_pyramid_levels,
const rectangle& rect, const rectangle& rect,
...@@ -389,12 +380,6 @@ namespace dlib ...@@ -389,12 +380,6 @@ namespace dlib
typedef array<array2d<float> > fhog_image; typedef array<array2d<float> > fhog_image;
static rectangle apply_filters_to_fhog (
const fhog_filterbank& w,
const fhog_image& feats,
array2d<float>& saliency_image
);
feature_extractor_type fe; feature_extractor_type fe;
array<fhog_image> feats; array<fhog_image> feats;
int cell_size; int cell_size;
...@@ -422,11 +407,12 @@ namespace dlib ...@@ -422,11 +407,12 @@ namespace dlib
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template <typename T, typename U> namespace impl
rectangle scan_fhog_pyramid<T,U>:: {
apply_filters_to_fhog ( template <typename fhog_filterbank>
rectangle apply_filters_to_fhog (
const fhog_filterbank& w, const fhog_filterbank& w,
const fhog_image& feats, const array<array2d<float> >& feats,
array2d<float>& saliency_image array2d<float>& saliency_image
) )
{ {
...@@ -471,6 +457,7 @@ namespace dlib ...@@ -471,6 +457,7 @@ namespace dlib
} }
return area; return area;
} }
}
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
...@@ -563,16 +550,23 @@ namespace dlib ...@@ -563,16 +550,23 @@ namespace dlib
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
namespace impl
{
template < template <
typename Pyramid_type, typename pyramid_type,
typename image_type,
typename feature_extractor_type typename feature_extractor_type
> >
template < void create_fhog_pyramid (
typename image_type const image_type& img,
> const feature_extractor_type& fe,
void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>:: array<array<array2d<float> > >& feats,
load ( int cell_size,
const image_type& img int filter_rows_padding,
int filter_cols_padding,
unsigned long min_pyramid_layer_width,
unsigned long min_pyramid_layer_height,
unsigned long max_pyramid_levels
) )
{ {
unsigned long levels = 0; unsigned long levels = 0;
...@@ -591,14 +585,12 @@ namespace dlib ...@@ -591,14 +585,12 @@ namespace dlib
feats.set_max_size(levels); feats.set_max_size(levels);
feats.set_size(levels); feats.set_size(levels);
unsigned long width, height;
compute_fhog_window_size(width,height);
typedef typename image_type::type pixel_type; typedef typename image_type::type pixel_type;
typedef typename image_type::mem_manager_type mem_manager_type; typedef typename image_type::mem_manager_type mem_manager_type;
// build our feature pyramid // build our feature pyramid
fe(img, feats[0], cell_size,height,width); fe(img, feats[0], cell_size,filter_rows_padding,filter_cols_padding);
DLIB_ASSERT(feats[0].size() == fe.get_num_planes(), DLIB_ASSERT(feats[0].size() == fe.get_num_planes(),
"Invalid feature extractor used with dlib::scan_fhog_pyramid. The output does not have the \n" "Invalid feature extractor used with dlib::scan_fhog_pyramid. The output does not have the \n"
"indicated number of planes."); "indicated number of planes.");
...@@ -607,17 +599,39 @@ namespace dlib ...@@ -607,17 +599,39 @@ namespace dlib
{ {
array2d<pixel_type,mem_manager_type> temp1, temp2; array2d<pixel_type,mem_manager_type> temp1, temp2;
pyr(img, temp1); pyr(img, temp1);
fe(temp1, feats[1], cell_size,height,width); fe(temp1, feats[1], cell_size,filter_rows_padding,filter_cols_padding);
swap(temp1,temp2); swap(temp1,temp2);
for (unsigned long i = 2; i < feats.size(); ++i) for (unsigned long i = 2; i < feats.size(); ++i)
{ {
pyr(temp2, temp1); pyr(temp2, temp1);
fe(temp1, feats[i], cell_size,height,width); fe(temp1, feats[i], cell_size,filter_rows_padding,filter_cols_padding);
swap(temp1,temp2); swap(temp1,temp2);
} }
} }
} }
}
// ----------------------------------------------------------------------------------------
// Builds the FHOG feature pyramid for img and stores it in this scanner's
// feats member.  The work itself is done by impl::create_fhog_pyramid(); this
// member only supplies the scanner's own settings, using the FHOG window
// height/width as the row/column filter padding.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
template <
    typename image_type
    >
void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
load (
    const image_type& img
)
{
    unsigned long window_width, window_height;
    compute_fhog_window_size(window_width, window_height);

    // Note the argument order: rows (height) padding comes before columns
    // (width) padding.
    impl::create_fhog_pyramid<Pyramid_type>(
        img,
        fe,
        feats,
        cell_size,
        window_height,
        window_width,
        min_pyramid_layer_width,
        min_pyramid_layer_height,
        max_pyramid_levels);
}
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
...@@ -732,33 +746,36 @@ namespace dlib ...@@ -732,33 +746,36 @@ namespace dlib
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
namespace impl
{
// Orders (score, rectangle) pairs by their score alone; the rectangle takes
// no part in the comparison.  Returns true when a's score is strictly less
// than b's score.
inline bool compare_pair_rect (
    const std::pair<double, rectangle>& a,
    const std::pair<double, rectangle>& b
)
{
    const double lhs_score = a.first;
    const double rhs_score = b.first;
    return lhs_score < rhs_score;
}
template < template <
typename Pyramid_type, typename pyramid_type,
typename feature_extractor_type typename feature_extractor_type,
typename fhog_filterbank
> >
void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>:: void detect_from_fhog_pyramid (
detect ( const array<array<array2d<float> > >& feats,
const feature_extractor_type& fe,
const fhog_filterbank& w, const fhog_filterbank& w,
std::vector<std::pair<double, rectangle> >& dets, const double thresh,
const double thresh const unsigned long det_box_height,
) const const unsigned long det_box_width,
const int cell_size,
const int filter_rows_padding,
const int filter_cols_padding,
std::vector<std::pair<double, rectangle> >& dets
)
{ {
// make sure requires clause is not broken
DLIB_ASSERT(is_loaded_with_image() &&
w.get_num_dimensions() == get_num_dimensions(),
"\t void scan_fhog_pyramid::detect()"
<< "\n\t Invalid inputs were given to this function "
<< "\n\t is_loaded_with_image(): " << is_loaded_with_image()
<< "\n\t w.get_num_dimensions(): " << w.get_num_dimensions()
<< "\n\t get_num_dimensions(): " << get_num_dimensions()
<< "\n\t this: " << this
);
dets.clear(); dets.clear();
unsigned long width, height;
compute_fhog_window_size(width,height);
array2d<float> saliency_image; array2d<float> saliency_image;
pyramid_type pyr; pyramid_type pyr;
...@@ -775,7 +792,8 @@ namespace dlib ...@@ -775,7 +792,8 @@ namespace dlib
// if we found a detection // if we found a detection
if (saliency_image[r][c] >= thresh) if (saliency_image[r][c] >= thresh)
{ {
rectangle rect = fe.feats_to_image(centered_rect(point(c,r),width-2*padding,height-2*padding), cell_size, height,width); rectangle rect = fe.feats_to_image(centered_rect(point(c,r),det_box_width,det_box_height),
cell_size, filter_rows_padding, filter_cols_padding);
rect = pyr.rect_up(rect, l); rect = pyr.rect_up(rect, l);
dets.push_back(std::make_pair(saliency_image[r][c], rect)); dets.push_back(std::make_pair(saliency_image[r][c], rect));
} }
...@@ -786,6 +804,53 @@ namespace dlib ...@@ -786,6 +804,53 @@ namespace dlib
std::sort(dets.rbegin(), dets.rend(), compare_pair_rect); std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
} }
inline bool overlaps_any_box (
const test_box_overlap& tester,
const std::vector<rect_detection>& rects,
const rect_detection& rect
)
{
for (unsigned long i = 0; i < rects.size(); ++i)
{
if (tester(rects[i].rect, rect.rect))
return true;
}
return false;
}
}
// ----------------------------------------------------------------------------------------
// Scans the already-loaded feature pyramid with the filterbank w and writes
// every detection scoring at least thresh into dets.  The scanning itself is
// delegated to impl::detect_from_fhog_pyramid(); this member checks the
// preconditions and supplies the scanner's own geometry.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
detect (
    const fhog_filterbank& w,
    std::vector<std::pair<double, rectangle> >& dets,
    const double thresh
) const
{
    // Precondition check: an image must have been loaded and w must have the
    // dimensionality this scanner expects.
    DLIB_ASSERT(is_loaded_with_image() &&
                w.get_num_dimensions() == get_num_dimensions(),
        "\t void scan_fhog_pyramid::detect()"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
        << "\n\t w.get_num_dimensions(): " << w.get_num_dimensions()
        << "\n\t get_num_dimensions(): " << get_num_dimensions()
        << "\n\t this: " << this
        );

    unsigned long window_width, window_height;
    compute_fhog_window_size(window_width, window_height);

    // The reported detection box is the FHOG window with the padding removed
    // from both sides, while the full window size is what gets passed as the
    // row/column filter padding.
    impl::detect_from_fhog_pyramid<pyramid_type>(
        feats,
        fe,
        w,
        thresh,
        window_height-2*padding,
        window_width-2*padding,
        cell_size,
        window_height,
        window_width,
        dets);
}
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template < template <
...@@ -1145,6 +1210,134 @@ namespace dlib ...@@ -1145,6 +1210,134 @@ namespace dlib
}; };
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Runs every detector in detectors over img and stores the detections that
// survive non-max suppression into dets, best detection first.
//
//   detectors        - the object detectors to evaluate.  If empty, dets is
//                      simply cleared.
//   img              - the image to scan.
//   dets             - output.  For each element, detection_confidence is the
//                      scanner score minus the threshold stored at the end of
//                      the weight vector, rect is the bounding box, and
//                      weight_index is the index into detectors of the
//                      detector that produced it.
//   adjust_threshold - added to each weight vector's stored threshold before
//                      scanning, so every reported detection_confidence is
//                      >= adjust_threshold.
//
// When all detectors share one cell size the FHOG pyramid is computed once and
// reused; otherwise it is recomputed for each detector.
template <
    typename pyramid_type,
    typename image_type
    >
void evaluate_detectors (
    const std::vector<object_detector<scan_fhog_pyramid<pyramid_type> > >& detectors,
    const image_type& img,
    std::vector<rect_detection>& dets,
    const double adjust_threshold = 0
)
{
    typedef scan_fhog_pyramid<pyramid_type> scanner_type;

    dets.clear();
    if (detectors.size() == 0)
        return;

    const int cell_size = detectors[0].get_scanner().get_cell_size();

    // Find the maximum sized filters and also most extreme pyramiding settings used.
    unsigned long max_filter_width = 0;
    unsigned long max_filter_height = 0;
    unsigned long min_pyramid_layer_width = std::numeric_limits<unsigned long>::max();
    unsigned long min_pyramid_layer_height = std::numeric_limits<unsigned long>::max();
    unsigned long max_pyramid_levels = 0;
    bool all_cell_sizes_the_same = true;
    // Total number of weight vectors across all detectors.  Used below to
    // decide whether the accumulated detections need a global sort.
    unsigned long num_weight_vectors = 0;
    for (unsigned long i = 0; i < detectors.size(); ++i)
    {
        const scanner_type& scanner = detectors[i].get_scanner();
        max_filter_width = std::max(max_filter_width, scanner.get_fhog_window_width());
        max_filter_height = std::max(max_filter_height, scanner.get_fhog_window_height());
        max_pyramid_levels = std::max(max_pyramid_levels, scanner.get_max_pyramid_levels());
        min_pyramid_layer_width = std::min(min_pyramid_layer_width, scanner.get_min_pyramid_layer_width());
        min_pyramid_layer_height = std::min(min_pyramid_layer_height, scanner.get_min_pyramid_layer_height());
        if (cell_size != scanner.get_cell_size())
            all_cell_sizes_the_same = false;
        num_weight_vectors += detectors[i].num_detectors();
    }

    std::vector<rect_detection> dets_accum;

    // Do the HOG feature extraction to make the fhog pyramid.  Again, note that we
    // are making a pyramid that will work with any of the detectors.  But only if all
    // the cell sizes are the same.  If they aren't then we have to calculate the
    // pyramid for each detector individually.
    array<array<array2d<float> > > feats;
    if (all_cell_sizes_the_same)
    {
        impl::create_fhog_pyramid<pyramid_type>(img,
            detectors[0].get_scanner().get_feature_extractor(), feats, cell_size,
            max_filter_height, max_filter_width, min_pyramid_layer_width,
            min_pyramid_layer_height, max_pyramid_levels);
    }

    std::vector<std::pair<double, rectangle> > temp_dets;
    for (unsigned long i = 0; i < detectors.size(); ++i)
    {
        const scanner_type& scanner = detectors[i].get_scanner();
        // The cell size the pyramid in feats was (or is about to be) built with
        // for this detector.  It equals cell_size when all detectors agree, but
        // differs on the per-detector path, and the same value must be used when
        // mapping detections back into image coordinates.
        const int scanner_cell_size = scanner.get_cell_size();
        if (!all_cell_sizes_the_same)
        {
            impl::create_fhog_pyramid<pyramid_type>(img,
                scanner.get_feature_extractor(), feats, scanner_cell_size,
                max_filter_height, max_filter_width, min_pyramid_layer_width,
                min_pyramid_layer_height, max_pyramid_levels);
        }

        const unsigned long det_box_width = scanner.get_fhog_window_width() - 2*scanner.get_padding();
        const unsigned long det_box_height = scanner.get_fhog_window_height() - 2*scanner.get_padding();
        // A single detector object might itself have multiple weight vectors in it.  So
        // we need to evaluate all of them.
        for (unsigned d = 0; d < detectors[i].num_detectors(); ++d)
        {
            const double thresh = detectors[i].get_processed_w(d).w(scanner.get_num_dimensions());

            // Scan with the adjusted threshold so that adjust_threshold actually
            // makes detection harder (> 0) or easier (< 0).
            impl::detect_from_fhog_pyramid<pyramid_type>(feats, scanner.get_feature_extractor(),
                detectors[i].get_processed_w(d).get_detect_argument(), thresh+adjust_threshold,
                det_box_height, det_box_width, scanner_cell_size, max_filter_height,
                max_filter_width, temp_dets);

            for (unsigned long j = 0; j < temp_dets.size(); ++j)
            {
                rect_detection temp;
                // Confidence is measured against the unadjusted threshold, which
                // gives detection_confidence >= adjust_threshold.
                temp.detection_confidence = temp_dets[j].first-thresh;
                temp.weight_index = i;
                temp.rect = temp_dets[j].second;
                dets_accum.push_back(temp);
            }
        }
    }

    // Do non-max suppression.  Each temp_dets batch is already sorted, but once
    // more than one batch has been concatenated (several detectors, or one
    // detector with several weight vectors) a global sort is needed so the best
    // detections are considered first.
    if (detectors.size() > 1 || num_weight_vectors > 1)
        std::sort(dets_accum.rbegin(), dets_accum.rend());
    for (unsigned long i = 0; i < dets_accum.size(); ++i)
    {
        const test_box_overlap tester = detectors[dets_accum[i].weight_index].get_overlap_tester();
        if (impl::overlaps_any_box(tester, dets, dets_accum[i]))
            continue;
        dets.push_back(dets_accum[i]);
    }
}
// ----------------------------------------------------------------------------------------
// Convenience overload: runs the rect_detection version of
// evaluate_detectors() with the same arguments and returns only the bounding
// boxes of its output, in the same order.
template <
    typename Pyramid_type,
    typename image_type
    >
std::vector<rectangle> evaluate_detectors (
    const std::vector<object_detector<scan_fhog_pyramid<Pyramid_type> > >& detectors,
    const image_type& img,
    const double adjust_threshold = 0
)
{
    std::vector<rect_detection> full_dets;
    evaluate_detectors(detectors, img, full_dets, adjust_threshold);

    // Strip each detection down to its rectangle.
    std::vector<rectangle> boxes;
    boxes.reserve(full_dets.size());
    typedef std::vector<rect_detection>::const_iterator det_iterator;
    for (det_iterator d = full_dets.begin(); d != full_dets.end(); ++d)
        boxes.push_back(d->rect);

    return boxes;
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
} }
......
...@@ -693,6 +693,70 @@ namespace dlib ...@@ -693,6 +693,70 @@ namespace dlib
provides deserialization support provides deserialization support
!*/ !*/
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
typename pyramid_type,
typename image_type
>
void evaluate_detectors (
const std::vector<object_detector<scan_fhog_pyramid<pyramid_type>>>& detectors,
const image_type& img,
std::vector<rect_detection>& dets,
const double adjust_threshold = 0
);
/*!
ensures
- This function runs each of the provided object_detector objects over img and
stores the resulting detections into #dets. Importantly, this function is
faster than running each detector individually because it computes the HOG
features only once and then reuses them for each detector. However, it is
important to note that this speedup is only possible if all the detectors use
the same cell_size parameter that determines how HOG features are computed.
If different cell_size values are used then this function will not be any
faster than running the detectors individually.
- This function applies non-max suppression to the outputs from all detectors
and therefore none of the outputs will overlap with each other.
- To be precise, this function performs object detection on the given image and
stores the detected objects into #dets. In particular, we will have that:
- #dets is sorted such that the highest confidence detections come first.
E.g. element 0 is the best detection, element 1 the next best, and so on.
- #dets.size() == the number of detected objects.
- #dets[i].detection_confidence == The strength of the i-th detection.
Larger values indicate that the detector is more confident that #dets[i]
is a correct detection rather than being a false alarm. Moreover, the
detection_confidence is equal to the detection value output by the
scanner minus the threshold value stored at the end of the weight vector.
- #dets[i].rect == the bounding box for the i-th detection.
- The detection #dets[i].rect was produced by detectors[#dets[i].weight_index].
- The detection threshold is adjusted by having adjust_threshold added to it.
Therefore, an adjust_threshold value > 0 makes detecting objects harder while
a negative value makes it easier. Moreover, the following will be true for
all valid i:
- #dets[i].detection_confidence >= adjust_threshold
This means that, for example, you can obtain the maximum possible number of
detections by setting adjust_threshold equal to negative infinity.
!*/
// ----------------------------------------------------------------------------------------
template <
typename pyramid_type,
typename image_type
>
std::vector<rectangle> evaluate_detectors (
const std::vector<object_detector<scan_fhog_pyramid<pyramid_type>>>& detectors,
const image_type& img,
const double adjust_threshold = 0
);
/*!
ensures
- This function just calls the above evaluate_detectors() routine and copies
the output dets into a vector<rectangle> object and returns it. Therefore,
this function is provided for convenience.
!*/
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment