Commit 3ebf0f2e authored by Davis King's avatar Davis King

Refined the scan_image_pyramid interface a little. In particular, I split the
get_feature_vector() method into two separate functions so the interface
is a little simpler and more flexible.
parent 578322dc
...@@ -86,8 +86,11 @@ namespace dlib ...@@ -86,8 +86,11 @@ namespace dlib
void get_feature_vector ( void get_feature_vector (
const std::vector<rectangle>& rects, const std::vector<rectangle>& rects,
feature_vector_type& psi, feature_vector_type& psi
std::vector<rectangle>& mapped_rects ) const;
const rectangle get_best_matching_rect (
const rectangle& rect
) const; ) const;
template <typename T, typename U> template <typename T, typename U>
...@@ -128,6 +131,13 @@ namespace dlib ...@@ -128,6 +131,13 @@ namespace dlib
deserialize(item.rects, in); deserialize(item.rects, in);
} }
void get_mapped_rect_and_metadata (
rectangle rect,
rectangle& mapped_rect,
detection_template& best_template,
unsigned long& best_level
) const;
/*!
    ensures
        - Searches every pyramid level in feats and every template in det_templates
          for the (level, template) pair whose object_box, once mapped back into
          the input image space, has a width/height closest to that of rect.
        - #best_level == the index into feats of the selected pyramid level.
        - #best_template == a copy of the selected detection template whose rects
          have been converted into the feature space of feats[#best_level],
          translated so they are positioned relative to the center of rect, and
          clipped to get_rect(feats[#best_level]).
        - #mapped_rect == the selected template's object_box expressed in the
          input image space, i.e. the sliding window rectangle that rect was
          matched to.
!*/
feature_extractor_type feats_config; // just here to hold configuration. use it to populate the feats elements. feature_extractor_type feats_config; // just here to hold configuration. use it to populate the feats elements.
typename array<feature_extractor_type>::kernel_2a feats; typename array<feature_extractor_type>::kernel_2a feats;
...@@ -527,6 +537,107 @@ namespace dlib ...@@ -527,6 +537,107 @@ namespace dlib
std::sort(dets.rbegin(), dets.rend(), compare_pair_rect); std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
} }
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
const rectangle scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_best_matching_rect (
    const rectangle& rect
) const
{
    // Enforce the requires clause: at least one detection template must be
    // registered and an image must already have been loaded.
    DLIB_ASSERT(get_num_detection_templates() > 0 &&
                is_loaded_with_image(),
        "\t const rectangle scan_image_pyramid::get_best_matching_rect()"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
        << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
        << "\n\t this: " << this
        );

    // The helper reports the matched template and pyramid level as well, but
    // only the resulting sliding window rectangle is of interest here.
    rectangle nearest_window;
    detection_template ignored_template;
    unsigned long ignored_level;
    get_mapped_rect_and_metadata(rect, nearest_window, ignored_template, ignored_level);

    return nearest_window;
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_mapped_rect_and_metadata (
    rectangle rect,
    rectangle& mapped_rect,
    detection_template& best_template,
    unsigned long& best_level
) const
{
    pyramid_type pyr;

    // Step 1: search every (pyramid level, detection template) pair for the one
    // whose object box, seen in the input image space, is closest in size to rect.
    best_level = 0;
    double smallest_gap = std::numeric_limits<double>::infinity();
    const dlib::vector<double,2> wanted_size(rect.width(), rect.height());

    for (unsigned long level = 0; level < feats.size(); ++level)
    {
        // Round trip the center of rect through the image -> feature -> image
        // space transformation so the shift applied to the object box here is
        // exactly the one used elsewhere in this file.
        const point snapped_center = feats[level].feat_to_image_space(
            feats[level].image_to_feat_space(center(pyr.rect_down(rect,level))));

        for (unsigned long t = 0; t < det_templates.size(); ++t)
        {
            // Translate first and only then call rect_up().  rect_up() works in
            // integer arithmetic, so doing it in the other order could give
            // slightly different results.
            const rectangle candidate = pyr.rect_up(
                translate_rect(det_templates[t].object_box, snapped_center), level);

            const dlib::vector<double,2> candidate_size(candidate.width(),
                                                        candidate.height());
            const double gap = (wanted_size-candidate_size).length();
            if (gap < smallest_gap)
            {
                smallest_gap = gap;
                best_level = level;
                best_template = det_templates[t];
            }
        }
    }

    // Step 2: move the winning template's feature extraction regions into the
    // feature image coordinate system of the selected level, positioned relative
    // to the center of rect and clipped to the bounds of that feature image.
    rect = pyr.rect_down(rect,best_level);
    const feature_extractor_type& level_feats = feats[best_level];
    const point offset = -level_feats.image_to_feat_space(point(0,0));
    const point origin = level_feats.image_to_feat_space(center(rect)) + offset;
    for (unsigned long k = 0; k < best_template.rects.size(); ++k)
    {
        rectangle region = level_feats.image_to_feat_space(best_template.rects[k]);
        region = translate_rect(region, origin);
        best_template.rects[k] = get_rect(level_feats).intersect(region);
    }

    // Step 3: rect has now been matched to one of the detection templates.  Undo
    // the mapping to express that template's object box in the original input space.
    const rectangle shifted_box = translate_rect(best_template.object_box,
                                                 level_feats.feat_to_image_space(origin-offset));
    mapped_rect = pyr.rect_up(shifted_box, best_level);
}
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template < template <
...@@ -536,8 +647,7 @@ namespace dlib ...@@ -536,8 +647,7 @@ namespace dlib
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_feature_vector ( get_feature_vector (
const std::vector<rectangle>& rects, const std::vector<rectangle>& rects,
feature_vector_type& psi, feature_vector_type& psi
std::vector<rectangle>& mapped_rects
) const ) const
{ {
// make sure requires clause is not broken // make sure requires clause is not broken
...@@ -555,74 +665,18 @@ namespace dlib ...@@ -555,74 +665,18 @@ namespace dlib
psi = 0; psi = 0;
mapped_rects.clear();
pyramid_type pyr; pyramid_type pyr;
for (unsigned long i = 0; i < rects.size(); ++i) for (unsigned long i = 0; i < rects.size(); ++i)
{ {
// Figure out the pyramid level which best matches rects[i] against one of our rectangle mapped_rect;
// detection template object boxes.
unsigned long best_level = 0;
double match_score = std::numeric_limits<double>::infinity();
detection_template best_template; detection_template best_template;
unsigned long best_level;
rectangle rect = rects[i]; get_mapped_rect_and_metadata (rects[i], mapped_rect, best_template, best_level);
const dlib::vector<double,2> p(rect.width(), rect.height());
// for all the levels
for (unsigned long l = 0; l < feats.size(); ++l)
{
// Run the center point through the feature/image space transformation just to make
// sure we exactly replicate the procedure for shifting an object_box used elsewhere
// in this file.
const point origin = feats[l].feat_to_image_space(feats[l].image_to_feat_space(center(pyr.rect_down(rect,l))));
for (unsigned long t = 0; t < det_templates.size(); ++t)
{
// Map this detection template into the normal image space and see how
// close it is to the rect we are looking for. We do the translation here
// because the rect_up() routine takes place using integer arithmetic and
// could potentially give slightly different results with and without the
// translation.
rectangle mapped_rect = translate_rect(det_templates[t].object_box, origin);
mapped_rect = pyr.rect_up(mapped_rect, l);
const dlib::vector<double,2> p2(mapped_rect.width(),
mapped_rect.height());
if ((p-p2).length() < match_score)
{
match_score = (p-p2).length();
best_level = l;
best_template = det_templates[t];
}
}
}
// Now get the features out of feats[best_level]. But first translate best_template
// into the right spot (it should be centered at the location determined by rects[i])
// and convert it into the feature image coordinate system.
rect = pyr.rect_down(rects[i],best_level);
const point offset = -feats[best_level].image_to_feat_space(point(0,0));
const point origin = feats[best_level].image_to_feat_space(center(rect)) + offset;
for (unsigned long k = 0; k < best_template.rects.size(); ++k)
{
rectangle temp = best_template.rects[k];
temp = feats[best_level].image_to_feat_space(temp);
temp = translate_rect(temp, origin);
temp = get_rect(feats[best_level]).intersect(temp);
best_template.rects[k] = temp;
}
// The input rectangle was mapped to one of the detection templates. Reverse the process
// to figure out what the mapped rectangle is in the original input space.
rectangle mapped_rect = translate_rect(best_template.object_box, feats[best_level].feat_to_image_space(origin-offset));
mapped_rect = pyr.rect_up(mapped_rect, best_level);
mapped_rects.push_back(mapped_rect);
for (unsigned long j = 0; j < best_template.rects.size(); ++j) for (unsigned long j = 0; j < best_template.rects.size(); ++j)
{ {
rect = best_template.rects[j]; const rectangle rect = best_template.rects[j];
const unsigned long template_region_id = j; const unsigned long template_region_id = j;
const unsigned long offset = feats_config.get_num_dimensions()*template_region_id; const unsigned long offset = feats_config.get_num_dimensions()*template_region_id;
for (long r = rect.top(); r <= rect.bottom(); ++r) for (long r = rect.top(); r <= rect.bottom(); ++r)
......
...@@ -289,10 +289,22 @@ namespace dlib ...@@ -289,10 +289,22 @@ namespace dlib
been reached). been reached).
!*/ !*/
const rectangle get_best_matching_rect (
const rectangle& rect
) const;
/*!
requires
- is_loaded_with_image() == true
- get_num_detection_templates() > 0
ensures
- Since scan_image_pyramid is a sliding window classifier system, not all possible rectangles
can be represented. Therefore, this function allows you to supply a rectangle and obtain the
nearest possible sliding window rectangle.
!*/
void get_feature_vector ( void get_feature_vector (
const std::vector<rectangle>& rects, const std::vector<rectangle>& rects,
feature_vector_type& psi, feature_vector_type& psi
std::vector<rectangle>& mapped_rects
) const; ) const;
/*! /*!
requires requires
...@@ -305,7 +317,6 @@ namespace dlib ...@@ -305,7 +317,6 @@ namespace dlib
- if (rects was produced by a call to detect(), i.e. rects contains the contents of dets) then - if (rects was produced by a call to detect(), i.e. rects contains the contents of dets) then
- #psi == the sum of feature vectors corresponding to the sliding window locations contained - #psi == the sum of feature vectors corresponding to the sliding window locations contained
in rects. in rects.
- #mapped_rects == rects
- Let w denote the w vector given to detect(), then we have: - Let w denote the w vector given to detect(), then we have:
- dot(w,#psi) == sum of scores of the dets produced by detect() - dot(w,#psi) == sum of scores of the dets produced by detect()
- else - else
...@@ -313,8 +324,8 @@ namespace dlib ...@@ -313,8 +324,8 @@ namespace dlib
be output by detect(). So in the case where rects contains rectangles which could not arise be output by detect(). So in the case where rects contains rectangles which could not arise
from a call to detect(), this function will map the rectangles in rects to the nearest possible from a call to detect(), this function will map the rectangles in rects to the nearest possible
object boxes and then store the sum of feature vectors for the mapped rectangles into #psi. object boxes and then store the sum of feature vectors for the mapped rectangles into #psi.
- for all valid i: #mapped_rects[i] == the rectangle rects[i] gets mapped to for feature extraction. - for all valid i: get_best_matching_rect(rects[i]) == the rectangle rects[i] gets mapped to for
- #mapped_rects.size() == rects.size() feature extraction.
!*/ !*/
}; };
......
...@@ -156,7 +156,11 @@ namespace dlib ...@@ -156,7 +156,11 @@ namespace dlib
scanner.load(images[idx]); scanner.load(images[idx]);
psi.set_size(get_num_dimensions()); psi.set_size(get_num_dimensions());
std::vector<rectangle> mapped_rects; std::vector<rectangle> mapped_rects;
scanner.get_feature_vector(truth_rects[idx], psi, mapped_rects); scanner.get_feature_vector(truth_rects[idx], psi);
for (unsigned long i = 0; i < truth_rects[idx].size(); ++i)
{
mapped_rects.push_back(scanner.get_best_matching_rect(truth_rects[idx][i]));
}
psi(scanner.get_num_dimensions()) = -1.0*truth_rects[idx].size(); psi(scanner.get_num_dimensions()) = -1.0*truth_rects[idx].size();
// check if any of the boxes overlap. If they do then it is impossible for // check if any of the boxes overlap. If they do then it is impossible for
...@@ -328,8 +332,7 @@ namespace dlib ...@@ -328,8 +332,7 @@ namespace dlib
psi.set_size(get_num_dimensions()); psi.set_size(get_num_dimensions());
psi = 0; psi = 0;
std::vector<rectangle> mapped_rects; scanner.get_feature_vector(final_dets, psi);
scanner.get_feature_vector(final_dets, psi, mapped_rects);
psi(scanner.get_num_dimensions()) = -1.0*final_dets.size(); psi(scanner.get_num_dimensions()) = -1.0*final_dets.size();
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment