Commit 3ebf0f2e authored by Davis King

Refined the scan_image_pyramid interface a little. In particular, I split the
get_feature_vector() method into two separate functions so the interface
is a little simpler and more flexible.
parent 578322dc
......@@ -86,8 +86,11 @@ namespace dlib
void get_feature_vector (
const std::vector<rectangle>& rects,
feature_vector_type& psi,
std::vector<rectangle>& mapped_rects
feature_vector_type& psi
) const;
const rectangle get_best_matching_rect (
const rectangle& rect
) const;
template <typename T, typename U>
......@@ -128,6 +131,13 @@ namespace dlib
deserialize(item.rects, in);
}
void get_mapped_rect_and_metadata (
rectangle rect,
rectangle& mapped_rect,
detection_template& best_template,
unsigned long& best_level
) const;
feature_extractor_type feats_config; // just here to hold configuration. use it to populate the feats elements.
typename array<feature_extractor_type>::kernel_2a feats;
......@@ -533,40 +543,48 @@ namespace dlib
typename Pyramid_type,
typename Feature_extractor_type
>
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_feature_vector (
const std::vector<rectangle>& rects,
feature_vector_type& psi,
std::vector<rectangle>& mapped_rects
const rectangle scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_best_matching_rect (
const rectangle& rect
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(get_num_detection_templates() > 0 &&
is_loaded_with_image() &&
psi.size() >= get_num_dimensions(),
"\t void scan_image_pyramid::get_feature_vector()"
is_loaded_with_image(),
"\t const rectangle scan_image_pyramid::get_best_matching_rect()"
<< "\n\t Invalid inputs were given to this function "
<< "\n\t get_num_detection_templates(): " << get_num_detection_templates()
<< "\n\t is_loaded_with_image(): " << is_loaded_with_image()
<< "\n\t psi.size(): " << psi.size()
<< "\n\t get_num_dimensions(): " << get_num_dimensions()
<< "\n\t this: " << this
);
psi = 0;
rectangle mapped_rect;
detection_template best_template;
unsigned long best_level;
get_mapped_rect_and_metadata(rect, mapped_rect, best_template, best_level);
return mapped_rect;
}
mapped_rects.clear();
// ----------------------------------------------------------------------------------------
pyramid_type pyr;
for (unsigned long i = 0; i < rects.size(); ++i)
template <
typename Pyramid_type,
typename Feature_extractor_type
>
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_mapped_rect_and_metadata (
rectangle rect,
rectangle& mapped_rect,
detection_template& best_template,
unsigned long& best_level
) const
{
// Figure out the pyramid level which best matches rects[i] against one of our
pyramid_type pyr;
// Figure out the pyramid level which best matches rect against one of our
// detection template object boxes.
unsigned long best_level = 0;
best_level = 0;
double match_score = std::numeric_limits<double>::infinity();
detection_template best_template;
rectangle rect = rects[i];
const dlib::vector<double,2> p(rect.width(), rect.height());
// for all the levels
......@@ -600,9 +618,9 @@ namespace dlib
// Now get the features out of feats[best_level]. But first translate best_template
// into the right spot (it should be centered at the location determined by rects[i])
// into the right spot (it should be centered at the location determined by rect)
// and convert it into the feature image coordinate system.
rect = pyr.rect_down(rects[i],best_level);
rect = pyr.rect_down(rect,best_level);
const point offset = -feats[best_level].image_to_feat_space(point(0,0));
const point origin = feats[best_level].image_to_feat_space(center(rect)) + offset;
for (unsigned long k = 0; k < best_template.rects.size(); ++k)
......@@ -616,13 +634,49 @@ namespace dlib
// The input rectangle was mapped to one of the detection templates. Reverse the process
// to figure out what the mapped rectangle is in the original input space.
rectangle mapped_rect = translate_rect(best_template.object_box, feats[best_level].feat_to_image_space(origin-offset));
mapped_rect = translate_rect(best_template.object_box, feats[best_level].feat_to_image_space(origin-offset));
mapped_rect = pyr.rect_up(mapped_rect, best_level);
mapped_rects.push_back(mapped_rect);
}
// ----------------------------------------------------------------------------------------
template <
typename Pyramid_type,
typename Feature_extractor_type
>
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_feature_vector (
const std::vector<rectangle>& rects,
feature_vector_type& psi
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(get_num_detection_templates() > 0 &&
is_loaded_with_image() &&
psi.size() >= get_num_dimensions(),
"\t void scan_image_pyramid::get_feature_vector()"
<< "\n\t Invalid inputs were given to this function "
<< "\n\t get_num_detection_templates(): " << get_num_detection_templates()
<< "\n\t is_loaded_with_image(): " << is_loaded_with_image()
<< "\n\t psi.size(): " << psi.size()
<< "\n\t get_num_dimensions(): " << get_num_dimensions()
<< "\n\t this: " << this
);
psi = 0;
pyramid_type pyr;
for (unsigned long i = 0; i < rects.size(); ++i)
{
rectangle mapped_rect;
detection_template best_template;
unsigned long best_level;
get_mapped_rect_and_metadata (rects[i], mapped_rect, best_template, best_level);
for (unsigned long j = 0; j < best_template.rects.size(); ++j)
{
rect = best_template.rects[j];
const rectangle rect = best_template.rects[j];
const unsigned long template_region_id = j;
const unsigned long offset = feats_config.get_num_dimensions()*template_region_id;
for (long r = rect.top(); r <= rect.bottom(); ++r)
......
......@@ -289,10 +289,22 @@ namespace dlib
been reached).
!*/
const rectangle get_best_matching_rect (
const rectangle& rect
) const;
/*!
requires
- is_loaded_with_image() == true
- get_num_detection_templates() > 0
ensures
- Since scan_image_pyramid is a sliding window classifier system, not all possible rectangles
can be represented. Therefore, this function allows you to supply a rectangle and obtain the
nearest possible sliding window rectangle.
!*/
void get_feature_vector (
const std::vector<rectangle>& rects,
feature_vector_type& psi,
std::vector<rectangle>& mapped_rects
feature_vector_type& psi
) const;
/*!
requires
......@@ -305,7 +317,6 @@ namespace dlib
- if (rects was produced by a call to detect(), i.e. rects contains the contents of dets) then
- #psi == the sum of feature vectors corresponding to the sliding window locations contained
in rects.
- #mapped_rects == rects
- Let w denote the w vector given to detect(), then we have:
- dot(w,#psi) == sum of scores of the dets produced by detect()
- else
......@@ -313,8 +324,8 @@ namespace dlib
be output by detect(). So in the case where rects contains rectangles which could not arise
from a call to detect(), this function will map the rectangles in rects to the nearest possible
object boxes and then store the sum of feature vectors for the mapped rectangles into #psi.
- for all valid i: #mapped_rects[i] == the rectangle rects[i] gets mapped to for feature extraction.
- #mapped_rects.size() == rects.size()
- for all valid i: get_best_matching_rect(rects[i]) == the rectangle rects[i] gets mapped to for
feature extraction.
!*/
};
......
......@@ -156,7 +156,11 @@ namespace dlib
scanner.load(images[idx]);
psi.set_size(get_num_dimensions());
std::vector<rectangle> mapped_rects;
scanner.get_feature_vector(truth_rects[idx], psi, mapped_rects);
scanner.get_feature_vector(truth_rects[idx], psi);
for (unsigned long i = 0; i < truth_rects[idx].size(); ++i)
{
mapped_rects.push_back(scanner.get_best_matching_rect(truth_rects[idx][i]));
}
psi(scanner.get_num_dimensions()) = -1.0*truth_rects[idx].size();
// check if any of the boxes overlap. If they do then it is impossible for
......@@ -328,8 +332,7 @@ namespace dlib
psi.set_size(get_num_dimensions());
psi = 0;
std::vector<rectangle> mapped_rects;
scanner.get_feature_vector(final_dets, psi, mapped_rects);
scanner.get_feature_vector(final_dets, psi);
psi(scanner.get_num_dimensions()) = -1.0*final_dets.size();
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment