Commit 838caffb authored by Davis King's avatar Davis King

Refactored the interfaces and objects related to object detection so that
they can support movable object part models.  Now all that needs to be
done is to implement the TODO inside the scan_image_pyramid object and
the movable part model support should be up and working.
parent 6f57d405
......@@ -157,8 +157,8 @@ namespace dlib
minus the threshold, therefore this is a value > 0.
- #dets[i].second == the bounding box for the i-th detection.
- #get_scanner() will have been loaded with img. Therefore, you can call
#get_scanner().get_feature_vector() to obtain the feature vectors for
the resulting object detection boxes.
#get_scanner().get_feature_vector() to obtain the feature vectors or
full_object_detections for the resulting object detection boxes.
- The detection threshold is adjusted by having adjust_threshold added
to it. Therefore, an adjust_threshold value > 0 makes detecting
objects harder while a negative one makes it easier.
......
......@@ -9,6 +9,7 @@
#include "../image_processing.h"
#include "../array2d.h"
#include <vector>
#include "full_object_detection.h"
namespace dlib
{
......@@ -52,12 +53,24 @@ namespace dlib
void add_detection_template (
const rectangle& object_box,
const std::vector<rectangle>& feature_extraction_regions
const std::vector<rectangle>& stationary_feature_extraction_regions,
const std::vector<rectangle>& movable_feature_extraction_regions
);
void add_detection_template (
const rectangle& object_box,
const std::vector<rectangle>& stationary_feature_extraction_regions
);
inline unsigned long get_num_detection_templates (
) const;
inline unsigned long get_num_movable_components_per_detection_template (
) const;
inline unsigned long get_num_stationary_components_per_detection_template (
) const;
inline unsigned long get_num_components_per_detection_template (
) const;
......@@ -96,7 +109,13 @@ namespace dlib
) const;
void get_feature_vector (
const full_object_detection& obj,
feature_vector_type& psi
) const;
full_object_detection get_feature_vector (
const rectangle& rect,
const feature_vector_type& w,
feature_vector_type& psi
) const;
......@@ -129,6 +148,7 @@ namespace dlib
{
rectangle object_box; // always centered at (0,0)
std::vector<rectangle> rects; // template with respect to (0,0)
std::vector<rectangle> movable_rects;
};
friend void serialize(const detection_template& item, std::ostream& out)
......@@ -394,27 +414,61 @@ namespace dlib
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
add_detection_template (
const rectangle& object_box,
const std::vector<rectangle>& feature_extraction_regions
const std::vector<rectangle>& stationary_feature_extraction_regions,
const std::vector<rectangle>& movable_feature_extraction_regions
)
{
#ifdef ENABLE_ASSERTS
// make sure requires clause is not broken
DLIB_ASSERT((get_num_detection_templates() == 0 ||
get_num_components_per_detection_template() == feature_extraction_regions.size()) &&
(get_num_stationary_components_per_detection_template() == stationary_feature_extraction_regions.size() &&
get_num_movable_components_per_detection_template() == movable_feature_extraction_regions.size())) &&
center(object_box) == point(0,0),
"\t void scan_image_pyramid::add_detection_template()"
<< "\n\t The number of rects in this new detection template doesn't match "
<< "\n\t the number in previous detection templates."
<< "\n\t get_num_components_per_detection_template(): " << get_num_components_per_detection_template()
<< "\n\t feature_extraction_regions.size(): " << feature_extraction_regions.size()
<< "\n\t get_num_stationary_components_per_detection_template(): " << get_num_stationary_components_per_detection_template()
<< "\n\t stationary_feature_extraction_regions.size(): " << stationary_feature_extraction_regions.size()
<< "\n\t get_num_movable_components_per_detection_template(): " << get_num_movable_components_per_detection_template()
<< "\n\t movable_feature_extraction_regions.size(): " << movable_feature_extraction_regions.size()
<< "\n\t this: " << this
);
for (unsigned long i = 0; i < movable_feature_extraction_regions.size(); ++i)
{
DLIB_ASSERT(center(movable_feature_extraction_regions[i]) == point(0,0),
"Invalid inputs were given to this function."
<< "\n\t center(movable_feature_extraction_regions["<<i<<"]): " << center(movable_feature_extraction_regions[i])
<< "\n\t this: " << this
);
}
#endif
detection_template temp;
temp.object_box = object_box;
temp.rects = feature_extraction_regions;
temp.rects = stationary_feature_extraction_regions;
temp.movable_rects = movable_feature_extraction_regions;
det_templates.push_back(temp);
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
add_detection_template (
    const rectangle& object_box,
    const std::vector<rectangle>& stationary_feature_extraction_regions
)
{
    // Convenience overload for templates that have only stationary parts:
    // forward to the general overload, supplying an empty list of movable
    // feature extraction regions.
    add_detection_template(object_box,
                           stationary_feature_extraction_regions,
                           std::vector<rectangle>());
}
// ----------------------------------------------------------------------------------------
template <
......@@ -428,6 +482,48 @@ namespace dlib
return det_templates.size();
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_num_stationary_components_per_detection_template (
) const
{
    // Precondition: at least one detection template must have been added,
    // since the count is read from the first stored template.
    DLIB_ASSERT(get_num_detection_templates() > 0,
        "\t unsigned long scan_image_pyramid::get_num_stationary_components_per_detection_template()"
        << "\n\t You need to give some detection templates before calling this function. "
        << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
        << "\n\t this: " << this
        );

    // The stationary parts of a template are kept in its "rects" member.
    const unsigned long num_stationary = det_templates[0].rects.size();
    return num_stationary;
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_num_movable_components_per_detection_template (
) const
{
    // Precondition: at least one detection template must have been added,
    // since the count is read from the first stored template.
    DLIB_ASSERT(get_num_detection_templates() > 0,
        "\t unsigned long scan_image_pyramid::get_num_movable_components_per_detection_template()"
        << "\n\t You need to give some detection templates before calling this function. "
        << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
        << "\n\t this: " << this
        );

    // The movable parts of a template are kept in its "movable_rects" member.
    const unsigned long num_movable = det_templates[0].movable_rects.size();
    return num_movable;
}
// ----------------------------------------------------------------------------------------
template <
......@@ -446,7 +542,8 @@ namespace dlib
<< "\n\t this: " << this
);
return det_templates[0].rects.size();
return get_num_movable_components_per_detection_template() +
get_num_stationary_components_per_detection_template();
}
// ----------------------------------------------------------------------------------------
......@@ -697,25 +794,48 @@ namespace dlib
typename Pyramid_type,
typename Feature_extractor_type
>
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
full_object_detection scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_feature_vector (
const rectangle& rect,
const feature_vector_type&,// w,
feature_vector_type& psi
) const
{
// TODO
get_feature_vector(full_object_detection(rect), psi);
return full_object_detection(rect);
}
// ----------------------------------------------------------------------------------------
template <
typename Pyramid_type,
typename Feature_extractor_type
>
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_feature_vector (
const full_object_detection& obj,
feature_vector_type& psi
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(get_num_detection_templates() > 0 &&
is_loaded_with_image() &&
psi.size() >= get_num_dimensions(),
psi.size() >= get_num_dimensions() &&
obj.movable_parts.size() == get_num_movable_components_per_detection_template(),
"\t void scan_image_pyramid::get_feature_vector()"
<< "\n\t Invalid inputs were given to this function "
<< "\n\t get_num_detection_templates(): " << get_num_detection_templates()
<< "\n\t is_loaded_with_image(): " << is_loaded_with_image()
<< "\n\t psi.size(): " << psi.size()
<< "\n\t get_num_dimensions(): " << get_num_dimensions()
<< "\n\t get_num_movable_components_per_detection_template(): " << get_num_movable_components_per_detection_template()
<< "\n\t obj.movable_parts.size(): " << obj.movable_parts.size()
<< "\n\t this: " << this
);
const rectangle rect = obj.rect;
pyramid_type pyr;
rectangle mapped_rect;
detection_template best_template;
......
......@@ -9,6 +9,7 @@
#include "structural_svm_object_detection_problem.h"
#include "../image_processing/object_detector.h"
#include "../image_processing/box_overlap_testing.h"
#include "../image_processing/full_object_detection.h"
namespace dlib
......@@ -54,6 +55,12 @@ namespace dlib
auto_overlap_tester = is_same_type<overlap_tester_type,test_box_overlap>::value;
}
// Returns a read-only reference to the image scanner held by this trainer
// (the `scanner` member).  train() uses it, for example, to check that each
// truth detection has the number of movable parts the scanner expects.
const image_scanner_type& get_scanner (
) const
{
return scanner;
}
bool auto_set_overlap_tester (
) const
{
......@@ -239,29 +246,45 @@ namespace dlib
>
const trained_function_type train (
const image_array_type& images,
const std::vector<std::vector<rectangle> >& truth_rects
const std::vector<std::vector<full_object_detection> >& truth_object_detections
) const
{
#ifdef ENABLE_ASSERTS
// make sure requires clause is not broken
DLIB_ASSERT(is_learning_problem(images,truth_rects) == true,
"\t trained_function_type structural_object_detection_trainer::train(x,y)"
DLIB_ASSERT(is_learning_problem(images,truth_object_detections) == true,
"\t trained_function_type structural_object_detection_trainer::train()"
<< "\n\t invalid inputs were given to this function"
<< "\n\t images.size(): " << images.size()
<< "\n\t truth_rects.size(): " << truth_rects.size()
<< "\n\t is_learning_problem(images,truth_rects): " << is_learning_problem(images,truth_rects)
<< "\n\t truth_object_detections.size(): " << truth_object_detections.size()
<< "\n\t is_learning_problem(images,truth_object_detections): " << is_learning_problem(images,truth_object_detections)
);
for (unsigned long i = 0; i < truth_object_detections.size(); ++i)
{
for (unsigned long j = 0; j < truth_object_detections[i].size(); ++j)
{
DLIB_ASSERT(truth_object_detections[i][j].movable_parts.size() == get_scanner().get_num_movable_components_per_detection_template(),
"\t trained_function_type structural_object_detection_trainer::train()"
<< "\n\t invalid inputs were given to this function"
<< "\n\t truth_object_detections["<<i<<"]["<<j<<"].movable_parts.size(): " <<
truth_object_detections[i][j].movable_parts.size()
<< "\n\t get_scanner().get_num_movable_components_per_detection_template(): " <<
get_scanner().get_num_movable_components_per_detection_template()
);
}
}
#endif
overlap_tester_type local_overlap_tester;
if (auto_overlap_tester)
{
std::vector<std::vector<rectangle> > mapped_rects(truth_rects.size());
for (unsigned long i = 0; i < truth_rects.size(); ++i)
std::vector<std::vector<rectangle> > mapped_rects(truth_object_detections.size());
for (unsigned long i = 0; i < truth_object_detections.size(); ++i)
{
mapped_rects[i].resize(truth_rects[i].size());
for (unsigned long j = 0; j < truth_rects[i].size(); ++j)
mapped_rects[i].resize(truth_object_detections[i].size());
for (unsigned long j = 0; j < truth_object_detections[i].size(); ++j)
{
mapped_rects[i][j] = scanner.get_best_matching_rect(truth_rects[i][j]);
mapped_rects[i][j] = scanner.get_best_matching_rect(truth_object_detections[i][j].rect);
}
}
......@@ -273,7 +296,7 @@ namespace dlib
}
structural_svm_object_detection_problem<image_scanner_type,overlap_tester_type,image_array_type >
svm_prob(scanner, local_overlap_tester, images, truth_rects, num_threads);
svm_prob(scanner, local_overlap_tester, images, truth_object_detections, num_threads);
if (verbose)
svm_prob.be_verbose();
......@@ -293,6 +316,25 @@ namespace dlib
return object_detector<image_scanner_type,overlap_tester_type>(scanner, local_overlap_tester, w);
}
// Convenience overload of train() for ground truth given as plain rectangles.
//
// Each rectangle in truth_object_detections is wrapped in a
// full_object_detection built from that rectangle alone, and the result is
// passed to the train() overload taking full_object_detection objects.
//
// Per the documented contract, this overload is meant for scanners with
// get_scanner().get_num_movable_components_per_detection_template() == 0.
//
// images:                   the training images.
// truth_object_detections:  truth_object_detections[i] holds the ground truth
//                           boxes for images[i].
// returns:                  whatever the full_object_detection overload of
//                           train() returns for the converted inputs.
template <
    typename image_array_type
    >
const trained_function_type train (
    const image_array_type& images,
    const std::vector<std::vector<rectangle> >& truth_object_detections
) const
{
    std::vector<std::vector<full_object_detection> > truth_dets(truth_object_detections.size());
    for (unsigned long i = 0; i < truth_object_detections.size(); ++i)
    {
        const std::vector<rectangle>& rects = truth_object_detections[i];

        // The final size is known up front, so allocate once instead of
        // growing the vector repeatedly while pushing back.
        truth_dets[i].reserve(rects.size());
        for (unsigned long j = 0; j < rects.size(); ++j)
        {
            truth_dets[i].push_back(full_object_detection(rects[j]));
        }
    }

    return train(images, truth_dets);
}
private:
......
......@@ -6,6 +6,7 @@
#include "structural_svm_object_detection_problem_abstract.h"
#include "../image_processing/object_detector_abstract.h"
#include "../image_processing/box_overlap_testing_abstract.h"
#include "../image_processing/full_object_detection_abstract.h"
namespace dlib
......@@ -60,12 +61,22 @@ namespace dlib
- #get_loss_per_false_alarm() == 1
- This object will attempt to learn a model for the given
scanner object when train() is called.
- #get_scanner() == scanner
(note that only the "configuration" of scanner is copied.
I.e. the copy is done using copy_configuration())
- if (overlap_tester_type == test_box_overlap) then
- #auto_set_overlap_tester() == true
- else
- #auto_set_overlap_tester() == false
!*/
const image_scanner_type& get_scanner (
) const;
/*!
ensures
- returns the image scanner used by this object.
!*/
bool auto_set_overlap_tester (
) const;
/*!
......@@ -74,7 +85,7 @@ namespace dlib
state for the overlap tester used for non-max suppression.) then
- returns true
- In this case, it is determined using the find_tight_overlap_tester()
routine based on the truth_rects given to the
routine based on the truth_object_detections given to the
structural_object_detection_trainer::train() method.
- else
- returns false
......@@ -276,20 +287,43 @@ namespace dlib
>
const trained_function_type train (
const image_array_type& images,
const std::vector<std::vector<rectangle> >& truth_rects
const std::vector<std::vector<full_object_detection> >& truth_object_detections
) const;
/*!
requires
- is_learning_problem(images, truth_rects) == true
- is_learning_problem(images, truth_object_detections) == true
- it must be valid to pass images[0] into the image_scanner_type::load() method.
(also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
- for all valid i, j:
- truth_object_detections[i][j].movable_parts.size() == get_scanner().get_num_movable_components_per_detection_template()
ensures
- Uses the structural_svm_object_detection_problem to train an object_detector
on the given images and truth_rects.
on the given images and truth_object_detections.
- returns a function F with the following properties:
- F(new_image) == A prediction of what objects are present in new_image. This
is a set of rectangles indicating their positions.
!*/
template <
typename image_array_type
>
const trained_function_type train (
const image_array_type& images,
const std::vector<std::vector<rectangle> >& truth_object_detections
) const;
/*!
requires
- is_learning_problem(images, truth_object_detections) == true
- it must be valid to pass images[0] into the image_scanner_type::load() method.
(also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
- get_scanner().get_num_movable_components_per_detection_template() == 0
ensures
- This function is identical to the above train(), except that it converts
each element of truth_object_detections into a full_object_detection by
passing it to full_object_detection's constructor taking only a rectangle.
Therefore, this version of train() is a convenience function for the
case where you don't have any movable components of the detection templates.
!*/
};
// ----------------------------------------------------------------------------------------
......
......@@ -6,6 +6,7 @@
#include "../matrix.h"
#include "structural_svm_problem_threaded_abstract.h"
#include <sstream>
#include "../image_processing/full_object_detection_abstract.h"
namespace dlib
{
......@@ -81,23 +82,25 @@ namespace dlib
const image_scanner_type& scanner,
const overlap_tester_type& overlap_tester,
const image_array_type& images,
const std::vector<std::vector<rectangle> >& truth_rects,
const std::vector<std::vector<full_object_detection> >& truth_object_detections,
unsigned long num_threads = 2
);
/*!
requires
- is_learning_problem(images, truth_rects)
- is_learning_problem(images, truth_object_detections)
- scanner.get_num_detection_templates() > 0
- scanner.load(images[0]) must be a valid expression.
- for all valid i, j:
- truth_object_detections[i][j].movable_parts.size() == scanner.get_num_movable_components_per_detection_template()
ensures
- This object attempts to learn a mapping from the given images to the
object locations given in truth_rects. In particular, it attempts to
learn to predict truth_rects[i] based on images[i].
object locations given in truth_object_detections. In particular, it attempts to
learn to predict truth_object_detections[i] based on images[i].
Or in other words, this object can be used to learn a parameter vector, w, such that
an object_detector declared as:
object_detector<image_scanner_type,overlap_tester_type> detector(scanner,overlap_tester,w)
results in a detector object which attempts to compute the following mapping:
truth_rects[i] == detector(images[i])
truth_object_detections[i].rect == detector(images[i])
- #get_match_eps() == 0.5
- This object will use num_threads threads during the optimization
procedure. You should set this parameter equal to the number of
......
......@@ -57,6 +57,7 @@ namespace
detector(images[i], dets2);
matrix<double,0,1> psi(detector.get_w().size());
matrix<double,0,1> psi2(detector.get_w().size());
const double thresh = detector.get_w()(detector.get_w().size()-1);
DLIB_TEST(dets.size() == dets2.size());
......@@ -65,10 +66,19 @@ namespace
DLIB_TEST(dets[j] == dets2[j].second);
psi = 0;
detector.get_scanner().get_feature_vector(dets[j], psi);
const full_object_detection fdet = detector.get_scanner().get_feature_vector(dets[j], detector.get_w(), psi);
const double check_score = dot(psi,detector.get_w()) - thresh;
double check_score = dot(psi,detector.get_w()) - thresh;
DLIB_TEST(std::abs(check_score - dets2[j].first) < 1e-10);
// Make sure fdet works the way it is supposed to with get_feature_vector().
psi2 = 0;
detector.get_scanner().get_feature_vector(fdet, psi2);
check_score = dot(psi2,detector.get_w()) - thresh;
DLIB_TEST(std::abs(check_score - dets2[j].first) < 1e-10);
DLIB_TEST(max(abs(psi-psi2)) < 1e-10);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment