Commit a6b44e0e authored by Davis King's avatar Davis King

Cleaned up the full_object_detection's interface and improved some comments here and there.
parent 3bcab68a
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// Copyright (C) 2012 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_FULL_OBJECT_DeTECTION_H__
#define DLIB_FULL_OBJECT_DeTECTION_H__
......@@ -12,17 +12,18 @@ namespace dlib
// ----------------------------------------------------------------------------------------
// Sentinel part location.  Any part whose position equals OBJECT_PART_NOT_PRESENT
// is considered "not present" (i.e. not observed in the image).
const static point OBJECT_PART_NOT_PRESENT(0x7FFFFFFF,
                                           0x7FFFFFFF);
// ----------------------------------------------------------------------------------------
class full_object_detection
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object represents the location of an object in an image along
            with the positions of each of its constituent parts.
    !*/
public:
    // Construct a detection from a bounding box and the list of its part
    // locations.  Parts equal to OBJECT_PART_NOT_PRESENT are "not observed".
    full_object_detection(
        const rectangle& rect_,
        const std::vector<point>& parts_
    ) : rect(rect_), parts(parts_) {}

    // Default construct: empty rectangle, no parts.
    full_object_detection(){}

    // Construct a detection with a bounding box but no parts.
    // NOTE(review): the 'explicit' keyword is assumed from the abstract
    // declaration; the exact line is hidden by the diff chunk marker.
    explicit full_object_detection(
        const rectangle& rect_
    ) : rect(rect_) {}

    // Returns the bounding box of the detected object.
    const rectangle& get_rect() const { return rect; }

    // Returns the number of parts in this detection.
    unsigned long num_parts() const { return parts.size(); }

    // Returns the location of the idx-th part.
    // requires: idx < num_parts()
    const point& part(
        unsigned long idx
    ) const
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(idx < num_parts(),
            "\t point full_object_detection::part()"
            << "\n\t Invalid inputs were given to this function "
            << "\n\t idx:         " << idx
            << "\n\t num_parts(): " << num_parts()
            << "\n\t this:        " << this
        );

        return parts[idx];
    }

private:
    rectangle rect;            // bounding box of the object
    std::vector<point> parts;  // part centers; may hold OBJECT_PART_NOT_PRESENT
};
// ----------------------------------------------------------------------------------------
......@@ -40,10 +60,10 @@ namespace dlib
const full_object_detection& obj
)
{
for (unsigned long i = 0; i < obj.movable_parts.size(); ++i)
for (unsigned long i = 0; i < obj.num_parts(); ++i)
{
if (obj.rect.contains(obj.movable_parts[i]) == false &&
obj.movable_parts[i] != MOVABLE_PART_NOT_PRESENT)
if (obj.get_rect().contains(obj.part(i)) == false &&
obj.part(i) != OBJECT_PART_NOT_PRESENT)
return false;
}
return true;
......
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// Copyright (C) 2012 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_FULL_OBJECT_DeTECTION_ABSTRACT_H__
#ifdef DLIB_FULL_OBJECT_DeTECTION_ABSTRACT_H__
......@@ -11,26 +11,92 @@ namespace dlib
// ----------------------------------------------------------------------------------------
// Sentinel part location.  Any part whose position equals OBJECT_PART_NOT_PRESENT
// is considered "not present" (i.e. not observed in the image).
const static point OBJECT_PART_NOT_PRESENT(0x7FFFFFFF,
                                           0x7FFFFFFF);
// ----------------------------------------------------------------------------------------
class full_object_detection
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object represents the location of an object in an image along with the
            positions of each of its constituent parts.
    !*/

public:

    full_object_detection(
        const rectangle& rect,
        const std::vector<point>& parts
    );
    /*!
        ensures
            - #get_rect() == rect
            - #num_parts() == parts.size()
            - for all valid i:
                - part(i) == parts[i]
    !*/

    full_object_detection(
    );
    /*!
        ensures
            - #get_rect().is_empty() == true
            - #num_parts() == 0
    !*/

    explicit full_object_detection(
        const rectangle& rect
    );
    /*!
        ensures
            - #get_rect() == rect
            - #num_parts() == 0
    !*/

    const rectangle& get_rect(
    ) const;
    /*!
        ensures
            - returns the rectangle that indicates where this object is.  In general,
              this should be the bounding box for the object.
    !*/

    unsigned long num_parts(
    ) const;
    /*!
        ensures
            - returns the number of parts in this object.
    !*/

    const point& part(
        unsigned long idx
    ) const;
    /*!
        requires
            - idx < num_parts()
        ensures
            - returns the location of the center of the idx-th part of this object.
              Note that it is valid for a part to be "not present".  This is indicated
              when the return value of part() is equal to OBJECT_PART_NOT_PRESENT.
              This is useful for modeling object parts that are not always observed.
    !*/
};
// ----------------------------------------------------------------------------------------
bool all_parts_in_rect (
    const full_object_detection& obj
);
/*!
    ensures
        - returns true if all the parts in obj are contained within obj.get_rect().
          That is, returns true if and only if, for all valid i, the following is
          always true:
            obj.get_rect().contains(obj.part(i)) == true || obj.part(i) == OBJECT_PART_NOT_PRESENT
!*/
// ----------------------------------------------------------------------------------------
}
......
......@@ -838,7 +838,6 @@ namespace dlib
const feature_vector_type& w
) const
{
full_object_detection obj(rect);
// fill in movable part positions.
rectangle mapped_rect;
......@@ -855,6 +854,8 @@ namespace dlib
// convert into feature space.
object_box = object_box.intersect(get_rect(feats[best_level]));
std::vector<point> movable_parts;
movable_parts.reserve(get_num_movable_components_per_detection_template());
for (unsigned long i = 0; i < get_num_movable_components_per_detection_template(); ++i)
{
// make the saliency_image for the ith movable part.
......@@ -912,7 +913,7 @@ namespace dlib
if (max_val <= 0)
{
max_loc = MOVABLE_PART_NOT_PRESENT;
max_loc = OBJECT_PART_NOT_PRESENT;
}
else
{
......@@ -923,13 +924,13 @@ namespace dlib
// now convert from feature space to image space.
max_loc = feats[best_level].feat_to_image_space(max_loc);
max_loc = pyr.point_up(max_loc, best_level);
max_loc = nearest_point(obj.rect, max_loc);
max_loc = nearest_point(rect, max_loc);
}
obj.movable_parts.push_back(max_loc);
movable_parts.push_back(max_loc);
}
return obj;
return full_object_detection(rect, movable_parts);
}
// ----------------------------------------------------------------------------------------
......@@ -948,7 +949,7 @@ namespace dlib
DLIB_ASSERT(get_num_detection_templates() > 0 &&
is_loaded_with_image() &&
psi.size() >= get_num_dimensions() &&
obj.movable_parts.size() == get_num_movable_components_per_detection_template(),
obj.num_parts() == get_num_movable_components_per_detection_template(),
"\t void scan_image_pyramid::get_feature_vector()"
<< "\n\t Invalid inputs were given to this function "
<< "\n\t get_num_detection_templates(): " << get_num_detection_templates()
......@@ -956,35 +957,34 @@ namespace dlib
<< "\n\t psi.size(): " << psi.size()
<< "\n\t get_num_dimensions(): " << get_num_dimensions()
<< "\n\t get_num_movable_components_per_detection_template(): " << get_num_movable_components_per_detection_template()
<< "\n\t obj.movable_parts.size(): " << obj.movable_parts.size()
<< "\n\t obj.num_parts(): " << obj.num_parts()
<< "\n\t this: " << this
);
DLIB_ASSERT(all_parts_in_rect(obj),
"\t void scan_image_pyramid::get_feature_vector()"
<< "\n\t Invalid inputs were given to this function "
<< "\n\t obj.rect: " << obj.rect
<< "\n\t obj.get_rect(): " << obj.get_rect()
<< "\n\t this: " << this
);
const rectangle rect = obj.rect;
rectangle mapped_rect;
detection_template best_template;
unsigned long best_level;
rectangle object_box;
get_mapped_rect_and_metadata (feats.size(), rect, mapped_rect, best_template, object_box, best_level);
get_mapped_rect_and_metadata (feats.size(), obj.get_rect(), mapped_rect, best_template, object_box, best_level);
Pyramid_type pyr;
// put the movable rects at the places indicated by obj.
std::vector<rectangle> rects = best_template.rects;
for (unsigned long i = 0; i < obj.movable_parts.size(); ++i)
for (unsigned long i = 0; i < obj.num_parts(); ++i)
{
if (obj.movable_parts[i] != MOVABLE_PART_NOT_PRESENT)
if (obj.part(i) != OBJECT_PART_NOT_PRESENT)
{
// map from the original image to scaled feature space.
point loc = feats[best_level].image_to_feat_space(pyr.point_down(obj.movable_parts[i], best_level));
point loc = feats[best_level].image_to_feat_space(pyr.point_down(obj.part(i), best_level));
// Make sure the movable part always stays within the object_box.
// Otherwise it would be at a place that the detect() function can never
// look.
......
......@@ -398,7 +398,7 @@ namespace dlib
/*!
requires
- all_parts_in_rect(obj) == true
- obj.movable_parts.size() == get_num_movable_components_per_detection_template()
- obj.num_parts() == get_num_movable_components_per_detection_template()
- is_loaded_with_image() == true
- get_num_detection_templates() > 0
- psi.size() >= get_num_dimensions()
......@@ -410,11 +410,11 @@ namespace dlib
detect() into the needed full_object_detection.
- Since scan_image_pyramid is a sliding window classifier system, not all
possible rectangles can be output by detect(). So in the case where
obj.rect could not arise from a call to detect(), this function will map
obj.rect to the nearest possible object box and then add the feature
vector for the mapped rectangle into #psi.
- get_best_matching_rect(obj.rect) == the rectangle obj.rect gets mapped to
for feature extraction.
obj.get_rect() could not arise from a call to detect(), this function
will map obj.get_rect() to the nearest possible object box and then add
the feature vector for the mapped rectangle into #psi.
- get_best_matching_rect(obj.get_rect()) == the rectangle obj.get_rect()
gets mapped to for feature extraction.
!*/
full_object_detection get_full_object_detection (
......@@ -436,9 +436,10 @@ namespace dlib
Then the corresponding fully populated full_object_detection will be
returned.
- returns a full_object_detection, OBJ, such that:
- OBJ.rect == rect
- OBJ.movable_parts.size() == get_num_movable_components_per_detection_template()
- OBJ.movable_parts == the locations of the movable parts inside this detection.
- OBJ.get_rect() == rect
- OBJ.num_parts() == get_num_movable_components_per_detection_template()
- OBJ.part(i) == the location of the i-th movable part inside this detection,
or OBJECT_PART_NOT_PRESENT if the part was not found.
!*/
};
......
......@@ -48,7 +48,7 @@ namespace dlib
if (used[j])
continue;
const double overlap = truth_boxes[i].rect.intersect(boxes[j]).area() / (double)(truth_boxes[i].rect+boxes[j]).area();
const double overlap = truth_boxes[i].get_rect().intersect(boxes[j]).area() / (double)(truth_boxes[i].get_rect()+boxes[j]).area();
if (overlap > best_overlap)
{
best_overlap = overlap;
......@@ -76,16 +76,16 @@ namespace dlib
const matrix<double,1,2> test_object_detection_function (
object_detector_type& detector,
const image_array_type& images,
const std::vector<std::vector<full_object_detection> >& truth_rects,
const std::vector<std::vector<full_object_detection> >& truth_dets,
const double overlap_eps = 0.5
)
{
// make sure requires clause is not broken
DLIB_ASSERT( is_learning_problem(images,truth_rects) == true &&
DLIB_ASSERT( is_learning_problem(images,truth_dets) == true &&
0 < overlap_eps && overlap_eps <= 1,
"\t matrix test_object_detection_function()"
<< "\n\t invalid inputs were given to this function"
<< "\n\t is_learning_problem(images,truth_rects): " << is_learning_problem(images,truth_rects)
<< "\n\t is_learning_problem(images,truth_dets): " << is_learning_problem(images,truth_dets)
<< "\n\t overlap_eps: "<< overlap_eps
);
......@@ -100,8 +100,8 @@ namespace dlib
const std::vector<rectangle>& hits = detector(images[i]);
total_hits += hits.size();
correct_hits += impl::number_of_truth_hits(truth_rects[i], hits, overlap_eps);
total_true_targets += truth_rects[i].size();
correct_hits += impl::number_of_truth_hits(truth_dets[i], hits, overlap_eps);
total_true_targets += truth_dets[i].size();
}
......@@ -129,17 +129,17 @@ namespace dlib
const matrix<double,1,2> test_object_detection_function (
object_detector_type& detector,
const image_array_type& images,
const std::vector<std::vector<rectangle> >& truth_rects,
const std::vector<std::vector<rectangle> >& truth_dets,
const double overlap_eps = 0.5
)
{
// convert into a list of regular rectangles.
std::vector<std::vector<full_object_detection> > rects(truth_rects.size());
for (unsigned long i = 0; i < truth_rects.size(); ++i)
std::vector<std::vector<full_object_detection> > rects(truth_dets.size());
for (unsigned long i = 0; i < truth_dets.size(); ++i)
{
for (unsigned long j = 0; j < truth_rects[i].size(); ++j)
for (unsigned long j = 0; j < truth_dets[i].size(); ++j)
{
rects[i].push_back(full_object_detection(truth_rects[i][j]));
rects[i].push_back(full_object_detection(truth_dets[i][j]));
}
}
......@@ -188,18 +188,18 @@ namespace dlib
const matrix<double,1,2> cross_validate_object_detection_trainer (
const trainer_type& trainer,
const image_array_type& images,
const std::vector<std::vector<full_object_detection> >& truth_object_detections,
const std::vector<std::vector<full_object_detection> >& truth_dets,
const long folds,
const double overlap_eps = 0.5
)
{
// make sure requires clause is not broken
DLIB_ASSERT( is_learning_problem(images,truth_object_detections) == true &&
DLIB_ASSERT( is_learning_problem(images,truth_dets) == true &&
0 < overlap_eps && overlap_eps <= 1 &&
1 < folds && folds <= static_cast<long>(images.size()),
"\t matrix cross_validate_object_detection_trainer()"
<< "\n\t invalid inputs were given to this function"
<< "\n\t is_learning_problem(images,truth_object_detections): " << is_learning_problem(images,truth_object_detections)
<< "\n\t is_learning_problem(images,truth_dets): " << is_learning_problem(images,truth_dets)
<< "\n\t overlap_eps: "<< overlap_eps
<< "\n\t folds: "<< folds
);
......@@ -223,7 +223,7 @@ namespace dlib
std::vector<std::vector<full_object_detection> > training_rects;
for (unsigned long i = 0; i < images.size()-test_size; ++i)
{
training_rects.push_back(truth_object_detections[train_idx]);
training_rects.push_back(truth_dets[train_idx]);
train_idx_set.push_back(train_idx);
train_idx = (train_idx+1)%images.size();
}
......@@ -236,8 +236,8 @@ namespace dlib
const std::vector<rectangle>& hits = detector(images[test_idx_set[i]]);
total_hits += hits.size();
correct_hits += impl::number_of_truth_hits(truth_object_detections[test_idx_set[i]], hits, overlap_eps);
total_true_targets += truth_object_detections[test_idx_set[i]].size();
correct_hits += impl::number_of_truth_hits(truth_dets[test_idx_set[i]], hits, overlap_eps);
total_true_targets += truth_dets[test_idx_set[i]].size();
}
}
......@@ -268,18 +268,18 @@ namespace dlib
const matrix<double,1,2> cross_validate_object_detection_trainer (
const trainer_type& trainer,
const image_array_type& images,
const std::vector<std::vector<rectangle> >& truth_object_detections,
const std::vector<std::vector<rectangle> >& truth_dets,
const long folds,
const double overlap_eps = 0.5
)
{
// convert into a list of regular rectangles.
std::vector<std::vector<full_object_detection> > dets(truth_object_detections.size());
for (unsigned long i = 0; i < truth_object_detections.size(); ++i)
std::vector<std::vector<full_object_detection> > dets(truth_dets.size());
for (unsigned long i = 0; i < truth_dets.size(); ++i)
{
for (unsigned long j = 0; j < truth_object_detections[i].size(); ++j)
for (unsigned long j = 0; j < truth_dets[i].size(); ++j)
{
dets[i].push_back(full_object_detection(truth_object_detections[i][j]));
dets[i].push_back(full_object_detection(truth_dets[i][j]));
}
}
......
......@@ -20,12 +20,12 @@ namespace dlib
const matrix<double,1,2> test_object_detection_function (
object_detector_type& detector,
const image_array_type& images,
const std::vector<std::vector<full_object_detection> >& truth_rects,
const std::vector<std::vector<full_object_detection> >& truth_dets,
const double overlap_eps = 0.5
);
/*!
requires
- is_learning_problem(images,truth_rects)
- is_learning_problem(images,truth_dets)
- 0 < overlap_eps <= 1
- object_detector_type == some kind of object detector function object
(e.g. object_detector)
......@@ -34,7 +34,7 @@ namespace dlib
ensures
- Tests the given detector against the supplied object detection problem
and returns the precision and recall. Note that the task is to predict,
for each images[i], the set of object locations given by truth_rects[i].
for each images[i], the set of object locations given by truth_dets[i].
- In particular, returns a matrix M such that:
- M(0) == the precision of the detector object. This is a number
in the range [0,1] which measures the fraction of detector outputs
......@@ -44,7 +44,7 @@ namespace dlib
- M(1) == the recall of the detector object. This is a number in the
range [0,1] which measure the fraction of targets found by the
detector. A value of 1 means the detector found all the targets
in truth_rects while a value of 0 means the detector didn't locate
in truth_dets while a value of 0 means the detector didn't locate
any of the targets.
- The rule for deciding if a detector output, D, matches a truth rectangle,
T, is the following:
......@@ -58,14 +58,14 @@ namespace dlib
const matrix<double,1,2> test_object_detection_function (
object_detector_type& detector,
const image_array_type& images,
const std::vector<std::vector<rectangle> >& truth_rects,
const std::vector<std::vector<rectangle> >& truth_dets,
const double overlap_eps = 0.5
);
/*!
requires
- all the requirements of the above test_object_detection_function() routine.
ensures
- converts all the rectangles in truth_rects into full_object_detection objects
- converts all the rectangles in truth_dets into full_object_detection objects
via full_object_detection's rectangle constructor. Then invokes
test_object_detection_function() on the full_object_detections and returns
the results.
......@@ -80,19 +80,19 @@ namespace dlib
const matrix<double,1,2> cross_validate_object_detection_trainer (
const trainer_type& trainer,
const image_array_type& images,
const std::vector<std::vector<full_object_detection> >& truth_rects,
const std::vector<std::vector<full_object_detection> >& truth_dets,
const long folds,
const double overlap_eps = 0.5
);
/*!
requires
- is_learning_problem(images,truth_rects)
- is_learning_problem(images,truth_dets)
- 0 < overlap_eps <= 1
- 1 < folds <= images.size()
- trainer_type == some kind of object detection trainer (e.g structural_object_detection_trainer)
- image_array_type must be an implementation of dlib/array/array_kernel_abstract.h
and it must contain objects which can be accepted by detector().
- it is legal to call trainer.train(images, truth_rects)
- it is legal to call trainer.train(images, truth_dets)
ensures
- Performs k-fold cross-validation by using the given trainer to solve an
object detection problem for the given number of folds. Each fold is tested
......@@ -109,7 +109,7 @@ namespace dlib
const matrix<double,1,2> cross_validate_object_detection_trainer (
const trainer_type& trainer,
const image_array_type& images,
const std::vector<std::vector<rectangle> >& truth_rects,
const std::vector<std::vector<rectangle> >& truth_dets,
const long folds,
const double overlap_eps = 0.5
);
......@@ -117,7 +117,7 @@ namespace dlib
requires
- all the requirements of the above cross_validate_object_detection_trainer() routine.
ensures
- converts all the rectangles in truth_rects into full_object_detection objects
- converts all the rectangles in truth_dets into full_object_detection objects
via full_object_detection's rectangle constructor. Then invokes
cross_validate_object_detection_trainer() on the full_object_detections and
returns the results.
......
......@@ -262,12 +262,12 @@ namespace dlib
{
for (unsigned long j = 0; j < truth_object_detections[i].size(); ++j)
{
DLIB_ASSERT(truth_object_detections[i][j].movable_parts.size() == get_scanner().get_num_movable_components_per_detection_template() &&
DLIB_ASSERT(truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template() &&
all_parts_in_rect(truth_object_detections[i][j]) == true,
"\t trained_function_type structural_object_detection_trainer::train()"
<< "\n\t invalid inputs were given to this function"
<< "\n\t truth_object_detections["<<i<<"]["<<j<<"].movable_parts.size(): " <<
truth_object_detections[i][j].movable_parts.size()
<< "\n\t truth_object_detections["<<i<<"]["<<j<<"].num_parts(): " <<
truth_object_detections[i][j].num_parts()
<< "\n\t get_scanner().get_num_movable_components_per_detection_template(): " <<
get_scanner().get_num_movable_components_per_detection_template()
<< "\n\t all_parts_in_rect(truth_object_detections["<<i<<"]["<<j<<"]): " << all_parts_in_rect(truth_object_detections[i][j])
......@@ -286,7 +286,7 @@ namespace dlib
mapped_rects[i].resize(truth_object_detections[i].size());
for (unsigned long j = 0; j < truth_object_detections[i].size(); ++j)
{
mapped_rects[i][j] = scanner.get_best_matching_rect(truth_object_detections[i][j].rect);
mapped_rects[i][j] = scanner.get_best_matching_rect(truth_object_detections[i][j].get_rect());
}
}
......
......@@ -295,7 +295,7 @@ namespace dlib
- it must be valid to pass images[0] into the image_scanner_type::load() method.
(also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
- for all valid i, j:
- truth_object_detections[i][j].movable_parts.size() == get_scanner().get_num_movable_components_per_detection_template()
- truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template()
- all_parts_in_rect(truth_object_detections[i][j]) == true
ensures
- Uses the structural_svm_object_detection_problem to train an object_detector
......
......@@ -63,11 +63,11 @@ namespace dlib
{
for (unsigned long j = 0; j < truth_object_detections[i].size(); ++j)
{
DLIB_ASSERT(truth_object_detections[i][j].movable_parts.size() == scanner.get_num_movable_components_per_detection_template(),
DLIB_ASSERT(truth_object_detections[i][j].num_parts() == scanner.get_num_movable_components_per_detection_template(),
"\t trained_function_type structural_object_detection_trainer::train()"
<< "\n\t invalid inputs were given to this function"
<< "\n\t truth_object_detections["<<i<<"]["<<j<<"].movable_parts.size(): " <<
truth_object_detections[i][j].movable_parts.size()
<< "\n\t truth_object_detections["<<i<<"]["<<j<<"].num_parts(): " <<
truth_object_detections[i][j].num_parts()
<< "\n\t scanner.get_num_movable_components_per_detection_template(): " <<
scanner.get_num_movable_components_per_detection_template()
<< "\n\t all_parts_in_rect(truth_object_detections["<<i<<"]["<<j<<"]): " << all_parts_in_rect(truth_object_detections[i][j])
......@@ -180,7 +180,7 @@ namespace dlib
psi = 0;
for (unsigned long i = 0; i < truth_object_detections[idx].size(); ++i)
{
mapped_rects.push_back(scanner.get_best_matching_rect(truth_object_detections[idx][i].rect));
mapped_rects.push_back(scanner.get_best_matching_rect(truth_object_detections[idx][i].get_rect()));
scanner.get_feature_vector(truth_object_detections[idx][i], psi);
}
psi(scanner.get_num_dimensions()) = -1.0*truth_object_detections[idx].size();
......@@ -225,8 +225,8 @@ namespace dlib
// truth rectangles.
for (unsigned long i = 0; i < mapped_rects.size(); ++i)
{
const double area = (truth_object_detections[idx][i].rect.intersect(mapped_rects[i])).area();
const double total_area = (truth_object_detections[idx][i].rect + mapped_rects[i]).area();
const double area = (truth_object_detections[idx][i].get_rect().intersect(mapped_rects[i])).area();
const double total_area = (truth_object_detections[idx][i].get_rect() + mapped_rects[i]).area();
if (area/total_area <= match_eps)
{
using namespace std;
......@@ -249,9 +249,9 @@ namespace dlib
sout << "image index "<< idx << endl;
sout << "match_eps: "<< match_eps << endl;
sout << "best possible match: "<< area/total_area << endl;
sout << "truth rect: "<< truth_object_detections[idx][i].rect << endl;
sout << "truth rect width/height: "<< truth_object_detections[idx][i].rect.width()/(double)truth_object_detections[idx][i].rect.height() << endl;
sout << "truth rect area: "<< truth_object_detections[idx][i].rect.area() << endl;
sout << "truth rect: "<< truth_object_detections[idx][i].get_rect() << endl;
sout << "truth rect width/height: "<< truth_object_detections[idx][i].get_rect().width()/(double)truth_object_detections[idx][i].get_rect().height() << endl;
sout << "truth rect area: "<< truth_object_detections[idx][i].get_rect().area() << endl;
sout << "nearest detection template rect: "<< mapped_rects[i] << endl;
sout << "nearest detection template rect width/height: "<< mapped_rects[i].width()/(double)mapped_rects[i].height() << endl;
sout << "nearest detection template rect area: "<< mapped_rects[i].area() << endl;
......@@ -422,10 +422,10 @@ namespace dlib
for (unsigned long i = 0; i < boxes.size(); ++i)
{
const unsigned long area = rect.intersect(boxes[i].rect).area();
const unsigned long area = rect.intersect(boxes[i].get_rect()).area();
if (area != 0)
{
const double new_match = area / static_cast<double>((rect + boxes[i].rect).area());
const double new_match = area / static_cast<double>((rect + boxes[i].get_rect()).area());
if (new_match > match)
{
match = new_match;
......
......@@ -91,17 +91,20 @@ namespace dlib
- scanner.get_num_detection_templates() > 0
- scanner.load(images[0]) must be a valid expression.
- for all valid i, j:
- truth_object_detections[i][j].movable_rects.size() == scanner.get_num_movable_components_per_detection_template()
- truth_object_detections[i][j].num_parts() == scanner.get_num_movable_components_per_detection_template()
- all_parts_in_rect(truth_object_detections[i][j]) == true
ensures
- This object attempts to learn a mapping from the given images to the
object locations given in truth_object_detections. In particular, it attempts to
learn to predict truth_object_detections[i] based on images[i].
Or in other words, this object can be used to learn a parameter vector, w, such that
an object_detector declared as:
object locations given in truth_object_detections. In particular, it
attempts to learn to predict truth_object_detections[i] based on
images[i]. Or in other words, this object can be used to learn a
parameter vector, w, such that an object_detector declared as:
object_detector<image_scanner_type,overlap_tester_type> detector(scanner,overlap_tester,w)
results in a detector object which attempts to compute the following mapping:
truth_object_detections[i].rect == detector(images[i])
results in a detector object which attempts to compute the locations of
all the objects in truth_object_detections. So if you called
detector(images[i]) you would hopefully get a list of rectangles back
that had truth_object_detections[i].size() elements and contained exactly
the rectangles indicated by truth_object_detections[i].
- #get_match_eps() == 0.5
- This object will use num_threads threads during the optimization
procedure. You should set this parameter equal to the number of
......
......@@ -275,48 +275,61 @@ namespace
// Now make some squares and draw them onto our black images. All the
// squares will be 70 pixels wide and tall.
const int shrink = 0;
std::vector<full_object_detection> temp;
temp.push_back(full_object_detection(centered_rect(point(100,100), 70,71)));
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).tl_corner());
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).tr_corner());
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).bl_corner());
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).br_corner());
fill_rect(images[0],temp.back().rect,255); // Paint the square white
temp.push_back(full_object_detection(centered_rect(point(200,300), 70,71)));
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).tl_corner());
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).tr_corner());
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).bl_corner());
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).br_corner());
fill_rect(images[0],temp.back().rect,255); // Paint the square white
object_locations.push_back(temp);
rectangle rect = centered_rect(point(100,100), 70,71);
std::vector<point> movable_parts;
movable_parts.push_back(shrink_rect(rect,shrink).tl_corner());
movable_parts.push_back(shrink_rect(rect,shrink).tr_corner());
movable_parts.push_back(shrink_rect(rect,shrink).bl_corner());
movable_parts.push_back(shrink_rect(rect,shrink).br_corner());
temp.push_back(full_object_detection(rect, movable_parts));
fill_rect(images[0],rect,255); // Paint the square white
rect = centered_rect(point(200,200), 70,71);
movable_parts.clear();
movable_parts.push_back(shrink_rect(rect,shrink).tl_corner());
movable_parts.push_back(shrink_rect(rect,shrink).tr_corner());
movable_parts.push_back(shrink_rect(rect,shrink).bl_corner());
movable_parts.push_back(shrink_rect(rect,shrink).br_corner());
temp.push_back(full_object_detection(rect, movable_parts));
fill_rect(images[0],rect,255); // Paint the square white
object_locations.push_back(temp);
// ------------------------------------
temp.clear();
temp.push_back(full_object_detection(centered_rect(point(140,200), 70,71)));
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).tl_corner());
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).tr_corner());
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).bl_corner());
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).br_corner());
fill_rect(images[1],temp.back().rect,255); // Paint the square white
temp.push_back(full_object_detection(centered_rect(point(303,200), 70,71)));
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).tl_corner());
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).tr_corner());
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).bl_corner());
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).br_corner());
fill_rect(images[1],temp.back().rect,255); // Paint the square white
object_locations.push_back(temp);
rect = centered_rect(point(140,200), 70,71);
movable_parts.clear();
movable_parts.push_back(shrink_rect(rect,shrink).tl_corner());
movable_parts.push_back(shrink_rect(rect,shrink).tr_corner());
movable_parts.push_back(shrink_rect(rect,shrink).bl_corner());
movable_parts.push_back(shrink_rect(rect,shrink).br_corner());
temp.push_back(full_object_detection(rect, movable_parts));
fill_rect(images[1],rect,255); // Paint the square white
rect = centered_rect(point(303,200), 70,71);
movable_parts.clear();
movable_parts.push_back(shrink_rect(rect,shrink).tl_corner());
movable_parts.push_back(shrink_rect(rect,shrink).tr_corner());
movable_parts.push_back(shrink_rect(rect,shrink).bl_corner());
movable_parts.push_back(shrink_rect(rect,shrink).br_corner());
temp.push_back(full_object_detection(rect, movable_parts));
fill_rect(images[1],rect,255); // Paint the square white
object_locations.push_back(temp);
// ------------------------------------
temp.clear();
temp.push_back(full_object_detection(centered_rect(point(123,121), 70,71)));
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).tl_corner());
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).tr_corner());
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).bl_corner());
temp.back().movable_parts.push_back(shrink_rect(temp.back().rect,shrink).br_corner());
fill_rect(images[2],temp.back().rect,255); // Paint the square white
rect = centered_rect(point(123,121), 70,71);
movable_parts.clear();
movable_parts.push_back(shrink_rect(rect,shrink).tl_corner());
movable_parts.push_back(shrink_rect(rect,shrink).tr_corner());
movable_parts.push_back(shrink_rect(rect,shrink).bl_corner());
movable_parts.push_back(shrink_rect(rect,shrink).br_corner());
temp.push_back(full_object_detection(rect, movable_parts));
fill_rect(images[2],rect,255); // Paint the square white
object_locations.push_back(temp);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment