Commit 825ae091 authored by Davis King

merged

parents f1c734d6 c42fdead
......@@ -59,7 +59,9 @@ namespace dlib
for (long r = 0; r < v.nr(); ++r)
{
spec_samps.push_back(trans(rowm(v,r)));
spec_samps.back() /= length(spec_samps.back());
const double len = length(spec_samps.back());
if (len != 0)
spec_samps.back() /= len;
}
// Finally do the K-means clustering
pick_initial_centers(num_clusters, centers, spec_samps);
......
......@@ -341,7 +341,7 @@ namespace dlib
boxes_overlap(overlap_tester)
{
// make sure requires clause is not broken
DLIB_ASSERT(scanner_.get_num_detection_templates() > 0 && w_.size() > 0,
DLIB_CASSERT(scanner_.get_num_detection_templates() > 0 && w_.size() > 0,
"\t object_detector::object_detector(scanner_,overlap_tester,w_)"
<< "\n\t Invalid inputs were given to this function "
<< "\n\t scanner_.get_num_detection_templates(): " << scanner_.get_num_detection_templates()
......@@ -349,10 +349,9 @@ namespace dlib
<< "\n\t this: " << this
);
#ifdef ENABLE_ASSERTS
for (unsigned long i = 0; i < w_.size(); ++i)
{
DLIB_ASSERT(w_[i].size() == scanner_.get_num_dimensions() + 1,
DLIB_CASSERT(w_[i].size() == scanner_.get_num_dimensions() + 1,
"\t object_detector::object_detector(scanner_,overlap_tester,w_)"
<< "\n\t Invalid inputs were given to this function "
<< "\n\t scanner_.get_num_detection_templates(): " << scanner_.get_num_detection_templates()
......@@ -361,7 +360,6 @@ namespace dlib
<< "\n\t this: " << this
);
}
#endif
scanner.copy_configuration(scanner_);
w.resize(w_.size());
......@@ -382,7 +380,7 @@ namespace dlib
const std::vector<object_detector>& detectors
)
{
DLIB_ASSERT(detectors.size() != 0,
DLIB_CASSERT(detectors.size() != 0,
"\t object_detector::object_detector(detectors)"
<< "\n\t Invalid inputs were given to this function "
<< "\n\t this: " << this
......
......@@ -10,6 +10,7 @@
#include "../geometry.h"
#include "../pixel.h"
#include "../console_progress_indicator.h"
#include <utility>
namespace dlib
{
......@@ -57,8 +58,11 @@ namespace dlib
std::vector<split_feature> splits;
std::vector<matrix<float,0,1> > leaf_values;
unsigned long num_leaves() const { return leaf_values.size(); }
inline const matrix<float,0,1>& operator()(
const std::vector<float>& feature_pixel_values
const std::vector<float>& feature_pixel_values,
unsigned long& i
) const
/*!
requires
......@@ -69,9 +73,10 @@ namespace dlib
(i.e. there needs to be the right number of leaves given the number of splits in the tree)
ensures
- runs through the tree and returns the vector at the leaf we end up in.
- #i == the selected leaf node index.
!*/
{
unsigned long i = 0;
i = 0;
while (i < splits.size())
{
if (feature_pixel_values[splits[i].idx1] - feature_pixel_values[splits[i].idx2] > splits[i].thresh)
......@@ -79,7 +84,8 @@ namespace dlib
else
i = right_child(i);
}
return leaf_values[i - splits.size()];
i = i - splits.size();
return leaf_values[i];
}
friend void serialize (const regression_tree& item, std::ostream& out)
......@@ -319,6 +325,16 @@ namespace dlib
return initial_shape.size()/2;
}
unsigned long num_features (
) const
{
    // Add up the leaf counts of every tree at every level of the cascade.
    unsigned long total_leaves = 0;
    for (unsigned long level = 0; level < forests.size(); ++level)
    {
        for (unsigned long tree_idx = 0; tree_idx < forests[level].size(); ++tree_idx)
        {
            total_leaves += forests[level][tree_idx].num_leaves();
        }
    }
    return total_leaves;
}
template <typename image_type>
full_object_detection operator()(
const image_type& img,
......@@ -330,10 +346,47 @@ namespace dlib
std::vector<float> feature_pixel_values;
for (unsigned long iter = 0; iter < forests.size(); ++iter)
{
extract_feature_pixel_values(img, rect, current_shape, initial_shape, anchor_idx[iter], deltas[iter], feature_pixel_values);
extract_feature_pixel_values(img, rect, current_shape, initial_shape,
anchor_idx[iter], deltas[iter], feature_pixel_values);
unsigned long leaf_idx;
// evaluate all the trees at this level of the cascade.
for (unsigned long i = 0; i < forests[iter].size(); ++i)
current_shape += forests[iter][i](feature_pixel_values);
current_shape += forests[iter][i](feature_pixel_values, leaf_idx);
}
// convert the current_shape into a full_object_detection
const point_transform_affine tform_to_img = unnormalizing_tform(rect);
std::vector<point> parts(current_shape.size()/2);
for (unsigned long i = 0; i < parts.size(); ++i)
parts[i] = tform_to_img(location(current_shape, i));
return full_object_detection(rect, parts);
}
template <typename image_type, typename T, typename U>
full_object_detection operator()(
const image_type& img,
const rectangle& rect,
std::vector<std::pair<T,U> >& feats
) const
{
feats.clear();
using namespace impl;
matrix<float,0,1> current_shape = initial_shape;
std::vector<float> feature_pixel_values;
unsigned long feat_offset = 0;
for (unsigned long iter = 0; iter < forests.size(); ++iter)
{
extract_feature_pixel_values(img, rect, current_shape, initial_shape,
anchor_idx[iter], deltas[iter], feature_pixel_values);
// evaluate all the trees at this level of the cascade.
for (unsigned long i = 0; i < forests[iter].size(); ++i)
{
unsigned long leaf_idx;
current_shape += forests[iter][i](feature_pixel_values, leaf_idx);
feats.push_back(std::make_pair(feat_offset+leaf_idx, 1));
feat_offset += forests[iter][i].num_leaves();
}
}
// convert the current_shape into a full_object_detection
......@@ -563,6 +616,7 @@ namespace dlib
// make sure the objects agree on the number of parts and that there is at
// least one full_object_detection.
unsigned long num_parts = 0;
std::vector<int> part_present;
for (unsigned long i = 0; i < objects.size(); ++i)
{
for (unsigned long j = 0; j < objects[i].size(); ++j)
......@@ -574,6 +628,7 @@ namespace dlib
"\t shape_predictor shape_predictor_trainer::train()"
<< "\n\t You can't give objects that don't have any parts to the trainer."
);
part_present.resize(num_parts);
}
else
{
......@@ -584,12 +639,22 @@ namespace dlib
<< "\n\t num_parts: " << num_parts
);
}
for (unsigned long p = 0; p < objects[i][j].num_parts(); ++p)
{
if (objects[i][j].part(p) != OBJECT_PART_NOT_PRESENT)
part_present[p] = 1;
}
}
}
DLIB_CASSERT(num_parts != 0,
"\t shape_predictor shape_predictor_trainer::train()"
<< "\n\t You must give at least one full_object_detection if you want to train a shape model and it must have parts."
);
DLIB_CASSERT(sum(mat(part_present)) == (long)num_parts,
"\t shape_predictor shape_predictor_trainer::train()"
<< "\n\t Each part must appear at least once in this training data. That is, "
<< "\n\t you can't have a part that is always set to OBJECT_PART_NOT_PRESENT."
);
......@@ -646,19 +711,33 @@ namespace dlib
private:
static matrix<float,0,1> object_to_shape (
const full_object_detection& obj
static void object_to_shape (
const full_object_detection& obj,
matrix<float,0,1>& shape,
matrix<float,0,1>& present // a mask telling which elements of #shape are present.
)
{
matrix<float,0,1> shape(obj.num_parts()*2);
shape.set_size(obj.num_parts()*2);
present.set_size(obj.num_parts()*2);
const point_transform_affine tform_from_img = impl::normalizing_tform(obj.get_rect());
for (unsigned long i = 0; i < obj.num_parts(); ++i)
{
vector<float,2> p = tform_from_img(obj.part(i));
shape(2*i) = p.x();
shape(2*i+1) = p.y();
if (obj.part(i) != OBJECT_PART_NOT_PRESENT)
{
vector<float,2> p = tform_from_img(obj.part(i));
shape(2*i) = p.x();
shape(2*i+1) = p.y();
present(2*i) = 1;
present(2*i+1) = 1;
}
else
{
shape(2*i) = 0;
shape(2*i+1) = 0;
present(2*i) = 0;
present(2*i+1) = 0;
}
}
return shape;
}
struct training_sample
......@@ -671,7 +750,9 @@ namespace dlib
pixel when you look it up relative to the shape in current_shape.
- target_shape == The truth shape. Stays constant during the whole
training process.
training process (except for the parts that are not present, those are
always equal to the current_shape values).
- present == 0/1 mask saying which parts of target_shape are present.
- rect == the position of the object in the image_idx-th image. All shape
coordinates are coded relative to this rectangle.
!*/
......@@ -679,6 +760,7 @@ namespace dlib
unsigned long image_idx;
rectangle rect;
matrix<float,0,1> target_shape;
matrix<float,0,1> present;
matrix<float,0,1> current_shape;
std::vector<float> feature_pixel_values;
......@@ -688,6 +770,7 @@ namespace dlib
std::swap(image_idx, item.image_idx);
std::swap(rect, item.rect);
target_shape.swap(item.target_shape);
present.swap(item.present);
current_shape.swap(item.current_shape);
feature_pixel_values.swap(item.feature_pixel_values);
}
......@@ -727,17 +810,38 @@ namespace dlib
// Now all the parts contain the ranges for the leaves so we can use them to
// compute the average leaf values.
matrix<float,0,1> present_counts(samples[0].target_shape.size());
tree.leaf_values.resize(parts.size());
for (unsigned long i = 0; i < parts.size(); ++i)
{
// Get the present counts for each dimension so we can divide each
// dimension by the number of observations we have on it to find the mean
// displacement in each leaf.
present_counts = 0;
for (unsigned long j = parts[i].first; j < parts[i].second; ++j)
present_counts += samples[j].present;
present_counts = dlib::reciprocal(present_counts);
if (parts[i].second != parts[i].first)
tree.leaf_values[i] = sums[num_split_nodes+i]*get_nu()/(parts[i].second - parts[i].first);
tree.leaf_values[i] = pointwise_multiply(present_counts,sums[num_split_nodes+i]*get_nu());
else
tree.leaf_values[i] = zeros_matrix(samples[0].target_shape);
// now adjust the current shape based on these predictions
for (unsigned long j = parts[i].first; j < parts[i].second; ++j)
{
samples[j].current_shape += tree.leaf_values[i];
// For parts that aren't present in the training data, we just make
// sure that the target shape always matches and therefore gives zero
// error. So this makes the algorithm simply ignore non-present
// landmarks.
for (long k = 0; k < samples[j].present.size(); ++k)
{
// if this part is not present
if (samples[j].present(k) == 0)
samples[j].target_shape(k) = samples[j].current_shape(k);
}
}
}
return tree;
......@@ -867,7 +971,7 @@ namespace dlib
{
samples.clear();
matrix<float,0,1> mean_shape;
long count = 0;
matrix<float,0,1> count;
// first fill out the target shapes
for (unsigned long i = 0; i < objects.size(); ++i)
{
......@@ -876,15 +980,15 @@ namespace dlib
training_sample sample;
sample.image_idx = i;
sample.rect = objects[i][j].get_rect();
sample.target_shape = object_to_shape(objects[i][j]);
object_to_shape(objects[i][j], sample.target_shape, sample.present);
for (unsigned long itr = 0; itr < get_oversampling_amount(); ++itr)
samples.push_back(sample);
mean_shape += sample.target_shape;
++count;
count += sample.present;
}
}
mean_shape /= count;
mean_shape = pointwise_multiply(mean_shape,reciprocal(count));
// now go pick random initial shapes
for (unsigned long i = 0; i < samples.size(); ++i)
......@@ -897,12 +1001,35 @@ namespace dlib
}
else
{
// Pick a random convex combination of two of the target shapes and use
// that as the initial shape for this sample.
const unsigned long rand_idx = rnd.get_random_32bit_number()%samples.size();
const unsigned long rand_idx2 = rnd.get_random_32bit_number()%samples.size();
const double alpha = rnd.get_random_double();
samples[i].current_shape = alpha*samples[rand_idx].target_shape + (1-alpha)*samples[rand_idx2].target_shape;
samples[i].current_shape.set_size(0);
matrix<float,0,1> hits(mean_shape.size());
hits = 0;
int iter = 0;
// Pick a few samples at random and randomly average them together to
// make the initial shape. Note that we make sure we get at least one
// observation (i.e. non-OBJECT_PART_NOT_PRESENT) on each part
// location.
while(min(hits) == 0 || iter < 2)
{
++iter;
const unsigned long rand_idx = rnd.get_random_32bit_number()%samples.size();
const double alpha = rnd.get_random_double()+0.1;
samples[i].current_shape += alpha*samples[rand_idx].target_shape;
hits += alpha*samples[rand_idx].present;
}
samples[i].current_shape = pointwise_multiply(samples[i].current_shape, reciprocal(hits));
}
}
for (unsigned long i = 0; i < samples.size(); ++i)
{
for (long k = 0; k < samples[i].present.size(); ++k)
{
// if this part is not present
if (samples[i].present(k) == 0)
samples[i].target_shape(k) = samples[i].current_shape(k);
}
}
......@@ -1029,8 +1156,11 @@ namespace dlib
for (unsigned long k = 0; k < det.num_parts(); ++k)
{
double score = length(det.part(k) - objects[i][j].part(k))/scale;
rs.add(score);
if (objects[i][j].part(k) != OBJECT_PART_NOT_PRESENT)
{
double score = length(det.part(k) - objects[i][j].part(k))/scale;
rs.add(score);
}
}
}
}
......
......@@ -42,6 +42,7 @@ namespace dlib
/*!
ensures
- #num_parts() == 0
- #num_features() == 0
!*/
unsigned long num_parts (
......@@ -51,15 +52,27 @@ namespace dlib
- returns the number of parts in the shapes predicted by this object.
!*/
template <typename image_type>
unsigned long num_features (
) const;
/*!
ensures
- Returns the dimensionality of the feature vector output by operator().
This number is the total number of trees in this object times the number
of leaves on each tree.
!*/
template <typename image_type, typename T, typename U>
full_object_detection operator()(
const image_type& img,
const rectangle& rect
const rectangle& rect,
std::vector<std::pair<T,U> >& feats
) const;
/*!
requires
- image_type == an image object that implements the interface defined in
dlib/image_processing/generic_image.h
- T is some unsigned integral type (e.g. unsigned int).
- U is any scalar type capable of storing the value 1 (e.g. float).
ensures
- Runs the shape prediction algorithm on the part of the image contained in
the given bounding rectangle. So it will try and fit the shape model to
......@@ -73,6 +86,29 @@ namespace dlib
- for all valid i:
- DET.part(i) == the location in img for the i-th part of the shape
predicted by this object.
- #feats == a sparse vector that records which leaf each tree used to make
the shape prediction. Moreover, it is an indicator vector. Therefore,
for all valid i:
- #feats[i].second == 1
Further, #feats is a vector from the space of num_features() dimensional
vectors. The output shape positions can be represented as the dot
product between #feats and a weight vector. Therefore, #feats encodes
all the information from img that was used to predict the returned shape
object.
!*/
template <typename image_type>
full_object_detection operator()(
const image_type& img,
const rectangle& rect
) const;
/*!
requires
- image_type == an image object that implements the interface defined in
dlib/image_processing/generic_image.h
ensures
- Calling this function is equivalent to calling (*this)(img, rect, ignored)
where the 3rd argument is discarded.
!*/
};
......@@ -359,6 +395,9 @@ namespace dlib
- images.size() > 0
- for some i: objects[i].size() != 0
(i.e. there has to be at least one full_object_detection in the training set)
- for all valid p, there must exist i and j such that:
objects[i][j].part(p) != OBJECT_PART_NOT_PRESENT.
(i.e. You can't define a part that is always set to OBJECT_PART_NOT_PRESENT.)
- for all valid i,j,k,l:
- objects[i][j].num_parts() == objects[k][l].num_parts()
(i.e. all objects must agree on the number of parts)
......@@ -370,6 +409,10 @@ namespace dlib
shape_predictor, SP, such that:
SP(images[i], objects[i][j].get_rect()) == objects[i][j]
This learned SP object is then returned.
- Not all parts are required to be observed for all objects. So if you
have training instances with missing parts then set the part positions
equal to OBJECT_PART_NOT_PRESENT and this algorithm will basically ignore
those missing parts.
!*/
};
......@@ -408,6 +451,8 @@ namespace dlib
and compare the result with the truth part positions in objects[i][j]. We
then return the average distance (measured in pixels) between a predicted
part location and its true position.
- Note that any parts in objects that are set to OBJECT_PART_NOT_PRESENT are
simply ignored.
- if (scales.size() != 0) then
- Each time we compute the distance between a predicted part location and
its true location in objects[i][j] we divide the distance by
......
......@@ -288,7 +288,7 @@ namespace dlib
struct dlib_pick_initial_centers_data
{
dlib_pick_initial_centers_data():idx(0), dist(1e200){}
dlib_pick_initial_centers_data():idx(0), dist(std::numeric_limits<double>::infinity()){}
long idx;
double dist;
bool operator< (const dlib_pick_initial_centers_data& d) const { return dist < d.dist; }
......@@ -331,7 +331,7 @@ namespace dlib
// pick the first sample as one of the centers
centers.push_back(samples[0]);
const long best_idx = static_cast<long>(samples.size() - samples.size()*percentile - 1);
const long best_idx = static_cast<long>(std::max(0.0,samples.size() - samples.size()*percentile - 1));
// pick the next center
for (long i = 0; i < num_centers-1; ++i)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment