Commit ad97e1f3 authored by Davis King's avatar Davis King


parents 7e26d2cf 7aed6b3a
......@@ -385,6 +385,15 @@ namespace dlib
return drectangle(p.x()-width/2, p.y()-height/2, p.x()+width/2, p.y()+height/2);
// Returns a drectangle of the given width and height that shares its center
// with rect.  The center is taken with dcenter(rect) and the work is delegated
// to the centered_drect overload that accepts a center point.
inline drectangle centered_drect (
    const drectangle& rect,
    double width,
    double height
)
{
    return centered_drect(dcenter(rect), width, height);
}
inline const drectangle shrink_rect (
const drectangle& rect,
double num
......@@ -419,6 +428,21 @@ namespace dlib
return shrink_rect(rect, -width, -height);
// Reshapes rect so that width/height == ratio while keeping the same area and
// the same center.
//
// requires
//     - ratio > 0 (enforced with DLIB_ASSERT)
// returns
//     - a drectangle R centered on rect with R.width()/R.height() == ratio and
//       R.area() == rect.area()
inline drectangle set_aspect_ratio (
    const drectangle& rect,
    double ratio
)
{
    DLIB_ASSERT(ratio > 0,
        "\t drectangle set_aspect_ratio()"
        << "\n\t ratio: " << ratio
        );

    // Solve  w*h == rect.area()  and  w/h == ratio  for h, then derive w.
    const double h = std::sqrt(rect.area()/ratio);
    const double w = h*ratio;
    return centered_drect(rect, w, h);
}
// ----------------------------------------------------------------------------------------
......@@ -503,6 +503,18 @@ namespace dlib
- R.height() == height
// ----------------------------------------------------------------------------------------
drectangle centered_drect (
const drectangle& rect,
double width,
double height
- returns centered_drect(dcenter(rect), width, height)
// ----------------------------------------------------------------------------------------
const drectangle shrink_rect (
......@@ -554,6 +566,24 @@ namespace dlib
(i.e. grows the given drectangle by expanding its border)
// ----------------------------------------------------------------------------------------
drectangle set_aspect_ratio (
const drectangle& rect,
double ratio
- ratio > 0
- This function reshapes the given rectangle so that it has the given aspect
ratio. In particular, this means we return a rectangle R such that the
following equations are true:
- R.width()/R.height() == ratio
- R.area() == rect.area()
- dcenter(rect) == dcenter(R)
// ----------------------------------------------------------------------------------------
......@@ -90,7 +90,16 @@ namespace dlib
- calls set_current_dir(old_dir())
- if (revert() hasn't already been called) then
- calls set_current_dir(old_dir())
void revert (
- if (revert() hasn't already been called) then
- calls set_current_dir(old_dir())
......@@ -18,13 +18,14 @@ namespace dlib
const std::string& new_dir
reverted = false;
_old_dir = get_current_dir();
const std::string& old_dir (
......@@ -33,7 +34,18 @@ namespace dlib
return _old_dir;
void revert (
if (!reverted)
reverted = true;
bool reverted;
std::string _old_dir;
......@@ -477,6 +477,138 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template <
typename array_type,
typename sample_type,
typename alloc
void find_clusters_using_angular_kmeans (
const array_type& samples,
std::vector<sample_type, alloc>& centers,
unsigned long max_iter = 1000
// make sure requires clause is not broken
DLIB_ASSERT(samples.size() > 0 && centers.size() > 0,
"\tvoid find_clusters_using_angular_kmeans()"
<< "\n\tYou passed invalid arguments to this function"
<< "\n\t samples.size(): " << samples.size()
<< "\n\t centers.size(): " << centers.size()
const long nr = samples[0].nr();
const long nc = samples[0].nc();
for (unsigned long i = 0; i < samples.size(); ++i)
DLIB_ASSERT(is_vector(samples[i]) && samples[i].nr() == nr && samples[i].nc() == nc,
"\tvoid find_clusters_using_angular_kmeans()"
<< "\n\t You passed invalid arguments to this function"
<< "\n\t is_vector(samples[i]): " << is_vector(samples[i])
<< "\n\t samples[i].nr(): " << samples[i].nr()
<< "\n\t nr: " << nr
<< "\n\t samples[i].nc(): " << samples[i].nc()
<< "\n\t nc: " << nc
<< "\n\t i: " << i
typedef typename sample_type::type scalar_type;
sample_type zero(centers[0]);
set_all_elements(zero, 0);
unsigned long seed = 0;
// tells which center a sample belongs to
std::vector<unsigned long> assignments(samples.size(), samples.size());
std::vector<double> lengths;
for (unsigned long i = 0; i < samples.size(); ++i)
// If there are zero vectors in samples then just say their length is 1 so we
// can avoid a division by zero check later on. Also, this doesn't matter
// since zero vectors can be assigned to any cluster randomly as there is no
// basis for picking one based on angle.
if (lengths.back() == 0)
lengths.back() = 1;
// We will keep the centers as unit vectors at all times throughout the processing.
for (unsigned long i = 0; i < centers.size(); ++i)
double len = length(centers[i]);
// Avoid having length 0 centers. If that is the case then pick another center
// at random.
while(len == 0)
centers[i] = matrix_cast<scalar_type>(gaussian_randm(centers[i].nr(), centers[i].nc(), seed++));
len = length(centers[i]);
centers[i] /= len;
unsigned long iter = 0;
bool centers_changed = true;
while (centers_changed && iter < max_iter)
centers_changed = false;
// loop over each sample and see which center it is closest to
for (unsigned long i = 0; i < samples.size(); ++i)
// find the best center for sample[i]
scalar_type best_angle = std::numeric_limits<scalar_type>::max();
unsigned long best_center = 0;
for (unsigned long j = 0; j < centers.size(); ++j)
scalar_type angle = -dot(centers[j],samples[i])/lengths[i];
if (angle < best_angle)
best_angle = angle;
best_center = j;
if (assignments[i] != best_center)
centers_changed = true;
assignments[i] = best_center;
// now update all the centers
centers.assign(centers.size(), zero);
for (unsigned long i = 0; i < samples.size(); ++i)
centers[assignments[i]] += samples[i];
// Now length normalize all the centers.
for (unsigned long i = 0; i < centers.size(); ++i)
double len = length(centers[i]);
// Avoid having length 0 centers. If that is the case then pick another center
// at random.
while(len == 0)
centers[i] = matrix_cast<scalar_type>(gaussian_randm(centers[i].nr(), centers[i].nc(), seed++));
len = length(centers[i]);
centers_changed = true;
centers[i] /= len;
// ----------------------------------------------------------------------------------------
......@@ -283,7 +283,7 @@ namespace dlib
- centers.size() > 0
- array_type == something with an interface compatible with std::vector
and it must contain row or column vectors capable of being stored in
sample_type objects
sample_type objects.
- sample_type == a dlib::matrix capable of representing vectors
- performs regular old linear kmeans clustering on the samples. The clustering
......@@ -293,6 +293,46 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template <
typename array_type,
typename sample_type,
typename alloc
void find_clusters_using_angular_kmeans (
const array_type& samples,
std::vector<sample_type, alloc>& centers,
unsigned long max_iter = 1000
- samples.size() > 0
- samples == a bunch of row or column vectors and they all must be of the
same length.
- centers.size() > 0
- array_type == something with an interface compatible with std::vector
and it must contain row or column vectors capable of being stored in
sample_type objects.
- sample_type == a dlib::matrix capable of representing vectors
- performs linear kmeans clustering on the samples, except instead of using
Euclidean distance to compare samples to the centers it uses the angle
between a sample and a center (with respect to the origin). So we try to
cluster samples together if they have small angles with respect to each
other. The clustering begins with the initial set of centers given as an
argument to this function. When it finishes #centers will contain the
resulting centers.
- for all valid i:
- length(#centers[i]) == 1
(i.e. the output centers are scaled to be unit vectors since their
magnitude is irrelevant. Moreover, this makes it so you can use
functions like nearest_center() with #centers to find the cluster
assignments.)
- No more than max_iter iterations will be performed before this function
terminates.
// ----------------------------------------------------------------------------------------
template <
......@@ -44,32 +44,63 @@ namespace
std::vector<sample_type> centers;
pick_initial_centers(seed_centers.size(), centers, samples, linear_kernel<sample_type>());
std::vector<sample_type> centers;
pick_initial_centers(seed_centers.size(), centers, samples, linear_kernel<sample_type>());
find_clusters_using_kmeans(samples, centers);
find_clusters_using_kmeans(samples, centers);
DLIB_TEST(centers.size() == seed_centers.size());
DLIB_TEST(centers.size() == seed_centers.size());
std::vector<int> hits(centers.size(),0);
for (unsigned long i = 0; i < samples.size(); ++i)
unsigned long best_idx = 0;
double best_dist = 1e100;
for (unsigned long j = 0; j < centers.size(); ++j)
std::vector<int> hits(centers.size(),0);
for (unsigned long i = 0; i < samples.size(); ++i)
if (length(samples[i] - centers[j]) < best_dist)
unsigned long best_idx = 0;
double best_dist = 1e100;
for (unsigned long j = 0; j < centers.size(); ++j)
best_dist = length(samples[i] - centers[j]);
best_idx = j;
if (length(samples[i] - centers[j]) < best_dist)
best_dist = length(samples[i] - centers[j]);
best_idx = j;
for (unsigned long i = 0; i < hits.size(); ++i)
for (unsigned long i = 0; i < hits.size(); ++i)
DLIB_TEST(hits[i] == 250);
DLIB_TEST(hits[i] == 250);
std::vector<sample_type> centers;
pick_initial_centers(seed_centers.size(), centers, samples, linear_kernel<sample_type>());
find_clusters_using_angular_kmeans(samples, centers);
DLIB_TEST(centers.size() == seed_centers.size());
std::vector<int> hits(centers.size(),0);
for (unsigned long i = 0; i < samples.size(); ++i)
unsigned long best_idx = 0;
double best_dist = 1e100;
for (unsigned long j = 0; j < centers.size(); ++j)
if (length(samples[i] - centers[j]) < best_dist)
best_dist = length(samples[i] - centers[j]);
best_idx = j;
for (unsigned long i = 0; i < hits.size(); ++i)
DLIB_TEST(hits[i] == 250);
......@@ -111,6 +111,7 @@ Davis E. King. <a href="
......@@ -357,7 +358,23 @@ Davis E. King. <a href="
<spec_file link="true">dlib/svm/kkmeans_abstract.h</spec_file>
This is just a simple linear kmeans clustering implementation.
This is a simple linear kmeans clustering implementation.
It uses Euclidean distance to compare samples.
<!-- ************************************************************************* -->
<spec_file link="true">dlib/svm/kkmeans_abstract.h</spec_file>
This is a simple linear kmeans clustering implementation.
To compare a sample to a cluster, it measures the angle between them
with respect to the origin. Therefore, it tries to find clusters
of points whose members all have small angles between one another.
......@@ -424,6 +424,7 @@
<term file="dlib/statistics/dpca_abstract.h.html" name="discriminant_pca_error" include="dlib/statistics.h"/>
<term file="ml.html" name="kkmeans" include="dlib/clustering.h"/>
<term file="ml.html" name="find_clusters_using_kmeans" include="dlib/clustering.h"/>
<term file="ml.html" name="find_clusters_using_angular_kmeans" include="dlib/clustering.h"/>
<term file="ml.html" name="nearest_center" include="dlib/clustering.h"/>
<term file="ml.html" name="newman_cluster" include="dlib/clustering.h"/>
<term file="ml.html" name="spectral_cluster" include="dlib/clustering.h"/>
......@@ -24,6 +24,7 @@ ADD_EXECUTABLE(${target_name}
// Copyright (C) 2015 Davis E. King (
// License: Boost Software License See LICENSE.txt for the full license.
#include "cluster.h"
#include <dlib/console_progress_indicator.h>
#include <dlib/image_io.h>
#include <dlib/data_io.h>
#include <dlib/image_transforms.h>
#include <dlib/misc_api.h>
#include <dlib/dir_nav.h>
#include <dlib/clustering.h>
#include <dlib/svm.h>
// ----------------------------------------------------------------------------------------
using namespace std;
using namespace dlib;
// ----------------------------------------------------------------------------
// Records which cluster a single feature vector ended up in.
struct assignment
{
    unsigned long c;    // index of the cluster center the sample was assigned to
    double dist;        // distance from the sample to that center
    unsigned long idx;  // index of the sample in the input feature list

    // Orders assignments by increasing distance, so sorting puts the samples
    // nearest their center first.
    bool operator<(const assignment& item) const
    { return dist < item.dist; }
};
std::vector<assignment> angular_cluster (
std::vector<matrix<double,0,1> > feats,
const unsigned long num_clusters
DLIB_CASSERT(feats.size() != 0, "The dataset can't be empty");
for (unsigned long i = 0; i < feats.size(); ++i)
DLIB_CASSERT(feats[i].size() == feats[0].size(), "All feature vectors must have the same length.");
// find the centroid of feats
matrix<double,0,1> m;
for (unsigned long i = 0; i < feats.size(); ++i)
m += feats[i];
m /= feats.size();
// Now center feats and then project onto the unit sphere. The reason for projecting
// onto the unit sphere is so pick_initial_centers() works in a sensible way.
for (unsigned long i = 0; i < feats.size(); ++i)
feats[i] -= m;
double len = length(feats[i]);
if (len != 0)
feats[i] /= len;
// now do angular clustering of the points
std::vector<matrix<double,0,1> > centers;
pick_initial_centers(num_clusters, centers, feats, linear_kernel<matrix<double,0,1> >(), 0.05);
find_clusters_using_angular_kmeans(feats, centers);
// and then report the resulting assignments
std::vector<assignment> assignments;
for (unsigned long i = 0; i < feats.size(); ++i)
assignment temp;
temp.c = nearest_center(centers, feats[i]);
temp.dist = length(feats[i] - centers[temp.c]);
temp.idx = i;
return assignments;
// ----------------------------------------------------------------------------------------
bool compare_first (
const std::pair<double,image_dataset_metadata::image>& a,
const std::pair<double,image_dataset_metadata::image>& b
return a.first < b.first;
// ----------------------------------------------------------------------------------------
// Returns the average width/height ratio over all boxes in the dataset.
// Boxes with zero area are left out of the average (which also avoids dividing
// by a zero height).  Returns 0 when no box contributes.
double mean_aspect_ratio (
    const image_dataset_metadata::dataset& data
)
{
    double sum = 0;
    double cnt = 0;
    for (unsigned long i = 0; i < data.images.size(); ++i)
    {
        for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
        {
            const rectangle rect = data.images[i].boxes[j].rect;
            if (rect.area() == 0)
                continue;

            sum += rect.width()/static_cast<double>(rect.height());
            ++cnt;
        }
    }

    if (cnt != 0)
        return sum/cnt;
    return 0;
}
// ----------------------------------------------------------------------------------------
// Entry point for imglab's --cluster option.  Loads the XML image dataset named
// by the single command line argument, chips out the object boxes, clusters
// them with angular_cluster(), and reports every cluster as a cluster_###.xml
// file and a cluster_###.jpg tiling.
//
// NOTE(review): this listing has lost its delimiter lines and several short
// statements.  The notes below describe only what is visible and flag the
// gaps; confirm each against the complete file.
int cluster_dataset(
const dlib::command_line_parser& parser
// make sure the user entered an argument to this program
if (parser.number_of_arguments() != 1)
cerr << "The --cluster option requires you to give one XML file on the command line." << endl;
// NOTE(review): no return follows the error message in this listing;
// presumably the function exits with a failure code here.
// Read the option values; the last argument to get_option() looks like the
// fallback used when the option is absent -- TODO confirm.
const unsigned long num_clusters = get_option(parser, "cluster", 2);
const unsigned long chip_size = get_option(parser, "size", 8000);
image_dataset_metadata::dataset data;
// NOTE(review): unlike tile_dataset(), no change of working directory is
// visible before the images are loaded -- confirm that image filenames
// relative to the XML file still resolve.
image_dataset_metadata::load_image_dataset_metadata(data, parser[0]);
// Every box is reshaped to this common aspect ratio before it is chipped out.
const double aspect_ratio = mean_aspect_ratio(data);
dlib::array<array2d<rgb_pixel> > images;
std::vector<matrix<double,0,1> > feats;
console_progress_indicator pbar(data.images.size());
// extract all the object chips and HOG features.
cout << "Loading image data..." << endl;
for (unsigned long i = 0; i < data.images.size(); ++i)
// NOTE(review): the statement guarded by this check is missing; presumably
// images without boxes are skipped.
if (data.images[i].boxes.size() == 0)
array2d<rgb_pixel> img, chip;
load_image(img, data.images[i].filename);
for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
// NOTE(review): guarded statement missing; presumably ignored boxes are skipped.
if (data.images[i].boxes[j].ignore)
drectangle rect = data.images[i].boxes[j].rect;
rect = set_aspect_ratio(rect, aspect_ratio);
extract_image_chip(img, chip_details(rect, chip_size), chip);
// NOTE(review): nothing visible appends to feats (or images), yet feats is
// tested and clustered below; the feature extraction and push_back calls
// appear to be missing from this listing.
if (feats.size() == 0)
cerr << "No non-ignored object boxes found in the XML dataset. You can't cluster an empty dataset." << endl;
// NOTE(review): presumably returns a failure code here as well.
cout << "\nClustering objects..." << endl;
std::vector<assignment> assignments = angular_cluster(feats, num_clusters);
// Now output each cluster to disk as an XML file.
for (unsigned long c = 0; c < num_clusters; ++c)
// We are going to accumulate all the image metadata for cluster c. We put it
// into idata so we can sort the images such that images with central chips
// come before less central chips. The idea being to get the good chips to
// show up first in the listing, making it easy to manually remove bad ones if
// that is desired.
std::vector<std::pair<double,image_dataset_metadata::image> > idata(data.images.size());
// idx indexes assignments.  NOTE(review): no increment is visible; presumably
// it advances once per non-ignored box so it stays in step with feats.
unsigned long idx = 0;
for (unsigned long i = 0; i < data.images.size(); ++i)
if (data.images[i].boxes.size() == 0)
// An infinite score marks an image with no box in cluster c; the output
// loop below only considers images whose score is finite.
idata[i].first = std::numeric_limits<double>::infinity();
idata[i].second.filename = data.images[i].filename;
for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
if (data.images[i].boxes[j].ignore)
// If this box goes into cluster c then update the score for the whole
// image based on this boxes' score. Otherwise, mark the box as
// ignored.
if (assignments[idx].c == c)
idata[i].first = std::min(idata[i].first, assignments[idx].dist);
// NOTE(review): presumably the else branch; the statement that copies the box
// into idata[i].second.boxes is not visible in this listing.
idata[i].second.boxes.back().ignore = true;
// now save idata to an xml file.
std::sort(idata.begin(), idata.end(), compare_first);
image_dataset_metadata::dataset cdata;
cdata.comment = data.comment + "\n\n This file contains objects which were clustered into group " +
cast_to_string(c+1) + " of " + cast_to_string(num_clusters) + " groups with a chip size of " +
cast_to_string(chip_size) + " by imglab."; =;
// NOTE(review): the trailing " =;" above is not valid C++.  It looks like an
// assignment whose operands were lost in extraction; restore it from the
// original file.
for (unsigned long i = 0; i < idata.size(); ++i)
// if this image has non-ignored boxes in it then include it in the output.
if (idata[i].first != std::numeric_limits<double>::infinity())
// NOTE(review): the statement that adds idata[i].second to cdata is missing here.
string outfile = "cluster_"+pad_int_with_zeros(c+1, 3) + ".xml";
cout << "Saving " << outfile << endl;
save_image_dataset_metadata(cdata, outfile);
// Now output each cluster to disk as a big tiled jpeg file. Sort everything so, just
// like in the xml file above, the best objects come first in the tiling.
// (assignment::operator< compares dist, so this sorts by increasing distance.)
std::sort(assignments.begin(), assignments.end());
for (unsigned long c = 0; c < num_clusters; ++c)
dlib::array<array2d<rgb_pixel> > temp;
for (unsigned long i = 0; i < assignments.size(); ++i)
// NOTE(review): the statement that copies the matching chip into temp is
// missing from this listing.
if (assignments[i].c == c)
string outfile = "cluster_"+pad_int_with_zeros(c+1, 3) + ".jpg";
cout << "Saving " << outfile << endl;
save_jpeg(tile_images(temp), outfile);
// NOTE(review): no final return statement is visible for this int function.
// ----------------------------------------------------------------------------------------
// Copyright (C) 2015 Davis E. King (
// License: Boost Software License See LICENSE.txt for the full license.
#include <dlib/cmd_line_parser.h>
int cluster_dataset(const dlib::command_line_parser& parser);
......@@ -5,9 +5,11 @@
#include "convert_pascal_xml.h"
#include "convert_pascal_v1.h"
#include "convert_idl.h"
#include "cluster.h"
#include <dlib/cmd_line_parser.h>
#include <dlib/image_transforms.h>
#include <dlib/svm.h>
#include <dlib/console_progress_indicator.h>
#include <iostream>
#include <fstream>
......@@ -17,7 +19,7 @@
#include <dlib/dir_nav.h>
const char* VERSION = "1.1";
const char* VERSION = "1.2";
......@@ -314,6 +316,75 @@ void flip_dataset(const command_line_parser& parser)
// ----------------------------------------------------------------------------------------
// Entry point for imglab's --tile option.  Loads the XML image dataset named by
// the single command line argument, chips out the object boxes, and saves them
// tiled into one png or jpg image whose name is the --tile argument.  Throws
// dlib::error when built without both DLIB_PNG_SUPPORT and DLIB_JPEG_SUPPORT.
//
// NOTE(review): this listing has lost its delimiter lines, the #else/#endif of
// the conditional below, and several short statements.  The notes describe
// only what is visible; confirm each against the complete file.
int tile_dataset(const command_line_parser& parser)
#if defined(DLIB_PNG_SUPPORT) && defined(DLIB_JPEG_SUPPORT)
if (parser.number_of_arguments() != 1)
cerr << "The --tile option requires you to give one XML file on the command line." << endl;
// NOTE(review): presumably returns a failure code here; no return is visible.
string out_image = parser.option("tile").argument();
// The output format is chosen from the text after the last "." in the name.
string ext = right_substr(out_image,".");
if (ext != "png" && ext != "jpg")
cerr << "The output image file must have either .png or .jpg extension." << endl;
// NOTE(review): presumably returns a failure code here as well.
const unsigned long chip_size = get_option(parser, "size", 8000);
dlib::image_dataset_metadata::dataset data;
load_image_dataset_metadata(data, parser[0]);
// Work from the XML file's directory for the rest of this scope, presumably so
// image filenames relative to the XML file resolve -- TODO confirm.
locally_change_current_dir chdir(get_parent_directory(file(parser[0])));
dlib::array<array2d<rgb_pixel> > images;
console_progress_indicator pbar(data.images.size());
for (unsigned long i = 0; i < data.images.size(); ++i)
// don't even bother loading images that don't have objects.
// NOTE(review): the guarded statement (presumably a skip) is missing here.
if (data.images[i].boxes.size() == 0)
array2d<rgb_pixel> img;
load_image(img, data.images[i].filename);
// figure out what chips we want to take from this image
std::vector<chip_details> dets;
for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
// NOTE(review): guarded statement missing; presumably ignored boxes are skipped.
if (data.images[i].boxes[j].ignore)
rectangle rect = data.images[i].boxes[j].rect;
dets.push_back(chip_details(rect, chip_size));
// Now grab all those chips at once.
dlib::array<array2d<rgb_pixel> > chips;
extract_image_chips(img, dets, chips);
// and put the chips into the output.
// NOTE(review): the loop body that appends chips[j] to images is not visible.
for (unsigned long j = 0; j < chips.size(); ++j)
// Pick the encoder that matches the requested extension.  NOTE(review): the
// else between the two save calls is not visible in this listing.
if (ext == "png")
save_png(tile_images(images), out_image);
save_jpeg(tile_images(images), out_image);
// NOTE(review): presumably this throw sits in the #else branch of the #if
// above; the #else and #endif lines are missing from this listing.
throw dlib::error("imglab must be compiled with libpng and libjpeg if you want to use the --tile option.");
// ----------------------------------------------------------------------------------------
int main(int argc, char** argv)
......@@ -332,9 +403,14 @@ int main(int argc, char** argv)
parser.add_option("convert","Convert foreign image Annotations from <arg> format to the imglab format. "
"Supported formats: pascal-xml, pascal-v1, idl.",1);
parser.set_group_name("Viewing/Editing XML files");
parser.set_group_name("Viewing XML files");
parser.add_option("tile","Chip out all the objects and save them as one big image called <arg>.",1);
parser.add_option("size","When using --tile or --cluster, make each extracted object contain "
"about <arg> pixels (default 8000).",1);
parser.add_option("l","List all the labels in the given XML file.");
parser.add_option("stats","List detailed statistics on the object labels in the given XML file.");
parser.set_group_name("Editing/Transforming XML files");
parser.add_option("rename", "Rename all labels of <arg1> to <arg2>.",2);
parser.add_option("parts","The display will allow image parts to be labeled. The set of allowable parts "
"is defined by <arg> which should be a space separated list of parts.",1);
......@@ -351,31 +427,47 @@ int main(int argc, char** argv)
"<arg2> files are modified.",2);
parser.add_option("flip", "Read an XML image dataset from the <arg> XML file and output a left-right flipped "
"version of the dataset and an accompanying flipped XML file named flipped_<arg>.",1);
parser.add_option("cluster", "Cluster all the objects in an XML file into <arg> different clusters and save "
"the results as cluster_###.xml and cluster_###.jpg files.",1);
parser.parse(argc, argv);
const char* singles[] = {"h","c","r","l","convert","parts","rmdiff","seed", "shuffle", "split", "add", "flip"};
const char* singles[] = {"h","c","r","l","convert","parts","rmdiff","seed", "shuffle", "split", "add",
"flip", "tile", "size", "cluster"};
const char* c_sub_ops[] = {"r", "convert"};
parser.check_sub_options("c", c_sub_ops);
parser.check_sub_option("shuffle", "seed");
const char* size_parent_ops[] = {"tile", "cluster"};
parser.check_sub_options(size_parent_ops, "size");
parser.check_incompatible_options("c", "l");
parser.check_incompatible_options("c", "rmdiff");
parser.check_incompatible_options("c", "add");
parser.check_incompatible_options("c", "flip");
parser.check_incompatible_options("c", "rename");
parser.check_incompatible_options("c", "parts");
parser.check_incompatible_options("c", "tile");
parser.check_incompatible_options("c", "cluster");
parser.check_incompatible_options("l", "rename");
parser.check_incompatible_options("l", "add");
parser.check_incompatible_options("l", "parts");
parser.check_incompatible_options("l", "flip");
parser.check_incompatible_options("add", "flip");
parser.check_incompatible_options("add", "tile");
parser.check_incompatible_options("flip", "tile");
parser.check_incompatible_options("cluster", "tile");
parser.check_incompatible_options("flip", "cluster");
parser.check_incompatible_options("add", "cluster");
parser.check_incompatible_options("shuffle", "tile");
parser.check_incompatible_options("convert", "l");
parser.check_incompatible_options("convert", "rename");
parser.check_incompatible_options("convert", "parts");
parser.check_incompatible_options("convert", "cluster");
parser.check_incompatible_options("rmdiff", "rename");
const char* convert_args[] = {"pascal-xml","pascal-v1","idl"};
parser.check_option_arg_range("convert", convert_args);
parser.check_option_arg_range("cluster", 2, 999);
parser.check_option_arg_range("size", 10*10, 1000*1000);
if (parser.option("h"))
......@@ -406,6 +498,16 @@ int main(int argc, char** argv)
if (parser.option("tile"))
return tile_dataset(parser);
if (parser.option("cluster"))
return cluster_dataset(parser);
if (parser.option("c"))
if (parser.option("convert"))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment