Commit 7aed6b3a authored by Davis King

Added the --cluster option to imglab.

parent 16efd469
......@@ -24,6 +24,7 @@ ADD_EXECUTABLE(${target_name}
// Copyright (C) 2015 Davis E. King
// License: Boost Software License See LICENSE.txt for the full license.
#include "cluster.h"

#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <string>
#include <utility>
#include <vector>

#include <dlib/console_progress_indicator.h>
#include <dlib/image_io.h>
#include <dlib/data_io.h>
#include <dlib/image_transforms.h>
#include <dlib/misc_api.h>
#include <dlib/dir_nav.h>
#include <dlib/clustering.h>
#include <dlib/svm.h>
// ----------------------------------------------------------------------------------------
using namespace std;
using namespace dlib;
// ----------------------------------------------------------------------------
// Records which cluster a single feature vector ended up in.
struct assignment
{
    unsigned long c;    // index of the cluster center the sample was assigned to
    double dist;        // distance between the sample and that center
    unsigned long idx;  // position of the sample in the list that was clustered

    // Orders assignments by distance, so sorting puts the samples closest to
    // their center first.
    bool operator<(const assignment& item) const
    { return dist < item.dist; }
};
std::vector<assignment> angular_cluster (
std::vector<matrix<double,0,1> > feats,
const unsigned long num_clusters
DLIB_CASSERT(feats.size() != 0, "The dataset can't be empty");
for (unsigned long i = 0; i < feats.size(); ++i)
DLIB_CASSERT(feats[i].size() == feats[0].size(), "All feature vectors must have the same length.");
// find the centroid of feats
matrix<double,0,1> m;
for (unsigned long i = 0; i < feats.size(); ++i)
m += feats[i];
m /= feats.size();
// Now center feats and then project onto the unit sphere. The reason for projecting
// onto the unit sphere is so pick_initial_centers() works in a sensible way.
for (unsigned long i = 0; i < feats.size(); ++i)
feats[i] -= m;
double len = length(feats[i]);
if (len != 0)
feats[i] /= len;
// now do angular clustering of the points
std::vector<matrix<double,0,1> > centers;
pick_initial_centers(num_clusters, centers, feats, linear_kernel<matrix<double,0,1> >(), 0.05);
find_clusters_using_angular_kmeans(feats, centers);
// and then report the resulting assignments
std::vector<assignment> assignments;
for (unsigned long i = 0; i < feats.size(); ++i)
assignment temp;
temp.c = nearest_center(centers, feats[i]);
temp.dist = length(feats[i] - centers[temp.c]);
temp.idx = i;
return assignments;
// ----------------------------------------------------------------------------------------
bool compare_first (
const std::pair<double,image_dataset_metadata::image>& a,
const std::pair<double,image_dataset_metadata::image>& b
return a.first < b.first;
// ----------------------------------------------------------------------------------------
// Returns the average width/height ratio over every box in the dataset whose
// rectangle has a non-zero area.  Returns 0 when there is no such box.
double mean_aspect_ratio (
    const image_dataset_metadata::dataset& data
)
{
    double sum = 0;
    double cnt = 0;
    for (unsigned long i = 0; i < data.images.size(); ++i)
    {
        for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
        {
            const rectangle rect = data.images[i].boxes[j].rect;
            // Skip empty rectangles: they carry no shape information and would
            // otherwise lead to a division by a zero height.
            if (rect.area() == 0)
                continue;
            sum += rect.width()/static_cast<double>(rect.height());
            ++cnt;
        }
    }

    if (cnt != 0)
        return sum/cnt;
    else
        return 0;
}
// ----------------------------------------------------------------------------------------
// Implements imglab's --cluster option.
//
// Reads the XML dataset named by the single command line argument, extracts a
// chip of roughly "size" pixels for every non-ignored box, clusters the chips'
// HOG features into "cluster" groups and writes, for each group, a
// cluster_###.xml dataset and a cluster_###.jpg tiling of its chips.
//
// Returns EXIT_SUCCESS on success and EXIT_FAILURE when the command line is
// wrong or the dataset holds no usable boxes.
int cluster_dataset(
    const dlib::command_line_parser& parser
)
{
    // make sure the user entered an argument to this program
    if (parser.number_of_arguments() != 1)
    {
        cerr << "The --cluster option requires you to give one XML file on the command line." << endl;
        return EXIT_FAILURE;
    }

    const unsigned long num_clusters = get_option(parser, "cluster", 2);
    const unsigned long chip_size = get_option(parser, "size", 8000);

    image_dataset_metadata::dataset data;

    image_dataset_metadata::load_image_dataset_metadata(data, parser[0]);
    // NOTE(review): this line is not visible in the scraped diff; it is restored
    // on the evidence of the otherwise unused misc_api.h / dir_nav.h includes so
    // that image paths relative to the XML file resolve.  Confirm against upstream.
    set_current_dir(get_parent_directory(file(parser[0])));

    const double aspect_ratio = mean_aspect_ratio(data);

    dlib::array<array2d<rgb_pixel> > images;
    std::vector<matrix<double,0,1> > feats;
    console_progress_indicator pbar(data.images.size());
    // extract all the object chips and HOG features.
    cout << "Loading image data..." << endl;
    for (unsigned long i = 0; i < data.images.size(); ++i)
    {
        pbar.print_status(i);
        if (data.images[i].boxes.size() == 0)
            continue;

        array2d<rgb_pixel> img, chip;
        load_image(img, data.images[i].filename);

        for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
        {
            if (data.images[i].boxes[j].ignore)
                continue;

            // Give every chip the same aspect ratio so the features are comparable.
            drectangle rect = data.images[i].boxes[j].rect;
            rect = set_aspect_ratio(rect, aspect_ratio);
            extract_image_chip(img, chip_details(rect, chip_size), chip);
            feats.push_back(extract_fhog_features(chip));
            images.push_back(chip);
        }
    }

    if (feats.size() == 0)
    {
        cerr << "No non-ignored object boxes found in the XML dataset. You can't cluster an empty dataset." << endl;
        return EXIT_FAILURE;
    }

    cout << "\nClustering objects..." << endl;
    std::vector<assignment> assignments = angular_cluster(feats, num_clusters);

    const double no_score = std::numeric_limits<double>::infinity();

    // Now output each cluster to disk as an XML file.
    for (unsigned long c = 0; c < num_clusters; ++c)
    {
        // We are going to accumulate all the image metadata for cluster c.  We put it
        // into idata so we can sort the images such that images with central chips
        // come before less central chips.  The idea being to get the good chips to
        // show up first in the listing, making it easy to manually remove bad ones if
        // that is desired.
        std::vector<std::pair<double,image_dataset_metadata::image> > idata(data.images.size());
        // idx walks assignments in the same order the features were extracted
        // above: image by image, non-ignored boxes only.
        unsigned long idx = 0;
        for (unsigned long i = 0; i < data.images.size(); ++i)
        {
            // Initialise the entry before any skip.  Otherwise an image without
            // boxes keeps a score of 0 and is wrongly written to every cluster file.
            idata[i].first = no_score;
            idata[i].second.filename = data.images[i].filename;

            if (data.images[i].boxes.size() == 0)
                continue;

            for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
            {
                idata[i].second.boxes.push_back(data.images[i].boxes[j]);

                if (data.images[i].boxes[j].ignore)
                    continue;

                // If this box goes into cluster c then update the score for the whole
                // image based on this box's score.  Otherwise, mark the box as
                // ignored.
                if (assignments[idx].c == c)
                    idata[i].first = std::min(idata[i].first, assignments[idx].dist);
                else
                    idata[i].second.boxes.back().ignore = true;

                ++idx;
            }
        }

        // now save idata to an xml file.
        std::sort(idata.begin(), idata.end(), compare_first);
        image_dataset_metadata::dataset cdata;
        cdata.comment = data.comment + "\n\n This file contains objects which were clustered into group " +
            cast_to_string(c+1) + " of " + cast_to_string(num_clusters) + " groups with a chip size of " +
            cast_to_string(chip_size) + " by imglab.";
        cdata.name = data.name;
        for (unsigned long i = 0; i < idata.size(); ++i)
        {
            // if this image has non-ignored boxes in it then include it in the output.
            if (idata[i].first != no_score)
                cdata.images.push_back(idata[i].second);
        }

        string outfile = "cluster_"+pad_int_with_zeros(c+1, 3) + ".xml";
        cout << "Saving " << outfile << endl;
        save_image_dataset_metadata(cdata, outfile);
    }

    // Now output each cluster to disk as a big tiled jpeg file.  Sort everything so, just
    // like in the xml file above, the best objects come first in the tiling.
    std::sort(assignments.begin(), assignments.end());
    for (unsigned long c = 0; c < num_clusters; ++c)
    {
        dlib::array<array2d<rgb_pixel> > temp;
        for (unsigned long i = 0; i < assignments.size(); ++i)
        {
            // Every chip belongs to exactly one cluster, so each entry of images
            // is handed over at most once.
            if (assignments[i].c == c)
                temp.push_back(images[assignments[i].idx]);
        }

        string outfile = "cluster_"+pad_int_with_zeros(c+1, 3) + ".jpg";
        cout << "Saving " << outfile << endl;
        save_jpeg(tile_images(temp), outfile);
    }

    return EXIT_SUCCESS;
}
// ----------------------------------------------------------------------------------------
// Copyright (C) 2015 Davis E. King
// License: Boost Software License See LICENSE.txt for the full license.
#include <dlib/cmd_line_parser.h>
// Runs imglab's --cluster mode on the already parsed command line.  The
// returned int is what main() returns when the "cluster" option is given.
int cluster_dataset(const dlib::command_line_parser& parser);
......@@ -5,6 +5,7 @@
#include "convert_pascal_xml.h"
#include "convert_pascal_v1.h"
#include "convert_idl.h"
#include "cluster.h"
#include <dlib/cmd_line_parser.h>
#include <dlib/image_transforms.h>
#include <dlib/svm.h>
......@@ -18,7 +19,7 @@
#include <dlib/dir_nav.h>
const char* VERSION = "1.1";
const char* VERSION = "1.2";
......@@ -335,7 +336,7 @@ int tile_dataset(const command_line_parser& parser)
const unsigned long chip_size = get_option(parser, "size", 7000);
const unsigned long chip_size = get_option(parser, "size", 8000);
dlib::image_dataset_metadata::dataset data;
load_image_dataset_metadata(data, parser[0]);
......@@ -402,11 +403,14 @@ int main(int argc, char** argv)
parser.add_option("convert","Convert foreign image Annotations from <arg> format to the imglab format. "
"Supported formats: pascal-xml, pascal-v1, idl.",1);
parser.set_group_name("Viewing/Editing XML files");
parser.set_group_name("Viewing XML files");
parser.add_option("tile","Chip out all the objects and save them as one big image called <arg>.",1);
parser.add_option("size","When using --tile, make each object contain about <arg> pixels (default 7000).",1);
parser.add_option("size","When using --tile or --cluster, make each extracted object contain "
"about <arg> pixels (default 8000).",1);
parser.add_option("l","List all the labels in the given XML file.");
parser.add_option("stats","List detailed statistics on the object labels in the given XML file.");
parser.set_group_name("Editing/Transforming XML files");
parser.add_option("rename", "Rename all labels of <arg1> to <arg2>.",2);
parser.add_option("parts","The display will allow image parts to be labeled. The set of allowable parts "
"is defined by <arg> which should be a space separated list of parts.",1);
......@@ -423,16 +427,19 @@ int main(int argc, char** argv)
"<arg2> files are modified.",2);
parser.add_option("flip", "Read an XML image dataset from the <arg> XML file and output a left-right flipped "
"version of the dataset and an accompanying flipped XML file named flipped_<arg>.",1);
parser.add_option("cluster", "Cluster all the objects in an XML file into <arg> different clusters and save "
"the results as cluster_###.xml and cluster_###.jpg files.",1);
parser.parse(argc, argv);
const char* singles[] = {"h","c","r","l","convert","parts","rmdiff","seed", "shuffle", "split", "add",
"flip", "tile", "size"};
"flip", "tile", "size", "cluster"};
const char* c_sub_ops[] = {"r", "convert"};
parser.check_sub_options("c", c_sub_ops);
parser.check_sub_option("shuffle", "seed");
parser.check_sub_option("tile", "size");
const char* size_parent_ops[] = {"tile", "cluster"};
parser.check_sub_options(size_parent_ops, "size");
parser.check_incompatible_options("c", "l");
parser.check_incompatible_options("c", "rmdiff");
parser.check_incompatible_options("c", "add");
......@@ -440,6 +447,7 @@ int main(int argc, char** argv)
parser.check_incompatible_options("c", "rename");
parser.check_incompatible_options("c", "parts");
parser.check_incompatible_options("c", "tile");
parser.check_incompatible_options("c", "cluster");
parser.check_incompatible_options("l", "rename");
parser.check_incompatible_options("l", "add");
parser.check_incompatible_options("l", "parts");
......@@ -447,13 +455,19 @@ int main(int argc, char** argv)
parser.check_incompatible_options("add", "flip");
parser.check_incompatible_options("add", "tile");
parser.check_incompatible_options("flip", "tile");
parser.check_incompatible_options("cluster", "tile");
parser.check_incompatible_options("flip", "cluster");
parser.check_incompatible_options("add", "cluster");
parser.check_incompatible_options("shuffle", "tile");
parser.check_incompatible_options("convert", "l");
parser.check_incompatible_options("convert", "rename");
parser.check_incompatible_options("convert", "parts");
parser.check_incompatible_options("convert", "cluster");
parser.check_incompatible_options("rmdiff", "rename");
const char* convert_args[] = {"pascal-xml","pascal-v1","idl"};
parser.check_option_arg_range("convert", convert_args);
parser.check_option_arg_range("cluster", 2, 999);
parser.check_option_arg_range("size", 10*10, 1000*1000);
if (parser.option("h"))
......@@ -489,6 +503,11 @@ int main(int argc, char** argv)
return tile_dataset(parser);
if (parser.option("cluster"))
return cluster_dataset(parser);
if (parser.option("c"))
if (parser.option("convert"))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment