cluster.cpp 8.5 KB
// Copyright (C) 2015  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.

#include "cluster.h" 
#include <dlib/console_progress_indicator.h>
#include <dlib/image_io.h>
#include <dlib/data_io.h>
#include <dlib/image_transforms.h>
#include <dlib/misc_api.h>
#include <dlib/dir_nav.h>
#include <dlib/clustering.h>
#include <dlib/svm.h>

// ----------------------------------------------------------------------------------------

using namespace std;
using namespace dlib;

//  ----------------------------------------------------------------------------

struct assignment
{
    unsigned long c;
    double dist;
    unsigned long idx;

    bool operator<(const assignment& item) const
    { return dist < item.dist; }
};

std::vector<assignment> angular_cluster (
    std::vector<matrix<double,0,1> > feats,
    const unsigned long num_clusters
)
{
    DLIB_CASSERT(feats.size() != 0, "The dataset can't be empty");
    for (unsigned long i = 0; i < feats.size(); ++i)
    {
        DLIB_CASSERT(feats[i].size() == feats[0].size(), "All feature vectors must have the same length.");
    }

    // find the centroid of feats
    matrix<double,0,1> m;
    for (unsigned long i = 0; i < feats.size(); ++i)
        m += feats[i];
    m /= feats.size();

    // Now center feats and then project onto the unit sphere.  The reason for projecting
    // onto the unit sphere is so pick_initial_centers() works in a sensible way.
    for (unsigned long i = 0; i < feats.size(); ++i)
    {
        feats[i] -= m;
        double len = length(feats[i]);
        if (len != 0)
            feats[i] /= len;
    }

    // now do angular clustering of the points
    std::vector<matrix<double,0,1> > centers;
    pick_initial_centers(num_clusters, centers, feats, linear_kernel<matrix<double,0,1> >(), 0.05);
    find_clusters_using_angular_kmeans(feats, centers);

    // and then report the resulting assignments
    std::vector<assignment> assignments;
    for (unsigned long i = 0; i < feats.size(); ++i)
    {
        assignment temp;
        temp.c = nearest_center(centers, feats[i]);
        temp.dist = length(feats[i] - centers[temp.c]);
        temp.idx = i;
        assignments.push_back(temp);
    }
    return assignments;
}

// ----------------------------------------------------------------------------------------

bool compare_first (
    const std::pair<double,image_dataset_metadata::image>& a,
    const std::pair<double,image_dataset_metadata::image>& b
)
{
    return a.first < b.first;
}

// ----------------------------------------------------------------------------------------

double mean_aspect_ratio (
    const image_dataset_metadata::dataset& data
)
{
    double sum = 0;
    double cnt = 0;
    for (unsigned long i = 0; i < data.images.size(); ++i)
    {
        for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
        {
            rectangle rect = data.images[i].boxes[j].rect;
            if (rect.area() == 0)
                continue;
            sum += rect.width()/(double)rect.height();
            ++cnt;
        }
    }

    if (cnt != 0)
        return sum/cnt;
    else
        return 0;
}

// ----------------------------------------------------------------------------------------

int cluster_dataset(
    const dlib::command_line_parser& parser
)
{
    // make sure the user entered an argument to this program
    if (parser.number_of_arguments() != 1)
    {
        cerr << "The --cluster option requires you to give one XML file on the command line." << endl;
        return EXIT_FAILURE;
    }

    const unsigned long num_clusters = get_option(parser, "cluster", 2);
    const unsigned long chip_size = get_option(parser, "size", 8000);

    image_dataset_metadata::dataset data;

    set_current_dir(get_parent_directory(file(parser[0])));
    image_dataset_metadata::load_image_dataset_metadata(data, parser[0]);

    const double aspect_ratio = mean_aspect_ratio(data);

    dlib::array<array2d<rgb_pixel> > images;
    std::vector<matrix<double,0,1> > feats;
    console_progress_indicator pbar(data.images.size());
    // extract all the object chips and HOG features.
    cout << "Loading image data..." << endl;
    for (unsigned long i = 0; i < data.images.size(); ++i)
    {
        pbar.print_status(i);
        if (data.images[i].boxes.size() == 0)
            continue;
        array2d<rgb_pixel> img, chip;
        load_image(img, data.images[i].filename);

        for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
        {
            if (data.images[i].boxes[j].ignore || data.images[i].boxes[j].rect.area() < 10)
                continue;
            drectangle rect = data.images[i].boxes[j].rect;
            rect = set_aspect_ratio(rect, aspect_ratio);
            extract_image_chip(img, chip_details(rect, chip_size), chip);
            feats.push_back(extract_fhog_features(chip));
            images.push_back(chip);
        }
    }

    if (feats.size() == 0)
    {
        cerr << "No non-ignored object boxes found in the XML dataset.  You can't cluster an empty dataset." << endl;
        return EXIT_FAILURE;
    }

    cout << "\nClustering objects..." << endl;
    std::vector<assignment> assignments = angular_cluster(feats, num_clusters);


    // Now output each cluster to disk as an XML file.
    for (unsigned long c = 0; c < num_clusters; ++c)
    {
        // We are going to accumulate all the image metadata for cluster c.  We put it
        // into idata so we can sort the images such that images with central chips
        // come before less central chips.  The idea being to get the good chips to
        // show up first in the listing, making it easy to manually remove bad ones if
        // that is desired.
        std::vector<std::pair<double,image_dataset_metadata::image> > idata(data.images.size());
        unsigned long idx = 0;
        for (unsigned long i = 0; i < data.images.size(); ++i)
        {
            if (data.images[i].boxes.size() == 0)
                continue;
            idata[i].first = std::numeric_limits<double>::infinity();
            idata[i].second.filename = data.images[i].filename;
            for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
            {
                idata[i].second.boxes.push_back(data.images[i].boxes[j]);

                if (data.images[i].boxes[j].ignore || data.images[i].boxes[j].rect.area() < 10)
                    continue;

                // If this box goes into cluster c then update the score for the whole
                // image based on this boxes' score.  Otherwise, mark the box as
                // ignored.
                if (assignments[idx].c == c)
                    idata[i].first = std::min(idata[i].first, assignments[idx].dist);
                else
                    idata[i].second.boxes.back().ignore = true;

                ++idx;
            }
        }

        // now save idata to an xml file.
        std::sort(idata.begin(), idata.end(), compare_first);
        image_dataset_metadata::dataset cdata;
        cdata.comment = data.comment + "\n\n This file contains objects which were clustered into group " + 
                     cast_to_string(c+1) + " of " + cast_to_string(num_clusters) + " groups with a chip size of " + 
                     cast_to_string(chip_size) + " by imglab.";
        cdata.name = data.name;
        for (unsigned long i = 0; i < idata.size(); ++i)
        {
            // if this image has non-ignored boxes in it then include it in the output.
            if (idata[i].first != std::numeric_limits<double>::infinity())
                cdata.images.push_back(idata[i].second);
        }

        string outfile = "cluster_"+pad_int_with_zeros(c+1, 3) + ".xml";
        cout << "Saving " << outfile << endl;
        save_image_dataset_metadata(cdata, outfile);
    }

    // Now output each cluster to disk as a big tiled jpeg file.  Sort everything so, just
    // like in the xml file above, the best objects come first in the tiling.
    std::sort(assignments.begin(), assignments.end());
    for (unsigned long c = 0; c < num_clusters; ++c)
    {
        dlib::array<array2d<rgb_pixel> > temp;
        for (unsigned long i = 0; i < assignments.size(); ++i)
        {
            if (assignments[i].c == c)
                temp.push_back(images[assignments[i].idx]);
        }

        string outfile = "cluster_"+pad_int_with_zeros(c+1, 3) + ".jpg";
        cout << "Saving " << outfile << endl;
        save_jpeg(tile_images(temp), outfile);
    }


    return EXIT_SUCCESS;
}

// ----------------------------------------------------------------------------------------