Commit a8acc89c authored by Davis King

Added --shuffle and --split options to imglab. Also added more outputs to --stats

parent de663ec2
......@@ -6,6 +6,7 @@
#include "convert_pascal_v1.h"
#include "convert_idl.h"
#include <dlib/cmd_line_parser.h>
#include <dlib/svm.h>
#include <iostream>
#include <fstream>
......@@ -15,7 +16,7 @@
#include <dlib/dir_nav.h>
const char* VERSION = "0.5";
const char* VERSION = "0.6";
......@@ -71,6 +72,71 @@ void create_new_dataset (
// ----------------------------------------------------------------------------------------
int split_dataset (
const command_line_parser& parser
)
{
if (parser.number_of_arguments() != 1)
{
cerr << "The --split option requires you to give one XML file on the command line." << endl;
return EXIT_FAILURE;
}
const std::string label = parser.option("split").argument();
dlib::image_dataset_metadata::dataset data, data_with, data_without;
load_image_dataset_metadata(data, parser[0]);
data_with.name = data.name;
data_with.comment = data.comment;
data_without.name = data.name;
data_without.comment = data.comment;
for (unsigned long i = 0; i < data.images.size(); ++i)
{
dlib::image_dataset_metadata::image temp = data.images[i];
bool has_the_label = false;
// check for the label we are looking for
for (unsigned long j = 0; j < temp.boxes.size(); ++j)
{
if (temp.boxes[j].label == label)
{
has_the_label = true;
break;
}
}
if (has_the_label)
{
std::vector<dlib::image_dataset_metadata::box> boxes;
// remove other labels
for (unsigned long j = 0; j < temp.boxes.size(); ++j)
{
if (temp.boxes[j].label == label)
{
// put only the boxes with the label we want into boxes
boxes.push_back(temp.boxes[j]);
}
}
temp.boxes = boxes;
data_with.images.push_back(temp);
}
else
{
data_without.images.push_back(temp);
}
}
save_image_dataset_metadata(data_with, left_substr(parser[0],".") + "_with_"+label + ".xml");
save_image_dataset_metadata(data_without, left_substr(parser[0],".") + "_without_"+label + ".xml");
return EXIT_SUCCESS;
}
// ----------------------------------------------------------------------------------------
void print_all_labels (
const dlib::image_dataset_metadata::dataset& data
)
......@@ -100,19 +166,27 @@ void print_all_label_stats (
)
{
std::map<std::string, running_stats<double> > area_stats, aspect_ratio;
std::map<std::string, int> image_hits;
std::set<std::string> labels;
for (unsigned long i = 0; i < data.images.size(); ++i)
{
std::set<std::string> temp;
for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
{
labels.insert(data.images[i].boxes[j].label);
temp.insert(data.images[i].boxes[j].label);
area_stats[data.images[i].boxes[j].label].add(data.images[i].boxes[j].rect.area());
aspect_ratio[data.images[i].boxes[j].label].add(data.images[i].boxes[j].rect.width()/
(double)data.images[i].boxes[j].rect.height());
}
// count the number of images for each label
for (std::set<std::string>::iterator i = temp.begin(); i != temp.end(); ++i)
image_hits[*i] += 1;
}
cout << "Number of images: "<< data.images.size() << endl;
cout << "Number of different labels: "<< labels.size() << endl << endl;
for (std::set<std::string>::iterator i = labels.begin(); i != labels.end(); ++i)
......@@ -120,6 +194,7 @@ void print_all_label_stats (
if (i->size() != 0)
{
cout << "Label: "<< *i << endl;
cout << " number of images: " << image_hits[*i] << endl;
cout << " number of occurrences: " << area_stats[*i].current_n() << endl;
cout << " min box area: " << area_stats[*i].min() << endl;
cout << " max box area: " << area_stats[*i].max() << endl;
......@@ -176,13 +251,20 @@ int main(int argc, char** argv)
parser.add_option("parts","The display will allow image parts to be labeled. The set of allowable parts "
"defined in a space separated list contained in <arg>.",1);
parser.add_option("rmdiff","Remove boxes marked as difficult.");
parser.add_option("shuffle","Randomly shuffle the order of the images listed in file <arg>.");
parser.add_option("seed", "When using --shuffle, set the random seed to the string <arg>.",1);
parser.add_option("split", "Split the contents of an XML file into two separate files. One containing the "
"images with objects labeled <arg> and another file with all the other images. Additionally, the file "
"containing the <arg> labeled objects will not contain any other labels other than <arg>. "
"That is, the images in the first file are stripped of all labels other than the <arg> labels.",1);
parser.parse(argc, argv);
const char* singles[] = {"h","c","r","l","convert","parts","rmdiff"};
const char* singles[] = {"h","c","r","l","convert","parts","rmdiff","seed", "shuffle", "split"};
parser.check_one_time_options(singles);
const char* c_sub_ops[] = {"r", "convert"};
parser.check_sub_options("c", c_sub_ops);
parser.check_sub_option("shuffle", "seed");
parser.check_incompatible_options("c", "l");
parser.check_incompatible_options("c", "rmdiff");
parser.check_incompatible_options("c", "rename");
......@@ -269,6 +351,29 @@ int main(int argc, char** argv)
return EXIT_SUCCESS;
}
if (parser.option("split"))
{
return split_dataset(parser);
}
if (parser.option("shuffle"))
{
if (parser.number_of_arguments() != 1)
{
cerr << "The -shuffle option requires you to give one XML file on the command line." << endl;
return EXIT_FAILURE;
}
dlib::image_dataset_metadata::dataset data;
load_image_dataset_metadata(data, parser[0]);
const string default_seed = cast_to_string(time(0));
const string seed = get_option(parser, "seed", default_seed);
dlib::rand rnd(seed);
randomize_samples(data.images, rnd);
save_image_dataset_metadata(data, parser[0]);
return EXIT_SUCCESS;
}
if (parser.option("stats"))
{
if (parser.number_of_arguments() != 1)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment