Commit dbc1ea4d authored by Davis King's avatar Davis King

Added --rmdupes.

parent 11e03822
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <dlib/image_transforms.h> #include <dlib/image_transforms.h>
#include <dlib/svm.h> #include <dlib/svm.h>
#include <dlib/console_progress_indicator.h> #include <dlib/console_progress_indicator.h>
#include <dlib/md5.h>
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
...@@ -468,6 +469,8 @@ int main(int argc, char** argv) ...@@ -468,6 +469,8 @@ int main(int argc, char** argv)
parser.add_option("rename", "Rename all labels of <arg1> to <arg2>.",2); parser.add_option("rename", "Rename all labels of <arg1> to <arg2>.",2);
parser.add_option("parts","The display will allow image parts to be labeled. The set of allowable parts " parser.add_option("parts","The display will allow image parts to be labeled. The set of allowable parts "
"is defined by <arg> which should be a space separated list of parts.",1); "is defined by <arg> which should be a space separated list of parts.",1);
parser.add_option("rmdupes","Remove duplicate images from the dataset. This is done by comparing "
"the md5 hash of each image file and removing duplicate images. " );
parser.add_option("rmdiff","Set the ignored flag to true for boxes marked as difficult."); parser.add_option("rmdiff","Set the ignored flag to true for boxes marked as difficult.");
parser.add_option("rmtrunc","Set the ignored flag to true for boxes that are partially outside the image."); parser.add_option("rmtrunc","Set the ignored flag to true for boxes that are partially outside the image.");
parser.add_option("shuffle","Randomly shuffle the order of the images listed in file <arg>."); parser.add_option("shuffle","Randomly shuffle the order of the images listed in file <arg>.");
...@@ -489,7 +492,7 @@ int main(int argc, char** argv) ...@@ -489,7 +492,7 @@ int main(int argc, char** argv)
parser.parse(argc, argv); parser.parse(argc, argv);
const char* singles[] = {"h","c","r","l","convert","parts","rmdiff", "rmtrunc","seed", "shuffle", "split", "add", const char* singles[] = {"h","c","r","l","convert","parts","rmdiff", "rmtrunc", "rmdupes", "seed", "shuffle", "split", "add",
"flip", "rotate", "tile", "size", "cluster"}; "flip", "rotate", "tile", "size", "cluster"};
parser.check_one_time_options(singles); parser.check_one_time_options(singles);
const char* c_sub_ops[] = {"r", "convert"}; const char* c_sub_ops[] = {"r", "convert"};
...@@ -499,6 +502,7 @@ int main(int argc, char** argv) ...@@ -499,6 +502,7 @@ int main(int argc, char** argv)
parser.check_sub_options(size_parent_ops, "size"); parser.check_sub_options(size_parent_ops, "size");
parser.check_incompatible_options("c", "l"); parser.check_incompatible_options("c", "l");
parser.check_incompatible_options("c", "rmdiff"); parser.check_incompatible_options("c", "rmdiff");
parser.check_incompatible_options("c", "rmdupes");
parser.check_incompatible_options("c", "rmtrunc"); parser.check_incompatible_options("c", "rmtrunc");
parser.check_incompatible_options("c", "add"); parser.check_incompatible_options("c", "add");
parser.check_incompatible_options("c", "flip"); parser.check_incompatible_options("c", "flip");
...@@ -527,6 +531,7 @@ int main(int argc, char** argv) ...@@ -527,6 +531,7 @@ int main(int argc, char** argv)
parser.check_incompatible_options("convert", "parts"); parser.check_incompatible_options("convert", "parts");
parser.check_incompatible_options("convert", "cluster"); parser.check_incompatible_options("convert", "cluster");
parser.check_incompatible_options("rmdiff", "rename"); parser.check_incompatible_options("rmdiff", "rename");
parser.check_incompatible_options("rmdupes", "rename");
parser.check_incompatible_options("rmtrunc", "rename"); parser.check_incompatible_options("rmtrunc", "rename");
const char* convert_args[] = {"pascal-xml","pascal-v1","idl"}; const char* convert_args[] = {"pascal-xml","pascal-v1","idl"};
parser.check_option_arg_range("convert", convert_args); parser.check_option_arg_range("convert", convert_args);
...@@ -619,6 +624,34 @@ int main(int argc, char** argv) ...@@ -619,6 +624,34 @@ int main(int argc, char** argv)
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
if (parser.option("rmdupes"))
{
if (parser.number_of_arguments() != 1)
{
cerr << "The --rmdupes option requires you to give one XML file on the command line." << endl;
return EXIT_FAILURE;
}
dlib::image_dataset_metadata::dataset data, data_out;
std::set<std::string> hashes;
load_image_dataset_metadata(data, parser[0]);
data_out = data;
data_out.images.clear();
for (unsigned long i = 0; i < data.images.size(); ++i)
{
ifstream fin(data.images[i].filename.c_str(), ios::binary);
string hash = md5(fin);
if (hashes.count(hash) == 0)
{
hashes.insert(hash);
data_out.images.push_back(data.images[i]);
}
}
save_image_dataset_metadata(data_out, parser[0]);
return EXIT_SUCCESS;
}
if (parser.option("rmtrunc")) if (parser.option("rmtrunc"))
{ {
if (parser.number_of_arguments() != 1) if (parser.number_of_arguments() != 1)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment