Commit 36cf4a9b authored by Davis King's avatar Davis King

Filled in the rest of the interface for the structural_sequence_labeling_trainer

and cleaned up a few other related things.
parent 72d3ab86
......@@ -42,7 +42,7 @@ namespace dlib
for (unsigned long j = 0; j < pred.size(); ++j)
{
const unsigned long truth = labels[i][j];
if (truth >= res.nr())
if (truth >= static_cast<unsigned long>(res.nr()))
{
// ignore labels the labeler doesn't know about.
continue;
......
......@@ -125,7 +125,7 @@ namespace dlib
weights(weights_)
{
// make sure requires clause is not broken
DLIB_ASSERT(fe_.num_features() == weights_.size(),
DLIB_ASSERT(fe_.num_features() == static_cast<unsigned long>(weights_.size()),
"\t sequence_labeler::sequence_labeler()"
<< "\n\t These sizes should match"
<< "\n\t fe_.num_features(): " << fe_.num_features()
......
......@@ -29,10 +29,15 @@ namespace dlib
explicit structural_sequence_labeling_trainer (
const feature_extractor& fe_
) : fe(fe_)
{}
{
set_defaults();
}
structural_sequence_labeling_trainer (
) {}
)
{
set_defaults();
}
const feature_extractor& get_feature_extractor (
) const { return fe; }
......@@ -40,6 +45,96 @@ namespace dlib
// Returns the number of distinct labels understood by the feature
// extractor, i.e. get_feature_extractor().num_labels().
unsigned long num_labels (
) const { return fe.num_labels(); }
// Sets the number of threads the structural SVM solver will use during
// training (forwarded to structural_svm_sequence_labeling_problem in train()).
void set_num_threads (
unsigned long num
)
{
num_threads = num;
}
// Returns the number of threads used during training.
unsigned long get_num_threads (
) const
{
return num_threads;
}
// Sets the stopping tolerance passed to the structural SVM solver.
// Requires eps_ > 0; violated preconditions are caught by the
// DLIB_ASSERT below in debug builds.
void set_epsilon (
double eps_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(eps_ > 0,
"\t void structural_sequence_labeling_trainer::set_epsilon()"
<< "\n\t eps_ must be greater than 0"
<< "\n\t eps_: " << eps_
<< "\n\t this: " << this
);
eps = eps_;
}
const double get_epsilon (
) const { return eps; }
// Sets the number of cached labeler invocations kept per training sample
// by the structural SVM problem; 0 disables caching entirely.
void set_max_cache_size (
unsigned long max_size
)
{
max_cache_size = max_size;
}
// Returns the per-sample cache size used during training (see set_max_cache_size()).
unsigned long get_max_cache_size (
) const
{
return max_cache_size;
}
// Enables printing of optimization progress messages to standard out
// during train().
void be_verbose (
)
{
verbose = true;
}
// Disables all status output during training (the default).
void be_quiet (
)
{
verbose = false;
}
// Sets the oca optimizer used to solve the structural SVM problem in train().
void set_oca (
const oca& item
)
{
solver = item;
}
// Returns a copy of the oca optimizer used to solve the structural SVM
// problem.  (Returned as const by value, following the convention used
// elsewhere in this library for returned temporaries.)
const oca get_oca (
) const
{
return solver;
}
// Sets the SVM regularization parameter C.  Requires C_ > 0; violated
// preconditions are caught by the DLIB_ASSERT below in debug builds.
void set_c (
double C_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(C_ > 0,
"\t void structural_sequence_labeling_trainer::set_c()"
<< "\n\t C_ must be greater than 0"
<< "\n\t C_: " << C_
<< "\n\t this: " << this
);
C = C_;
}
// Returns the SVM regularization parameter C: the trade-off between
// fitting the training data exactly (large C) and tolerating some
// training error in the hope of better generalization (small C).
double get_c (
) const
{
return C;
}
const sequence_labeler<feature_extractor> train(
const std::vector<sample_sequence_type>& x,
......@@ -77,12 +172,14 @@ namespace dlib
structural_svm_sequence_labeling_problem<feature_extractor> prob(x, y, fe);
oca solver;
structural_svm_sequence_labeling_problem<feature_extractor> prob(x, y, fe, num_threads);
matrix<double,0,1> weights;
prob.be_verbose();
prob.set_epsilon(0.5);
prob.set_c(100);
if (verbose)
prob.be_verbose();
prob.set_epsilon(eps);
prob.set_c(C);
prob.set_max_cache_size(max_cache_size);
solver(prob, weights);
return sequence_labeler<feature_extractor>(fe,weights);
......@@ -90,6 +187,22 @@ namespace dlib
private:
double C;
oca solver;
double eps;
bool verbose;
unsigned long num_threads;
unsigned long max_cache_size;
// Initializes every trainer parameter to its documented default.
// Called from both constructors; these values must stay in sync with
// the defaults promised by the abstract documentation
// (C == 100, eps == 0.1, num_threads == 2, max_cache_size == 40, quiet).
void set_defaults ()
{
C = 100;
verbose = false;
eps = 0.1;
num_threads = 2;
max_cache_size = 40;
}
feature_extractor fe;
};
......
......@@ -20,32 +20,61 @@ namespace dlib
class structural_sequence_labeling_trainer
{
/*!
REQUIREMENTS ON feature_extractor
It must be an object that implements an interface compatible with
the example_feature_extractor defined in dlib/svm/sequence_labeler_abstract.h.
WHAT THIS OBJECT REPRESENTS
This object is a tool for learning to do sequence labeling based
on a set of training data. The training procedure produces a
sequence_labeler object which can be used to predict the labels of
new data sequences.
Note that this is just a convenience wrapper around the
structural_svm_sequence_labeling_problem to make it look
similar to all the other trainers in dlib.
!*/
public:
typedef typename feature_extractor::sample_type sample_type;
typedef std::vector<sample_type> sample_sequence_type;
typedef std::vector<sample_type> sample_sequence_type;
typedef std::vector<unsigned long> labeled_sequence_type;
typedef sequence_labeler<feature_extractor> trained_function_type;
explicit structural_sequence_labeling_trainer (
const feature_extractor& fe_
) : fe(fe_)
{}
structural_sequence_labeling_trainer (
) {}
);
/*!
ensures
- #get_c() == 100
- this object isn't verbose
- #get_epsilon() == 0.1
- #get_num_threads() == 2
- #get_max_cache_size() == 40
- #get_feature_extractor() == a default initialized feature_extractor
!*/
explicit structural_sequence_labeling_trainer (
const feature_extractor& fe
);
/*!
ensures
- #get_c() == 100
- this object isn't verbose
- #get_epsilon() == 0.1
- #get_num_threads() == 2
- #get_max_cache_size() == 40
- #get_feature_extractor() == fe
!*/
const feature_extractor& get_feature_extractor (
) const { return fe; }
) const;
/*!
ensures
- returns the feature extractor used by this object
!*/
unsigned long num_labels (
) const { return fe.num_labels(); }
) const;
/*!
ensures
- returns get_feature_extractor().num_labels()
......@@ -53,6 +82,115 @@ namespace dlib
element of a sequence)
!*/
void set_num_threads (
unsigned long num
);
/*!
ensures
- #get_num_threads() == num
!*/
unsigned long get_num_threads (
) const;
/*!
ensures
- returns the number of threads used during training. You should
usually set this equal to the number of processing cores on your
machine.
!*/
void set_epsilon (
double eps
);
/*!
requires
- eps > 0
ensures
- #get_epsilon() == eps
!*/
const double get_epsilon (
) const;
/*!
ensures
- returns the error epsilon that determines when training should stop.
Smaller values may result in a more accurate solution but take longer
to train. You can think of this epsilon value as saying "solve the
optimization problem until the average number of labeling mistakes per
training sample is within epsilon of its optimal value".
!*/
void set_max_cache_size (
unsigned long max_size
);
/*!
ensures
- #get_max_cache_size() == max_size
!*/
unsigned long get_max_cache_size (
) const;
/*!
ensures
- During training, this object basically runs the sequence_labeler on
each training sample, over and over. To speed this up, it is possible to
cache the results of these labeler invocations. This function returns the
number of cache elements per training sample kept in the cache. Note
that a value of 0 means caching is not used at all.
!*/
void be_verbose (
);
/*!
ensures
- This object will print status messages to standard out so that a
user can observe the progress of the algorithm.
!*/
void be_quiet (
);
/*!
ensures
- this object will not print anything to standard out
!*/
void set_oca (
const oca& item
);
/*!
ensures
- #get_oca() == item
!*/
const oca get_oca (
) const;
/*!
ensures
- returns a copy of the optimizer used to solve the structural SVM problem.
!*/
void set_c (
double C
);
/*!
requires
- C > 0
ensures
- #get_c() == C
!*/
double get_c (
) const;
/*!
ensures
- returns the SVM regularization parameter. It is the parameter
that determines the trade-off between trying to fit the training
data (i.e. minimize the loss) or allowing more errors but hopefully
improving the generalization of the resulting sequence labeler. Larger
values encourage exact fitting while smaller values of C may encourage
better generalization.
!*/
const sequence_labeler<feature_extractor> train(
const std::vector<sample_sequence_type>& x,
const std::vector<labeled_sequence_type>& y
......
......@@ -76,9 +76,10 @@ namespace dlib
structural_svm_sequence_labeling_problem(
const std::vector<std::vector<sample_type> >& samples_,
const std::vector<std::vector<unsigned long> >& labels_,
const feature_extractor& fe_
const feature_extractor& fe_,
unsigned long num_threads = 2
) :
structural_svm_problem_threaded<matrix_type,feature_vector_type>(4),
structural_svm_problem_threaded<matrix_type,feature_vector_type>(num_threads),
samples(samples_),
labels(labels_),
fe(fe_)
......
......@@ -4,11 +4,10 @@
#ifdef DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_ABSTRACT_H__
#include "structural_svm_sequence_labeling_problem_abstract.h"
#include "../matrix.h"
#include "sequence_labeler.h"
#include <vector>
#include "structural_svm_problem_threaded.h"
#include "structural_svm_problem_threaded_abstract.h"
#include "sequence_labeler_abstract.h"
// ----------------------------------------------------------------------------------------
......@@ -19,26 +18,52 @@ namespace dlib
typename feature_extractor
>
class structural_svm_sequence_labeling_problem : noncopyable,
public structural_svm_problem_threaded<matrix<double,0,1>,
std::vector<std::pair<unsigned long,double> > >
public structural_svm_problem_threaded<matrix<double,0,1>,
std::vector<std::pair<unsigned long,double> > >
{
public:
typedef matrix<double,0,1> matrix_type;
typedef std::vector<std::pair<unsigned long, double> > feature_vector_type;
/*!
REQUIREMENTS ON feature_extractor
It must be an object that implements an interface compatible with
the example_feature_extractor defined in dlib/svm/sequence_labeler_abstract.h.
WHAT THIS OBJECT REPRESENTS
This object is a tool for learning the weight vector needed to use
a sequence_labeler object.
It learns the parameter vector by formulating the problem as a structural
SVM problem. The general approach is discussed in the paper:
Hidden Markov Support Vector Machines by
Y. Altun, I. Tsochantaridis, T. Hofmann
While the particular optimization strategy used is the method from:
T. Joachims, T. Finley, Chun-Nam Yu, Cutting-Plane Training of
Structural SVMs, Machine Learning, 77(1):27-59, 2009.
!*/
public:
typedef typename feature_extractor::sample_type sample_type;
structural_svm_sequence_labeling_problem(
const std::vector<std::vector<sample_type> >& samples_,
const std::vector<std::vector<unsigned long> >& labels_,
const feature_extractor& fe_
) :
structural_svm_problem_threaded<matrix_type,feature_vector_type>(4),
samples(samples_),
labels(labels_),
fe(fe_)
{
}
const std::vector<std::vector<sample_type> >& samples,
const std::vector<std::vector<unsigned long> >& labels,
const feature_extractor& fe,
unsigned long num_threads = 2
);
/*!
requires
- is_sequence_labeling_problem(samples, labels)
- for all valid i and j: labels[i][j] < fe.num_labels()
ensures
- This object attempts to learn a mapping from the given samples to the
given labels. In particular, it attempts to learn to predict labels[i]
based on samples[i]. Or in other words, this object can be used to learn
a parameter vector, w, such that a sequence_labeler declared as:
sequence_labeler<feature_extractor> labeler(fe,w)
results in a labeler object which attempts to compute the following mapping:
labels[i] == labeler(samples[i])
- This object will use num_threads threads during the optimization
procedure. You should set this parameter equal to the number of
available processing cores on your machine.
!*/
};
// ----------------------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment