Commit 0ba771b5 authored by Davis King

Added a set of tools to allow a user to easily learn to do sequence
labeling using dlib's structural SVM implementation.
parent 700ea34e
......@@ -34,12 +34,14 @@
#include "svm/cross_validate_multiclass_trainer.h"
#include "svm/cross_validate_regression_trainer.h"
#include "svm/cross_validate_object_detection_trainer.h"
#include "svm/cross_validate_sequence_labeler.h"
#include "svm/one_vs_all_decision_function.h"
#include "svm/one_vs_all_trainer.h"
#include "svm/structural_svm_problem.h"
#include "svm/svm_multiclass_linear_trainer.h"
#include "svm/sequence_labeler.h"
#endif // DLIB_SVm_HEADER
......
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_H__
#define DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_H__
#include "cross_validate_sequence_labeler_abstract.h"
#include <vector>
#include "../matrix.h"
#include "svm.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
// Evaluates a trained sequence labeler on a labeled data set and returns the
// resulting confusion matrix (rows index the true label, columns index the
// predicted label).  Rows are appended on demand when a true label is larger
// than anything the labeler knows about.
template <typename sequence_labeler_type, typename sample_type>
const matrix<double> test_sequence_labeler (
    const sequence_labeler_type& labeler,
    const std::vector<std::vector<sample_type> >& samples,
    const std::vector<std::vector<unsigned long> >& labels
)
{
    // Enforce the documented precondition before touching any data.
    DLIB_CASSERT( is_sequence_labeling_problem(samples, labels) == true,
        "\tmatrix test_sequence_labeler()"
        << "\n\t invalid inputs were given to this function"
        << "\n\t is_sequence_labeling_problem(samples, labels): "
        << is_sequence_labeling_problem(samples, labels));

    // Start with a square, zeroed matrix: one row and column per known label.
    matrix<double> confusion(labeler.num_labels(), labeler.num_labels());
    confusion = 0;

    // Reused across sequences so the labeler can overwrite it in place.
    std::vector<unsigned long> predicted;

    for (unsigned long seq = 0; seq < samples.size(); ++seq)
    {
        labeler.label_sequence(samples[seq], predicted);
        const std::vector<unsigned long>& truth_seq = labels[seq];

        for (unsigned long pos = 0; pos < predicted.size(); ++pos)
        {
            const unsigned long truth = truth_seq[pos];

            // Grow the matrix downwards when the true label falls outside of it.
            if (truth >= confusion.nr())
            {
                confusion = join_cols(confusion, zeros_matrix<double>(truth-confusion.nr()+1, confusion.nc()));
            }

            confusion(truth, predicted[pos]) += 1;
        }
    }

    return confusion;
}
// ----------------------------------------------------------------------------------------
// Performs folds-fold cross-validation of a sequence labeling trainer.
//
// requires
//     - is_sequence_labeling_problem(samples, labels)
//     - 1 < folds <= samples.size()
// ensures
//     - For each fold, samples.size()/folds consecutive sequences are held out
//       for testing and the remaining sequences (taken cyclically, starting
//       right after the test window) are used to train a labeler via
//       trainer.train().
//     - Returns the element-wise sum of the confusion matrices produced by
//       test_sequence_labeler() on each fold.  The result is padded with zeros
//       so that it is exactly as large as the largest per-fold matrix.
template <
    typename trainer_type,
    typename sample_type
    >
const matrix<double> cross_validate_sequence_labeler (
    const trainer_type& trainer,
    const std::vector<std::vector<sample_type> >& samples,
    const std::vector<std::vector<unsigned long> >& labels,
    const long folds
)
{
    // make sure requires clause is not broken
    DLIB_CASSERT(is_sequence_labeling_problem(samples,labels) == true &&
                 1 < folds && folds <= static_cast<long>(samples.size()),
        "\tmatrix cross_validate_sequence_labeler()"
        << "\n\t invalid inputs were given to this function"
        << "\n\t samples.size(): " << samples.size()
        << "\n\t folds: " << folds
        << "\n\t is_sequence_labeling_problem(samples,labels): " << is_sequence_labeling_problem(samples,labels)
        );

    const long num_in_test = samples.size()/folds;
    const long num_in_train = samples.size() - num_in_test;

    std::vector<std::vector<sample_type> > x_test, x_train;
    std::vector<std::vector<unsigned long> > y_test, y_train;

    // Index of the first sequence of the next test window.  It keeps advancing
    // from fold to fold so that every fold tests on a different window.
    long next_test_idx = 0;

    matrix<double> res;

    for (long i = 0; i < folds; ++i)
    {
        x_test.clear();
        y_test.clear();
        x_train.clear();
        y_train.clear();

        // load up the test samples
        for (long cnt = 0; cnt < num_in_test; ++cnt)
        {
            x_test.push_back(samples[next_test_idx]);
            y_test.push_back(labels[next_test_idx]);
            next_test_idx = (next_test_idx + 1)%samples.size();
        }

        // load up the training samples: everything after the test window,
        // wrapping around to the start of the data set.
        long next = next_test_idx;
        for (long cnt = 0; cnt < num_in_train; ++cnt)
        {
            x_train.push_back(samples[next]);
            y_train.push_back(labels[next]);
            next = (next + 1)%samples.size();
        }

        matrix<double> temp = test_sequence_labeler(trainer.train(x_train,y_train), x_test, y_test);

        // Make sure res is always at least as big as temp.  This might not be the case
        // because temp is sized differently depending on how many different kinds of labels
        // test_sequence_labeler() sees.
        if (get_rect(res).contains(get_rect(temp)) == false)
        {
            if (res.size() == 0)
            {
                res.set_size(temp.nr(), temp.nc());
                res = 0;
            }

            // Make res bigger by padding with zeros on the bottom or right if necessary.
            // The number of rows to add is the difference in row counts; using
            // res.nc() here would over-pad whenever res is not square.
            if (res.nr() < temp.nr())
                res = join_cols(res, zeros_matrix<double>(temp.nr()-res.nr(), res.nc()));
            if (res.nc() < temp.nc())
                res = join_rows(res, zeros_matrix<double>(res.nr(), temp.nc()-res.nc()));
        }

        // add temp to res
        for (long r = 0; r < temp.nr(); ++r)
        {
            for (long c = 0; c < temp.nc(); ++c)
            {
                res(r,c) += temp(r,c);
            }
        }

    } // for (long i = 0; i < folds; ++i)

    return res;
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_H__
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_ABSTRACT_H__
#ifdef DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_ABSTRACT_H__
#include "cross_validate_sequence_labeler_abstract.h"
#include <vector>
#include "../matrix.h"
#include "svm.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename sequence_labeler_type,
typename sample_type
>
const matrix<double> test_sequence_labeler (
const sequence_labeler_type& labeler,
const std::vector<std::vector<sample_type> >& samples,
const std::vector<std::vector<unsigned long> >& labels
);
/*!
    requires
        - is_sequence_labeling_problem(samples, labels)
    ensures
        - Tests labeler against the given samples and labels and returns a confusion
          matrix summarizing the results.
        - Each sequence samples[i] is labeled with labeler.label_sequence() and every
          predicted element is compared against the corresponding element of labels[i].
        - The confusion matrix C returned by this function has the following properties.
            - C.nc() == labeler.num_labels()
            - C.nr() == max(labeler.num_labels(), max value in labels + 1)
              (i.e. extra rows are appended when labels contains a value that is
              not smaller than labeler.num_labels())
            - C(T,P) == the number of times a sample with label T was predicted
              to have a label of P.
!*/
// ----------------------------------------------------------------------------------------
template <
typename trainer_type,
typename sample_type
>
const matrix<double> cross_validate_sequence_labeler (
const trainer_type& trainer,
const std::vector<std::vector<sample_type> >& samples,
const std::vector<std::vector<unsigned long> >& labels,
const long folds
);
/*!
    requires
        - is_sequence_labeling_problem(samples, labels)
        - 1 < folds <= samples.size()
    ensures
        - Performs k-fold cross-validation of the given trainer, where k == folds.
        - In each fold, samples.size()/folds sequences are held out for testing and
          the remaining sequences are passed to trainer.train().  The resulting
          labeler is evaluated on the held out sequences with test_sequence_labeler().
        - Returns the sum of the confusion matrices produced by
          test_sequence_labeler() over all the folds.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_ABSTRACT_H__
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SEQUENCE_LAbELER_H___
#define DLIB_SEQUENCE_LAbELER_H___
#include "sequence_labeler_abstract.h"
#include "../matrix.h"
#include <vector>
#include "../optimization/find_max_factor_graph_viterbi.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
namespace fe_helpers
{
// Functor handed to a feature extractor's get_features().  Each call reports
// one active feature, and the functor accumulates in `value` the dot product
// between those features and the weight vector `lambda`.
template <typename EXP>
struct dot_functor
{
    // Weight vector the reported features are multiplied against.
    const matrix_exp<EXP>& lambda;
    // Running dot product.
    double value;

    dot_functor(const matrix_exp<EXP>& lambda_) : lambda(lambda_), value(0) {}

    // Indicator feature: contributes lambda(feat_index) to the sum.
    inline void operator() (unsigned long feat_index)
    {
        value += lambda(feat_index);
    }

    // Real valued feature: contributes feat_value*lambda(feat_index) to the sum.
    inline void operator() (unsigned long feat_index, double feat_value)
    {
        value += feat_value*lambda(feat_index);
    }
};
template <typename feature_extractor, typename EXP, typename sample_type, typename EXP2>
double dot(
const matrix_exp<EXP>& lambda,
const feature_extractor& fe,
unsigned long position,
const matrix_exp<EXP2>& label_states,
const std::vector<sample_type>& x
)
{
dot_functor<EXP> dot(lambda);
fe.get_features(dot, position, label_states, x);
return dot.value;
}
}
// ----------------------------------------------------------------------------------------
// Labels every element of a sequence by running find_max_factor_graph_viterbi()
// on a chain model whose factor scores are linear in the features produced by
// the feature_extractor.
template <typename feature_extractor>
class sequence_labeler
{
public:
    typedef typename feature_extractor::sample_type sample_type;
    typedef std::vector<sample_type> sample_sequence_type;
    typedef std::vector<unsigned long> labeled_sequence_type;

private:
    // Adapter exposing one input sequence as the MAP problem consumed by
    // find_max_factor_graph_viterbi().  It only holds references, so it must
    // not outlive the objects it was built from.
    class map_prob
    {
    public:
        map_prob(
            const sample_sequence_type& x_,
            const feature_extractor& fe_,
            const matrix<double,0,1>& weights_
        ) :
            x(x_),
            fe(fe_),
            weights(weights_)
        {
        }

        unsigned long order() const { return fe.order(); }

        unsigned long num_states() const { return fe.num_labels(); }

        unsigned long number_of_nodes() const { return x.size(); }

        // Score of assigning node_states at position node_id.  Labelings the
        // feature extractor rejects get a score of negative infinity.
        template <typename EXP>
        double factor_value (
            unsigned long node_id,
            const matrix_exp<EXP>& node_states
        ) const
        {
            const bool rejected = fe.reject_labeling(node_id, node_states, x);
            return rejected ? -std::numeric_limits<double>::infinity()
                            : fe_helpers::dot(weights, fe, node_id, node_states, x);
        }

        const sample_sequence_type& x;
        const feature_extractor& fe;
        const matrix<double,0,1>& weights;
    };

public:
    sequence_labeler() {}

    sequence_labeler(
        const feature_extractor& fe_,
        const matrix<double,0,1>& weights_
    ) :
        fe(fe_),
        weights(weights_)
    {}

    const feature_extractor& get_feature_extractor () const { return fe; }

    const matrix<double,0,1>& get_weights () const { return weights; }

    unsigned long num_labels () const { return fe.num_labels(); }

    // Convenience form of label_sequence() that returns the labels by value.
    labeled_sequence_type operator() (const sample_sequence_type& x) const
    {
        labeled_sequence_type labels;
        label_sequence(x, labels);
        return labels;
    }

    // Writes into y the labeling of x found by find_max_factor_graph_viterbi().
    void label_sequence (
        const sample_sequence_type& x,
        labeled_sequence_type& y
    ) const
    {
        find_max_factor_graph_viterbi(map_prob(x,fe,weights), y);
    }

private:
    feature_extractor fe;
    matrix<double,0,1> weights;
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SEQUENCE_LAbELER_H___
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SEQUENCE_LAbELER_ABSTRACT_H___
#ifdef DLIB_SEQUENCE_LAbELER_ABSTRACT_H___
#include "../matrix.h"
#include <vector>
#include "../optimization/find_max_factor_graph_viterbi_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
    typename feature_extractor
    >
class sequence_labeler
{
    /*!
        REQUIREMENTS ON feature_extractor
            The implementation uses the following members of feature_extractor:
                - a sample_type typedef
                - order()
                - num_labels()
                - reject_labeling(position, label_states, x)
                - get_features(functor, position, label_states, x)

        WHAT THIS OBJECT REPRESENTS
            This object is a tool for labeling each element of a sequence with
            an unsigned long label.  It scores a candidate labeling as the dot
            product between a weight vector and the features reported by the
            feature_extractor, gives labelings rejected by
            feature_extractor::reject_labeling() a score of negative infinity,
            and uses find_max_factor_graph_viterbi() to find the highest
            scoring labeling.
    !*/

public:
    typedef typename feature_extractor::sample_type sample_type;
    typedef std::vector<sample_type> sample_sequence_type;
    typedef std::vector<unsigned long> labeled_sequence_type;

    sequence_labeler();
    /*!
        ensures
            - this object holds a default constructed feature extractor and a
              default constructed weight vector.
    !*/

    sequence_labeler(
        const feature_extractor& fe_,
        const matrix<double,0,1>& weights_
    );
    /*!
        ensures
            - #get_feature_extractor() is a copy of fe_
            - #get_weights() is a copy of weights_
    !*/

    const feature_extractor& get_feature_extractor (
    ) const;
    /*!
        ensures
            - returns the feature extractor used by this object
    !*/

    const matrix<double,0,1>& get_weights (
    ) const;
    /*!
        ensures
            - returns the weight vector used to score candidate labelings
    !*/

    unsigned long num_labels (
    ) const;
    /*!
        ensures
            - returns get_feature_extractor().num_labels()
    !*/

    labeled_sequence_type operator() (
        const sample_sequence_type& x
    ) const;
    /*!
        ensures
            - returns the labeling of x that label_sequence(x, y) would store in y
    !*/

    void label_sequence (
        const sample_sequence_type& x,
        labeled_sequence_type& y
    ) const;
    /*!
        ensures
            - #y == the labeling of x found by find_max_factor_graph_viterbi()
              using the scores described above.
    !*/
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SEQUENCE_LAbELER_ABSTRACT_H___
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_H__
#define DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_H__
#include "structural_sequence_labeling_trainer_abstract.h"
#include "../algs.h"
#include "../optimization.h"
#include "structural_svm_sequence_labeling_problem.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
// Learns a sequence_labeler by solving a structural SVM sequence labeling
// problem with the oca optimizer.
template <typename feature_extractor>
class structural_sequence_labeling_trainer
{
public:
    typedef typename feature_extractor::sample_type sample_type;
    typedef std::vector<sample_type> sample_sequence_type;
    typedef std::vector<unsigned long> labeled_sequence_type;
    typedef sequence_labeler<feature_extractor> trained_function_type;

    structural_sequence_labeling_trainer () {}

    structural_sequence_labeling_trainer (const feature_extractor& fe_) : fe(fe_) {}

    // Trains on the sequences in x with the labels in y and returns a labeler
    // built from the feature extractor and the learned weights.
    const sequence_labeler<feature_extractor> train(
        const std::vector<sample_sequence_type>& x,
        const std::vector<labeled_sequence_type>& y
    ) const
    {
        // Set up the optimization problem.  The solver settings are fixed:
        // verbose output, epsilon of 0.5 and C of 100.
        structural_svm_sequence_labeling_problem<feature_extractor> problem(x, y, fe);
        problem.be_verbose();
        problem.set_epsilon(0.5);
        problem.set_c(100);

        // Solve for the weight vector.
        matrix<double,0,1> learned_weights;
        oca optimizer;
        optimizer(problem, learned_weights);

        return trained_function_type(fe, learned_weights);
    }

private:
    feature_extractor fe;
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_H__
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_ABSTRACT_H__
#ifdef DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_ABSTRACT_H__
#include "../algs.h"
#include "../optimization.h"
#include "structural_svm_sequence_labeling_problem_abstract.h"
#include "sequence_labeler_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
    typename feature_extractor
    >
class structural_sequence_labeling_trainer
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object is a tool for learning a sequence_labeler from labeled
            example sequences.  It does this by setting up a
            structural_svm_sequence_labeling_problem and solving it with the
            oca optimizer.
    !*/

public:
    typedef typename feature_extractor::sample_type sample_type;
    typedef std::vector<sample_type> sample_sequence_type;
    typedef std::vector<unsigned long> labeled_sequence_type;
    typedef sequence_labeler<feature_extractor> trained_function_type;

    structural_sequence_labeling_trainer (
        const feature_extractor& fe_
    );
    /*!
        ensures
            - this object will use a copy of fe_ as its feature extractor
    !*/

    structural_sequence_labeling_trainer (
    );
    /*!
        ensures
            - this object will use a default constructed feature extractor
    !*/

    const sequence_labeler<feature_extractor> train(
        const std::vector<sample_sequence_type>& x,
        const std::vector<labeled_sequence_type>& y
    ) const;
    /*!
        requires
            - presumably is_sequence_labeling_problem(x, y); the implementation
              does not check this itself (TODO confirm)
        ensures
            - Solves the structural SVM sequence labeling problem defined by
              x, y, and this object's feature extractor.  The implementation
              currently uses fixed solver settings: verbose output enabled, an
              epsilon of 0.5, and a C of 100.
            - returns a sequence_labeler constructed from this object's feature
              extractor and the learned weight vector.
    !*/
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_ABSTRACT_H__
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_H__
#define DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_H__
#include "structural_svm_sequence_labeling_problem_abstract.h"
#include "../matrix.h"
#include "sequence_labeler.h"
#include <vector>
#include "structural_svm_problem_threaded.h"
// ----------------------------------------------------------------------------------------
namespace dlib
{
namespace fe_helpers
{
// ----------------------------------------------------------------------------------------
// Functor handed to a feature extractor's get_features().  Every reported
// feature is appended to `feats` as an (index, value) pair.
struct get_feats_functor
{
    // Destination list.  Held by reference, so it must outlive this functor.
    std::vector<std::pair<unsigned long, double> >& feats;

    get_feats_functor(std::vector<std::pair<unsigned long, double> >& feats_) : feats(feats_) {}

    // Indicator feature: recorded with a value of 1.
    inline void operator() (unsigned long feat_index)
    {
        (*this)(feat_index, 1.0);
    }

    // Real valued feature: recorded as given.
    inline void operator() (unsigned long feat_index, double feat_value)
    {
        const std::pair<unsigned long, double> entry(feat_index, feat_value);
        feats.push_back(entry);
    }
};
// ----------------------------------------------------------------------------------------
template <typename feature_extractor, typename sample_type, typename EXP2>
void get_feature_vector(
std::vector<std::pair<unsigned long, double> >& feats,
const feature_extractor& fe,
unsigned long position,
const matrix_exp<EXP2>& label_states,
const std::vector<sample_type>& x
)
{
get_feats_functor funct(feats);
fe.get_features(funct, position, label_states, x);
}
}
// ----------------------------------------------------------------------------------------
// Structural SVM problem for sequence labeling.  The joint feature vector of
// a labeled sequence is the concatenation of the features the feature
// extractor reports at every position, and the loss is the number of
// mislabeled sequence elements.
template <typename feature_extractor>
class structural_svm_sequence_labeling_problem :
    public structural_svm_problem_threaded<matrix<double,0,1>, std::vector<std::pair<unsigned long,double> > >
{
public:
    typedef matrix<double,0,1> matrix_type;
    typedef std::vector<std::pair<unsigned long, double> > feature_vector_type;
    typedef typename feature_extractor::sample_type sample_type;

    // Only references to the arguments are stored, so they must outlive this
    // object.
    structural_svm_sequence_labeling_problem(
        const std::vector<std::vector<sample_type> >& samples_,
        const std::vector<std::vector<unsigned long> >& labels_,
        const feature_extractor& fe_
    ) :
        structural_svm_problem_threaded<matrix_type,feature_vector_type>(4),
        samples(samples_),
        labels(labels_),
        fe(fe_)
    {
    }

private:
    // Loss augmented MAP problem for one training sequence, in the form
    // expected by find_max_factor_graph_viterbi().
    class map_prob
    {
    public:
        map_prob(
            const std::vector<sample_type>& sample_,
            const std::vector<unsigned long>& label_,
            const feature_extractor& fe_,
            const matrix<double,0,1>& weights_
        ) :
            sample(sample_),
            label(label_),
            fe(fe_),
            weights(weights_)
        {
        }

        unsigned long order() const { return fe.order(); }

        unsigned long num_states() const { return fe.num_labels(); }

        unsigned long number_of_nodes() const { return sample.size(); }

        template <typename EXP>
        double factor_value (
            unsigned long node_id,
            const matrix_exp<EXP>& node_states
        ) const
        {
            // Note that it is intentional that we don't call fe.reject_labeling() here
            // because doing so would break the structural svm optimizer.

            // A wrong label at this node costs one unit of loss.
            const double loss = (node_states(0) != label[node_id]) ? 1 : 0;
            return fe_helpers::dot(weights, fe, node_id, node_states, sample) + loss;
        }

        const std::vector<sample_type>& sample;
        const std::vector<unsigned long>& label;
        const feature_extractor& fe;
        const matrix<double,0,1>& weights;
    };

    virtual long get_num_dimensions () const
    {
        return fe.num_features();
    }

    virtual long get_num_samples () const
    {
        return samples.size();
    }

    // Fills psi with the features of every position of sample when it is
    // labeled with label.
    void get_joint_feature_vector (
        const std::vector<sample_type>& sample,
        const std::vector<unsigned long>& label,
        feature_vector_type& psi
    ) const
    {
        psi.clear();

        const int order = fe.order();
        matrix<unsigned long,0,1> label_states;

        for (unsigned long i = 0; i < sample.size(); ++i)
        {
            // Labels of position i followed by up to `order` preceding
            // positions, most recent first.
            const int oldest = std::max(static_cast<int>(i)-order, 0);
            label_states = rowm(vector_to_matrix(label), range(i, oldest));

            fe_helpers::get_feature_vector(psi, fe, i, label_states, sample);
        }
    }

    virtual void get_truth_joint_feature_vector (
        long idx,
        feature_vector_type& psi
    ) const
    {
        get_joint_feature_vector(samples[idx], labels[idx], psi);
    }

    // Finds the loss augmented labeling of training sequence idx under
    // current_solution and reports its loss and joint feature vector.
    virtual void separation_oracle (
        const long idx,
        const matrix_type& current_solution,
        scalar_type& loss,
        feature_vector_type& psi
    ) const
    {
        const std::vector<sample_type>& sample = samples[idx];
        const std::vector<unsigned long>& truth = labels[idx];

        std::vector<unsigned long> y;
        find_max_factor_graph_viterbi(map_prob(sample, truth, fe, current_solution), y);

        // The loss is the number of positions where y disagrees with the truth.
        loss = 0;
        for (unsigned long i = 0; i < y.size(); ++i)
        {
            loss += (y[i] != truth[i]) ? 1 : 0;
        }

        get_joint_feature_vector(sample, y, psi);
    }

    const std::vector<std::vector<sample_type> >& samples;
    const std::vector<std::vector<unsigned long> >& labels;
    const feature_extractor& fe;
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_H__
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_ABSTRACT_H__
#ifdef DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_ABSTRACT_H__
#include "structural_svm_sequence_labeling_problem_abstract.h"
#include "../matrix.h"
#include "sequence_labeler.h"
#include <vector>
#include "structural_svm_problem_threaded.h"
// ----------------------------------------------------------------------------------------
namespace dlib
{
template <
    typename feature_extractor
    >
class structural_svm_sequence_labeling_problem :
    public structural_svm_problem_threaded<matrix<double,0,1>,
                                           std::vector<std::pair<unsigned long,double> > >
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object defines the structural SVM optimization problem used to
            learn the weight vector of a sequence_labeler.  The joint feature
            vector of a labeled sequence is made up of the features reported by
            the feature_extractor at each position of the sequence, and the
            loss of a candidate labeling is the number of sequence elements it
            labels incorrectly.
    !*/

public:
    typedef matrix<double,0,1> matrix_type;
    typedef std::vector<std::pair<unsigned long, double> > feature_vector_type;
    typedef typename feature_extractor::sample_type sample_type;

    structural_svm_sequence_labeling_problem(
        const std::vector<std::vector<sample_type> >& samples_,
        const std::vector<std::vector<unsigned long> >& labels_,
        const feature_extractor& fe_
    );
    /*!
        requires
            - presumably is_sequence_labeling_problem(samples_, labels_); the
              implementation does not check this itself (TODO confirm)
        ensures
            - This object attempts to learn a weight vector that labels the
              sequences in samples_ with the labels in labels_, using fe_ to
              generate features.
            - The implementation stores references to samples_, labels_, and
              fe_ rather than copies, so these objects must remain valid for
              as long as this object is in use.
            - The implementation passes 4 to the structural_svm_problem_threaded
              constructor (presumably the number of threads to use).
    !*/
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_ABSTRACT_H__
......@@ -9,6 +9,8 @@
#include "svm/structural_svm_distributed.h"
#include "svm/structural_svm_object_detection_problem.h"
#include "svm/structural_object_detection_trainer.h"
#include "svm/structural_svm_sequence_labeling_problem.h"
#include "svm/structural_sequence_labeling_trainer.h"
#endif // DLIB_SVm_THREADED_HEADER
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment