Commit 9d81a1ef authored by Davis King's avatar Davis King

Added initial version of the assignment problem learning code.

parent 94ae09f5
......@@ -35,6 +35,7 @@
#include "svm/cross_validate_regression_trainer.h"
#include "svm/cross_validate_object_detection_trainer.h"
#include "svm/cross_validate_sequence_labeler.h"
#include "svm/cross_validate_assignment_trainer.h"
#include "svm/one_vs_all_decision_function.h"
#include "svm/one_vs_all_trainer.h"
......@@ -42,6 +43,7 @@
#include "svm/structural_svm_problem.h"
#include "svm/svm_multiclass_linear_trainer.h"
#include "svm/sequence_labeler.h"
#include "svm/assignment_function.h"
#endif // DLIB_SVm_HEADER
......
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_ASSIGNMENT_FuNCTION_H__
#define DLIB_ASSIGNMENT_FuNCTION_H__
#include "assignment_function_abstract.h"
#include "../matrix.h"
#include <vector>
#include "../optimization/max_cost_assignment.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
    typename feature_extractor
    >
class assignment_function
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object is a tool for predicting the best association between
            two sets of objects.  It combines a feature_extractor with a learned
            weight vector and solves the resulting optimal assignment problem
            using max_cost_assignment().
    !*/
public:

    typedef typename feature_extractor::lhs_type lhs_type;
    typedef typename feature_extractor::rhs_type rhs_type;

    typedef std::pair<std::vector<lhs_type>, std::vector<rhs_type> > sample_type;
    typedef std::vector<long> label_type;
    typedef label_type result_type;

    assignment_function()
    {
        // All-zero weights: every possible assignment scores the same.
        weights.set_size(fe.num_features());
        weights = 0;
        force_assignment = false;
    }

    explicit assignment_function(
        const matrix<double,0,1>& weights_
    ) :
        weights(weights_),
        force_assignment(false)
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(fe.num_features() == static_cast<unsigned long>(weights_.size()),
            "\t assignment_function::assignment_function(weights_)"
            << "\n\t These sizes should match"
            << "\n\t fe.num_features(): " << fe.num_features()
            << "\n\t weights_.size():   " << weights_.size()
            << "\n\t this: " << this
            );
    }

    assignment_function(
        const feature_extractor& fe_,
        const matrix<double,0,1>& weights_
    ) :
        fe(fe_),
        weights(weights_),
        force_assignment(false)
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(fe_.num_features() == static_cast<unsigned long>(weights_.size()),
            "\t assignment_function::assignment_function(fe_,weights_)"
            << "\n\t These sizes should match"
            << "\n\t fe_.num_features(): " << fe_.num_features()
            << "\n\t weights_.size():    " << weights_.size()
            << "\n\t this: " << this
            );
    }

    assignment_function(
        const feature_extractor& fe_,
        const matrix<double,0,1>& weights_,
        bool force_assignment_
    ) :
        fe(fe_),
        weights(weights_),
        force_assignment(force_assignment_)
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(fe_.num_features() == static_cast<unsigned long>(weights_.size()),
            "\t assignment_function::assignment_function(fe_,weights_,force_assignment_)"
            << "\n\t These sizes should match"
            << "\n\t fe_.num_features(): " << fe_.num_features()
            << "\n\t weights_.size():    " << weights_.size()
            << "\n\t this: " << this
            );
    }

    result_type operator()(
        const std::vector<lhs_type>& lhs,
        const std::vector<rhs_type>& rhs
    ) const
    /*!
        ensures
            - returns a vector A such that:
                - A.size() == lhs.size()
                - if (A[i] != -1) then
                    - lhs[i] is predicted to associate to rhs[A[i]]
    !*/
    {
        using dlib::sparse_vector::dot;
        using dlib::dot;

        matrix<double> cost;
        unsigned long size;
        if (force_assignment)
        {
            // Every element of the smaller set must be assigned to something.
            size = std::max(lhs.size(), rhs.size());
        }
        else
        {
            // The extra rows/columns act as zero-cost "no assignment" slots.
            size = rhs.size() + lhs.size();
        }
        cost.set_size(size, size);

        // now fill out the cost assignment matrix
        for (long r = 0; r < cost.nr(); ++r)
        {
            for (long c = 0; c < cost.nc(); ++c)
            {
                if (r < (long)lhs.size() && c < (long)rhs.size())
                {
                    cost(r,c) = dot(weights, fe(lhs[r], rhs[c]));
                }
                else
                {
                    cost(r,c) = 0;
                }
            }
        }

        std::vector<long> assignment;

        if (cost.size() != 0)
        {
            // max_cost_assignment() only works with integer matrices, so convert from
            // double to integer.
            const double max_abs = max(abs(cost));
            // If every cost is zero (e.g. zero weights or an empty lhs) then any
            // assignment is equally good.  Use a scale of 1 in that case rather
            // than dividing by zero, which would make scale infinite and turn
            // cost*scale into NaNs.
            const double scale = (max_abs != 0) ? (std::numeric_limits<dlib::int64>::max()/1000)/max_abs : 1;
            matrix<dlib::int64> int_cost = matrix_cast<dlib::int64>(round(cost*scale));
            assignment = max_cost_assignment(int_cost);
            // Only the rows corresponding to real lhs elements matter.
            assignment.resize(lhs.size());
        }

        // adjust assignment so that non-assignments have a value of -1
        for (unsigned long i = 0; i < assignment.size(); ++i)
        {
            if (assignment[i] >= (long)rhs.size())
                assignment[i] = -1;
        }

        return assignment;
    }

    result_type operator() (
        const sample_type& item
    ) const
    {
        return (*this)(item.first, item.second);
    }

private:

    feature_extractor fe;       // maps a (lhs,rhs) pair to a feature vector
    matrix<double,0,1> weights; // learned weight vector, one entry per feature
    bool force_assignment;      // if true, the smaller set is fully assigned
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_ASSIGNMENT_FuNCTION_H__
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_H__
#define DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_H__
#include "cross_validate_assignment_trainer_abstract.h"
#include <vector>
#include "../matrix.h"
#include "svm.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
    typename assignment_function
    >
double test_assignment_function (
    const assignment_function& assigner,
    const std::vector<typename assignment_function::sample_type>& samples,
    const std::vector<typename assignment_function::label_type>& labels
)
/*!
    ensures
        - runs assigner on each element of samples and compares the predicted
          assignments against the corresponding entries of labels.
        - returns the fraction of individual assignments predicted correctly
          (or 1 if there were no assignments to test).
!*/
{
    double num_correct = 0;
    double num_total = 0;

    for (unsigned long idx = 0; idx < samples.size(); ++idx)
    {
        const std::vector<long> predicted = assigner(samples[idx]);
        for (unsigned long k = 0; k < predicted.size(); ++k)
        {
            num_total += 1;
            if (predicted[k] == labels[idx][k])
                num_correct += 1;
        }
    }

    // Avoid 0/0 when there was nothing to score.
    return (num_total == 0) ? 1 : num_correct/num_total;
}
// ----------------------------------------------------------------------------------------
template <
typename trainer_type
>
double cross_validate_assignment_trainer (
const trainer_type& trainer,
const std::vector<typename trainer_type::sample_type>& samples,
const std::vector<typename trainer_type::label_type>& labels,
const long folds
)
{
typedef typename trainer_type::sample_type sample_type;
typedef typename trainer_type::label_type label_type;
const long num_in_test = samples.size()/folds;
const long num_in_train = samples.size() - num_in_test;
running_stats<double> rs;
std::vector<sample_type> samples_test, samples_train;
std::vector<label_type> labels_test, labels_train;
long next_test_idx = 0;
for (long i = 0; i < folds; ++i)
{
samples_test.clear();
labels_test.clear();
samples_train.clear();
labels_train.clear();
// load up the test samples
for (long cnt = 0; cnt < num_in_test; ++cnt)
{
samples_test.push_back(samples[next_test_idx]);
labels_test.push_back(labels[next_test_idx]);
next_test_idx = (next_test_idx + 1)%samples.size();
}
// load up the training samples
long next = next_test_idx;
for (long cnt = 0; cnt < num_in_train; ++cnt)
{
samples_train.push_back(samples[next]);
labels_train.push_back(labels[next]);
next = (next + 1)%samples.size();
}
rs.add(test_assignment_function(trainer.train(samples_train,labels_train),
samples_test,
labels_test));
} // for (long i = 0; i < folds; ++i)
return rs.mean();
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_H__
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_H__
#define DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_H__
#include "structural_assignment_trainer_abstract.h"
#include "../algs.h"
#include "../optimization.h"
#include "structural_svm_assignment_problem.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
    typename feature_extractor
    >
class structural_assignment_trainer
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object is a tool for learning the weight vector needed to use
            an assignment_function object.  It formulates the learning problem
            as a structural SVM (see structural_svm_assignment_problem) and
            solves it with the oca optimizer.
    !*/
public:
    typedef typename feature_extractor::lhs_type lhs_type;
    typedef typename feature_extractor::rhs_type rhs_type;
    typedef std::pair<std::vector<lhs_type>, std::vector<rhs_type> > sample_type;
    typedef std::vector<long> label_type;
    typedef assignment_function<feature_extractor> trained_function_type;

    const assignment_function<feature_extractor> train (
        const std::vector<sample_type>& x,
        const std::vector<label_type>& y
    ) const
    /*!
        requires
            - is_assignment_problem(x,y) == true
            - if (force assignment) then
                - is_forced_assignment_problem(x,y) == true
        ensures
            - returns an assignment_function whose weights were learned from
              the given training data.
    !*/
    {
        // make sure requires clause is not broken
        DLIB_CASSERT(is_assignment_problem(x,y), "");

        feature_extractor fe;
        bool force_assignment = false;
        unsigned long num_threads = 1;

        // TODO(review): C, epsilon, num_threads, and force_assignment are
        // hard-coded here; they should eventually be user-settable options.
        structural_svm_assignment_problem<feature_extractor> prob(x,y, fe, force_assignment, num_threads);
        prob.set_c(50);
        prob.set_epsilon(1e-10);

        oca solver;
        matrix<double,0,1> weights;
        solver(prob, weights);

        return assignment_function<feature_extractor>(fe,weights,force_assignment);
    }
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_H__
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_H__
#define DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_H__
#include "structural_svm_assignment_problem_abstract.h"
#include "../matrix.h"
#include "assignment_function.h"
#include <vector>
#include "structural_svm_problem_threaded.h"
// ----------------------------------------------------------------------------------------
namespace dlib
{
template <
    typename feature_extractor
    >
class structural_svm_assignment_problem : noncopyable,
    public structural_svm_problem_threaded<matrix<double,0,1>, typename feature_extractor::feature_vector_type >
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object defines the structural SVM optimization problem used to
            learn the weight vector of an assignment_function.  It supplies the
            joint feature vectors and the loss-augmented separation oracle
            required by structural_svm_problem_threaded.
    !*/
public:
    typedef matrix<double,0,1> matrix_type;
    typedef typename feature_extractor::feature_vector_type feature_vector_type;
    typedef typename feature_extractor::lhs_type lhs_type;
    typedef typename feature_extractor::rhs_type rhs_type;

    typedef std::pair<std::vector<lhs_type>, std::vector<rhs_type> > sample_type;
    typedef std::vector<long> label_type;

    structural_svm_assignment_problem(
        const std::vector<sample_type>& samples_,
        const std::vector<label_type>& labels_,
        const feature_extractor& fe_,
        bool force_assignment_,
        unsigned long num_threads = 2
    ) :
        structural_svm_problem_threaded<matrix_type,feature_vector_type>(num_threads),
        samples(samples_),
        labels(labels_),
        fe(fe_),
        force_assignment(force_assignment_)
    {
        // NOTE: only references to the samples/labels are stored, so the
        // caller's vectors must outlive this object.
    }

private:
    virtual long get_num_dimensions (
    ) const
    {
        return fe.num_features();
    }

    virtual long get_num_samples (
    ) const
    {
        return samples.size();
    }

    // Dense PSI: sum the feature vectors of every assigned (lhs,rhs) pair.
    template <typename psi_type>
    typename enable_if<is_matrix<psi_type> >::type get_joint_feature_vector (
        const sample_type& sample,
        const label_type& label,
        psi_type& psi
    ) const
    {
        psi = 0;
        for (unsigned long i = 0; i < sample.first.size(); ++i)
        {
            if (label[i] != -1)
            {
                psi += fe(sample.first[i], sample.second[label[i]]);
            }
        }
    }

    template <typename T>
    void append_to_sparse_vect (
        T& psi,
        const T& vect
    ) const
    {
        std::copy(vect.begin(), vect.end(), std::back_inserter(psi));
    }

    // Sparse PSI: concatenate the feature vectors of every assigned pair.
    template <typename psi_type>
    typename disable_if<is_matrix<psi_type> >::type get_joint_feature_vector (
        const sample_type& sample,
        const label_type& label,
        psi_type& psi
    ) const
    {
        psi.clear();
        for (unsigned long i = 0; i < sample.first.size(); ++i)
        {
            if (label[i] != -1)
            {
                append_to_sparse_vect(psi, fe(sample.first[i], sample.second[label[i]]));
            }
        }
    }

    virtual void get_truth_joint_feature_vector (
        long idx,
        feature_vector_type& psi
    ) const
    {
        get_joint_feature_vector(samples[idx], labels[idx], psi);
    }

    // Finds the assignment which maximizes (current score + loss), i.e. the
    // most-violated constraint for sample idx, using max_cost_assignment().
    virtual void separation_oracle (
        const long idx,
        const matrix_type& current_solution,
        double& loss,
        feature_vector_type& psi
    ) const
    {
        using dlib::sparse_vector::dot;
        using dlib::dot;

        matrix<double> cost;
        unsigned long size;
        if (force_assignment)
        {
            unsigned long lhs_size = samples[idx].first.size();
            unsigned long rhs_size = samples[idx].second.size();
            size = std::max(lhs_size, rhs_size);
        }
        else
        {
            // Extra rows/columns represent the "no assignment" option.
            unsigned long rhs_size = samples[idx].second.size() + samples[idx].first.size();
            size = rhs_size;
        }
        cost.set_size(size, size);

        // now fill out the cost assignment matrix
        for (long r = 0; r < cost.nr(); ++r)
        {
            for (long c = 0; c < cost.nc(); ++c)
            {
                if (r < (long)samples[idx].first.size())
                {
                    if (c < (long)samples[idx].second.size())
                    {
                        cost(r,c) = dot(current_solution, fe(samples[idx].first[r], samples[idx].second[c]));

                        // add in the loss since this corresponds to an incorrect prediction.
                        if (c != labels[idx][r])
                        {
                            cost(r,c) += 1;
                        }
                    }
                    else
                    {
                        // Column c means "leave lhs element r unassigned".
                        if (labels[idx][r] == -1)
                            cost(r,c) = 0;
                        else
                            cost(r,c) = 1; // 1 for the loss
                    }
                }
                else
                {
                    cost(r,c) = 0;
                }
            }
        }

        std::vector<long> assignment;
        if (cost.size() != 0)
        {
            // max_cost_assignment() only works with integer matrices, so convert from
            // double to integer.
            const double max_abs = max(abs(cost));
            // Guard against an all-zero cost matrix (e.g. a zero current_solution
            // with no loss terms): dividing by zero would make scale infinite and
            // turn cost*scale into NaNs before the integer cast.
            const double scale = (max_abs != 0) ? (std::numeric_limits<dlib::int64>::max()/1000)/max_abs : 1;
            matrix<dlib::int64> int_cost = matrix_cast<dlib::int64>(round(cost*scale));
            assignment = max_cost_assignment(int_cost);
            assignment.resize(samples[idx].first.size());
        }

        loss = 0;
        // adjust assignment so that non-assignments have a value of -1. Also compute loss.
        for (unsigned long i = 0; i < assignment.size(); ++i)
        {
            if (assignment[i] >= (long)samples[idx].second.size())
                assignment[i] = -1;

            if (assignment[i] != labels[idx][i])
                loss += 1;
        }

        get_joint_feature_vector(samples[idx], assignment, psi);
    }

    const std::vector<sample_type>& samples;
    const std::vector<label_type>& labels;
    const feature_extractor& fe;
    bool force_assignment;
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_H__
......@@ -12,6 +12,9 @@
#include "svm/structural_svm_sequence_labeling_problem.h"
#include "svm/structural_sequence_labeling_trainer.h"
#include "svm/structural_svm_assignment_problem.h"
#include "svm/structural_assignment_trainer.h"
#endif // DLIB_SVm_THREADED_HEADER
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment