Commit 90c9d0be authored by Davis King

cleaned this up a little

parent 29964d28
...@@ -73,8 +73,8 @@ void deserialize(feature_extractor&, std::istream&) {} ...@@ -73,8 +73,8 @@ void deserialize(feature_extractor&, std::istream&) {}
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
void make_dataset ( void make_dataset (
const matrix<double>& emission_probabilities,
const matrix<double>& transition_probabilities, const matrix<double>& transition_probabilities,
const matrix<double>& emission_probabilities,
std::vector<std::vector<unsigned long> >& samples, std::vector<std::vector<unsigned long> >& samples,
std::vector<std::vector<unsigned long> >& labels, std::vector<std::vector<unsigned long> >& labels,
unsigned long dataset_size unsigned long dataset_size
...@@ -90,8 +90,10 @@ void make_dataset ( ...@@ -90,8 +90,10 @@ void make_dataset (
- This function randomly samples a bunch of sequences from the HMM defined by - This function randomly samples a bunch of sequences from the HMM defined by
transition_probabilities and emission_probabilities. transition_probabilities and emission_probabilities.
- The HMM is defined by: - The HMM is defined by:
- P(next_label |previous_label) == transition_probabilities(previous_label, next_label) - The probability of transitioning from hidden state H1 to H2
- P(next_sample|next_label) == emission_probabilities (next_label, next_sample) is given by transition_probabilities(H1,H2).
- The probability of a hidden state H producing an observed state
O is given by emission_probabilities(H,O).
- #samples.size() == labels.size() == dataset_size - #samples.size() == labels.size() == dataset_size
- for all valid i: - for all valid i:
- #labels[i] is a randomly sampled sequence of hidden states from the - #labels[i] is a randomly sampled sequence of hidden states from the
...@@ -103,6 +105,10 @@ void make_dataset ( ...@@ -103,6 +105,10 @@ void make_dataset (
int main() int main()
{ {
matrix<double> transition_probabilities(num_label_states, num_label_states);
transition_probabilities = 0.05, 0.90, 0.05,
0.05, 0.05, 0.90,
0.90, 0.05, 0.05;
// set this up so emission_probabilities(L,X) == The probability of a state with label L // set this up so emission_probabilities(L,X) == The probability of a state with label L
// emitting an X. // emitting an X.
...@@ -111,17 +117,11 @@ int main() ...@@ -111,17 +117,11 @@ int main()
0.0, 0.5, 0.5, 0.0, 0.5, 0.5,
0.5, 0.0, 0.5; 0.5, 0.0, 0.5;
matrix<double> transition_probabilities(num_label_states, num_label_states);
transition_probabilities = 0.05, 0.90, 0.05,
0.05, 0.05, 0.90,
0.90, 0.05, 0.05;
std::vector<std::vector<unsigned long> > samples; std::vector<std::vector<unsigned long> > samples;
std::vector<std::vector<unsigned long> > labels; std::vector<std::vector<unsigned long> > labels;
make_dataset(emission_probabilities, transition_probabilities, make_dataset(transition_probabilities,emission_probabilities,
samples, labels, 1000); samples, labels, 1000);
cout << "samples.size(): "<< samples.size() << endl; cout << "samples.size(): "<< samples.size() << endl;
...@@ -139,17 +139,19 @@ int main() ...@@ -139,17 +139,19 @@ int main()
trainer.set_num_threads(4); trainer.set_num_threads(4);
matrix<double> confusion_matrix;
// Learn to do sequence labeling from the dataset // Learn to do sequence labeling from the dataset
sequence_labeler<feature_extractor> labeler = trainer.train(samples, labels); sequence_labeler<feature_extractor> labeler = trainer.train(samples, labels);
confusion_matrix = test_sequence_labeler(labeler, samples, labels);
cout << "trained sequence labeler: " << endl; std::vector<unsigned long> predicted_labels = labeler(samples[0]);
cout << confusion_matrix; cout << "true hidden states: "<< trans(vector_to_matrix(labels[0]));
cout << "label accuracy: "<< sum(diag(confusion_matrix))/sum(confusion_matrix) << endl; cout << "predicted hidden states: "<< trans(vector_to_matrix(predicted_labels));
// We can also do cross-validation // We can also do cross-validation
matrix<double> confusion_matrix;
confusion_matrix = cross_validate_sequence_labeler(trainer, samples, labels, 4); confusion_matrix = cross_validate_sequence_labeler(trainer, samples, labels, 4);
cout << "\ncross-validation: " << endl; cout << "\ncross-validation: " << endl;
cout << confusion_matrix; cout << confusion_matrix;
...@@ -236,8 +238,8 @@ void sample_hmm ( ...@@ -236,8 +238,8 @@ void sample_hmm (
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
void make_dataset ( void make_dataset (
const matrix<double>& emission_probabilities,
const matrix<double>& transition_probabilities, const matrix<double>& transition_probabilities,
const matrix<double>& emission_probabilities,
std::vector<std::vector<unsigned long> >& samples, std::vector<std::vector<unsigned long> >& samples,
std::vector<std::vector<unsigned long> >& labels, std::vector<std::vector<unsigned long> >& labels,
unsigned long dataset_size unsigned long dataset_size
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment