Commit ba0f7c5c authored by Davis King

Added a function to dnn_trainer that lets you query the "steps without

progress" estimate.  I also renamed the get/set functions for the shrink amount
to have a consistent name and use the word "factor" instead of "amount".
parent b974a575
...@@ -417,7 +417,7 @@ namespace dlib ...@@ -417,7 +417,7 @@ namespace dlib
DLIB_CASSERT(min(schedule) > 0,""); DLIB_CASSERT(min(schedule) > 0,"");
set_learning_rate(schedule(0,0)); set_learning_rate(schedule(0,0));
set_min_learning_rate(min(schedule)); set_min_learning_rate(min(schedule));
set_learning_rate_shrink_amount(1); set_learning_rate_shrink_factor(1);
lr_schedule = matrix_cast<double>(reshape_to_column_vector(schedule)); lr_schedule = matrix_cast<double>(reshape_to_column_vector(schedule));
lr_schedule_pos = 0; lr_schedule_pos = 0;
} }
...@@ -443,7 +443,13 @@ namespace dlib ...@@ -443,7 +443,13 @@ namespace dlib
return iter_without_progress_thresh; return iter_without_progress_thresh;
} }
void set_learning_rate_shrink_amount ( unsigned long get_steps_without_progress (
) const
{
return steps_without_progress;
}
void set_learning_rate_shrink_factor (
double shrink double shrink
) )
{ {
...@@ -451,9 +457,10 @@ namespace dlib ...@@ -451,9 +457,10 @@ namespace dlib
wait_for_thread_to_pause(); wait_for_thread_to_pause();
lr_schedule.set_size(0); lr_schedule.set_size(0);
learning_rate_shrink = shrink; learning_rate_shrink = shrink;
steps_without_progress = 0;
} }
double get_learning_rate_shrink ( double get_learning_rate_shrink_factor (
) const ) const
{ {
return learning_rate_shrink; return learning_rate_shrink;
......
...@@ -71,7 +71,7 @@ namespace dlib ...@@ -71,7 +71,7 @@ namespace dlib
- #get_learning_rate() == 1e-2 - #get_learning_rate() == 1e-2
- #get_min_learning_rate() == 1e-5 - #get_min_learning_rate() == 1e-5
- #get_iterations_without_progress_threshold() == 2000 - #get_iterations_without_progress_threshold() == 2000
- #get_learning_rate_shrink() == 0.1 - #get_learning_rate_shrink_factor() == 0.1
- #get_learning_rate_schedule().size() == 0 - #get_learning_rate_schedule().size() == 0
- if (cuda_extra_devices.size() > 0) then - if (cuda_extra_devices.size() > 0) then
- This object will use multiple graphics cards to run the learning - This object will use multiple graphics cards to run the learning
...@@ -190,7 +190,7 @@ namespace dlib ...@@ -190,7 +190,7 @@ namespace dlib
ensures ensures
- During training via this->train(), this object will test if progress is - During training via this->train(), this object will test if progress is
still being made and if it isn't then it will reduce get_learning_rate() still being made and if it isn't then it will reduce get_learning_rate()
by setting it to get_learning_rate()*get_learning_rate_shrink(). by setting it to get_learning_rate()*get_learning_rate_shrink_factor().
However, it will not reduce it below get_min_learning_rate(). Once this However, it will not reduce it below get_min_learning_rate(). Once this
minimum learning rate is crossed the training will terminate. minimum learning rate is crossed the training will terminate.
- get_min_learning_rate() doesn't apply if you are using train_one_step(). - get_min_learning_rate() doesn't apply if you are using train_one_step().
...@@ -210,7 +210,7 @@ namespace dlib ...@@ -210,7 +210,7 @@ namespace dlib
- #get_learning_rate_schedule() == reshape_to_column_vector(schedule) - #get_learning_rate_schedule() == reshape_to_column_vector(schedule)
- #get_learning_rate() == schedule(0,0) - #get_learning_rate() == schedule(0,0)
- #get_min_learning_rate() == min(schedule) - #get_min_learning_rate() == min(schedule)
- #set_learning_rate_shrink_amount() == 1 - #get_learning_rate_shrink_factor() == 1
!*/ !*/
const matrix<double,0,1>& get_learning_rate_schedule ( const matrix<double,0,1>& get_learning_rate_schedule (
...@@ -232,6 +232,17 @@ namespace dlib ...@@ -232,6 +232,17 @@ namespace dlib
end of the schedule by checking if get_learning_rate() >= 0.6. end of the schedule by checking if get_learning_rate() >= 0.6.
!*/ !*/
unsigned long get_steps_without_progress (
) const;
/*!
ensures
- if (get_learning_rate_shrink_factor() != 1) then
- returns an estimate of how many mini-batches have executed without us
observing a statistically significant decrease in the training error.
- else
- returns 0
!*/
void set_iterations_without_progress_threshold ( void set_iterations_without_progress_threshold (
unsigned long thresh unsigned long thresh
); );
...@@ -252,7 +263,7 @@ namespace dlib ...@@ -252,7 +263,7 @@ namespace dlib
get_iterations_without_progress_threshold() mini-batch results and get_iterations_without_progress_threshold() mini-batch results and
applying the statistical test defined by the running_gradient object to applying the statistical test defined by the running_gradient object to
see if the training error is getting smaller. If it isn't being reduced see if the training error is getting smaller. If it isn't being reduced
then get_learning_rate() is made smaller by a factor of get_learning_rate_shrink(). then get_learning_rate() is made smaller by a factor of get_learning_rate_shrink_factor().
Therefore, get_iterations_without_progress_threshold() should always be Therefore, get_iterations_without_progress_threshold() should always be
set to something sensibly large so that this test can be done with set to something sensibly large so that this test can be done with
...@@ -261,27 +272,27 @@ namespace dlib ...@@ -261,27 +272,27 @@ namespace dlib
then shrink the learning rate". then shrink the learning rate".
!*/ !*/
void set_learning_rate_shrink_amount ( void set_learning_rate_shrink_factor (
double shrink double shrink
); );
/*! /*!
requires requires
- 0 < shrink && shrink <= 1 - 0 < shrink && shrink <= 1
ensures ensures
- #get_learning_rate_shrink() == shrink - #get_learning_rate_shrink_factor() == shrink
- #get_learning_rate_schedule().size() == 0 - #get_learning_rate_schedule().size() == 0
- This function blocks until all threads inside the dnn_trainer have - This function blocks until all threads inside the dnn_trainer have
stopped touching the net. stopped touching the net.
!*/ !*/
double get_learning_rate_shrink ( double get_learning_rate_shrink_factor (
) const; ) const;
/*! /*!
ensures ensures
- Whenever the training routine thinks it isn't making progress anymore it - Whenever the training routine thinks it isn't making progress anymore it
will reduce get_learning_rate() by multiplying it by get_learning_rate_shrink(). will reduce get_learning_rate() by multiplying it by get_learning_rate_shrink_factor().
- You can disable the automatic learning rate reduction by setting - You can disable the automatic learning rate reduction by setting
get_learning_rate_shrink() to 1. get_learning_rate_shrink_factor() to 1.
!*/ !*/
void be_verbose ( void be_verbose (
......
...@@ -224,9 +224,9 @@ int main(int argc, char** argv) try ...@@ -224,9 +224,9 @@ int main(int argc, char** argv) try
// it looks like the error hasn't decreased for the last 2000 iterations it // it looks like the error hasn't decreased for the last 2000 iterations it
// will automatically reduce the learning rate by 0.1. You can change these // will automatically reduce the learning rate by 0.1. You can change these
// default parameters to some other values by calling these functions. Or // default parameters to some other values by calling these functions. Or
// disable the automatic shrinking entirely by setting the shrink amount to 1. // disable the automatic shrinking entirely by setting the shrink factor to 1.
trainer.set_iterations_without_progress_threshold(2000); trainer.set_iterations_without_progress_threshold(2000);
trainer.set_learning_rate_shrink_amount(0.1); trainer.set_learning_rate_shrink_factor(0.1);
// The learning rate will start at 1e-3. // The learning rate will start at 1e-3.
trainer.set_learning_rate(1e-3); trainer.set_learning_rate(1e-3);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment