Commit ba0f7c5c authored by Davis King

Added a function to dnn_trainer that lets you query the "steps without
progress" estimate.  I also renamed the get/set functions for the shrink
amount to have a consistent name and use the word "factor" instead of
"amount".
parent b974a575
@@ -417,7 +417,7 @@ namespace dlib
             DLIB_CASSERT(min(schedule) > 0,"");
             set_learning_rate(schedule(0,0));
             set_min_learning_rate(min(schedule));
-            set_learning_rate_shrink_amount(1);
+            set_learning_rate_shrink_factor(1);
             lr_schedule = matrix_cast<double>(reshape_to_column_vector(schedule));
             lr_schedule_pos = 0;
         }
@@ -443,7 +443,13 @@ namespace dlib
             return iter_without_progress_thresh;
         }

+        unsigned long get_steps_without_progress (
+        ) const
+        {
+            return steps_without_progress;
+        }
+
-        void set_learning_rate_shrink_amount (
+        void set_learning_rate_shrink_factor (
             double shrink
         )
         {
@@ -451,9 +457,10 @@ namespace dlib
             wait_for_thread_to_pause();
             lr_schedule.set_size(0);
             learning_rate_shrink = shrink;
+            steps_without_progress = 0;
         }

-        double get_learning_rate_shrink (
+        double get_learning_rate_shrink_factor (
         ) const
         {
             return learning_rate_shrink;
@@ -71,7 +71,7 @@ namespace dlib
                 - #get_learning_rate() == 1e-2
                 - #get_min_learning_rate() == 1e-5
                 - #get_iterations_without_progress_threshold() == 2000
-                - #get_learning_rate_shrink() == 0.1
+                - #get_learning_rate_shrink_factor() == 0.1
                 - #get_learning_rate_schedule().size() == 0
                 - if (cuda_extra_devices.size() > 0) then
                     - This object will use multiple graphics cards to run the learning
@@ -190,7 +190,7 @@ namespace dlib
            ensures
                - During training via this->train(), this object will test if progress is
                  still being made and if it isn't then it will reduce get_learning_rate()
-                  by setting it to get_learning_rate()*get_learning_rate_shrink().
+                  by setting it to get_learning_rate()*get_learning_rate_shrink_factor().
                  However, it will not reduce it below get_min_learning_rate().  Once this
                  minimum learning rate is crossed the training will terminate.
                - get_min_learning_rate() doesn't apply if you are using train_one_step().
@@ -210,7 +210,7 @@ namespace dlib
                - #get_learning_rate_schedule() == reshape_to_column_vector(schedule)
                - #get_learning_rate() == schedule(0,0)
                - #get_min_learning_rate() == min(schedule)
-                - #set_learning_rate_shrink_amount() == 1
+                - #get_learning_rate_shrink_factor() == 1
        !*/

        const matrix<double,0,1>& get_learning_rate_schedule (
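To make the renamed postcondition concrete: installing an explicit learning
rate schedule disables the automatic shrinking, which is why the ensures
clause above resets the shrink factor to 1.  A hedged sketch (the trainer
object and the particular schedule values are placeholders, not part of this
commit):

    // Sketch only: assumes an already-constructed dnn_trainer named trainer.
    matrix<double> schedule = linspace(1e-2, 1e-5, 10000);
    trainer.set_learning_rate_schedule(schedule);
    // The trainer now follows the fixed schedule, so:
    //   trainer.get_learning_rate()               == 1e-2
    //   trainer.get_min_learning_rate()           == 1e-5
    //   trainer.get_learning_rate_shrink_factor() == 1   (auto-shrink is off)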
@@ -232,6 +232,17 @@ namespace dlib
                  end of the schedule by checking if get_learning_rate() >= 0.6.
        !*/

+        unsigned long get_steps_without_progress (
+        ) const;
+        /*!
+            ensures
+                - if (get_learning_rate_shrink_factor() != 1) then
+                    - returns an estimate of how many mini-batches have executed without us
+                      observing a statistically significant decrease in the training error.
+                - else
+                    - returns 0
+        !*/
+
        void set_iterations_without_progress_threshold (
            unsigned long thresh
        );
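The new query pairs naturally with train_one_step().  A minimal sketch,
assuming a configured trainer plus mini-batch vectors images/labels that are
not part of this commit:

    while (trainer.get_learning_rate() >= 1e-5)
    {
        trainer.train_one_step(images, labels);
        // Per the spec above, this is 0 unless automatic shrinking is
        // enabled, i.e. unless get_learning_rate_shrink_factor() != 1.
        std::cout << "steps without progress: "
                  << trainer.get_steps_without_progress() << std::endl;
    }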
@@ -252,7 +263,7 @@ namespace dlib
                  get_iterations_without_progress_threshold() mini-batch results and
                  applying the statistical test defined by the running_gradient object to
                  see if the training error is getting smaller.  If it isn't being reduced
-                  then get_learning_rate() is made smaller by a factor of get_learning_rate_shrink().
+                  then get_learning_rate() is made smaller by a factor of get_learning_rate_shrink_factor().
                  Therefore, get_iterations_without_progress_threshold() should always be
                  set to something sensibly large so that this test can be done with
@@ -261,27 +272,27 @@ namespace dlib
                  then shrink the learning rate".
        !*/

-        void set_learning_rate_shrink_amount (
+        void set_learning_rate_shrink_factor (
            double shrink
        );
        /*!
            requires
                - 0 < shrink && shrink <= 1
            ensures
-                - #get_learning_rate_shrink() == shrink
+                - #get_learning_rate_shrink_factor() == shrink
                - #get_learning_rate_schedule().size() == 0
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

-        double get_learning_rate_shrink (
+        double get_learning_rate_shrink_factor (
        ) const;
        /*!
            ensures
                - Whenever the training routine thinks it isn't making progress anymore it
-                  will reduce get_learning_rate() by multiplying it by get_learning_rate_shrink().
+                  will reduce get_learning_rate() by multiplying it by get_learning_rate_shrink_factor().
                - You can disable the automatic learning rate reduction by setting
-                  get_learning_rate_shrink() to 1.
+                  get_learning_rate_shrink_factor() to 1.
        !*/

        void be_verbose (
@@ -224,9 +224,9 @@ int main(int argc, char** argv) try
     // it looks like the error hasn't decreased for the last 2000 iterations it
     // will automatically multiply the learning rate by 0.1.  You can change these
     // default parameters to some other values by calling these functions.  Or
-    // disable the automatic shrinking entirely by setting the shrink amount to 1.
+    // disable the automatic shrinking entirely by setting the shrink factor to 1.
     trainer.set_iterations_without_progress_threshold(2000);
-    trainer.set_learning_rate_shrink_amount(0.1);
+    trainer.set_learning_rate_shrink_factor(0.1);

     // The learning rate will start at 1e-3.
     trainer.set_learning_rate(1e-3);
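Under the new names, disabling the automatic shrinking in the example program
is a one-liner (again a sketch against the same hypothetical trainer):

    trainer.set_learning_rate_shrink_factor(1);  // a factor of 1 leaves the rate unchanged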