Commit a32f475c authored by Davis King's avatar Davis King

Added parallel_for_blocked_verbose() and also slightly improved

verbose time remaining estimation.
parent acdae5c2
...@@ -298,7 +298,7 @@ namespace dlib ...@@ -298,7 +298,7 @@ namespace dlib
obj(obj_), funct(funct_), pbar(end-begin) obj(obj_), funct(funct_), pbar(end-begin)
{ {
count = 0; count = 0;
pbar.print_status(-1); pbar.print_status(0);
} }
long count; long count;
...@@ -312,7 +312,17 @@ namespace dlib ...@@ -312,7 +312,17 @@ namespace dlib
(obj.*funct)(i); (obj.*funct)(i);
{ {
auto_mutex lock(m); auto_mutex lock(m);
pbar.print_status(count++); pbar.print_status(++count);
}
}
void operator()(long begin, long end)
{
(obj.*funct)(begin, end);
{
auto_mutex lock(m);
count += end-begin;
pbar.print_status(count);
} }
} }
}; };
...@@ -324,7 +334,7 @@ namespace dlib ...@@ -324,7 +334,7 @@ namespace dlib
parfor_verbose_helper2(const T& obj_, long begin, long end) : obj(obj_), pbar(end-begin) parfor_verbose_helper2(const T& obj_, long begin, long end) : obj(obj_), pbar(end-begin)
{ {
count = 0; count = 0;
pbar.print_status(-1); pbar.print_status(0);
} }
mutable long count; mutable long count;
...@@ -337,7 +347,17 @@ namespace dlib ...@@ -337,7 +347,17 @@ namespace dlib
obj(i); obj(i);
{ {
auto_mutex lock(m); auto_mutex lock(m);
pbar.print_status(count++); pbar.print_status(++count);
}
}
void operator()(long begin, long end) const
{
obj(begin, end);
{
auto_mutex lock(m);
count += end-begin;
pbar.print_status(count);
} }
} }
}; };
...@@ -439,6 +459,104 @@ namespace dlib ...@@ -439,6 +459,104 @@ namespace dlib
parallel_for(num_threads, begin, end, helper, chunks_per_thread); parallel_for(num_threads, begin, end, helper, chunks_per_thread);
} }
// ----------------------------------------------------------------------------------------
/*!
    Progress-reporting variant of parallel_for_blocked() for a member function
    that is dispatched through an existing thread_pool.

    The callable (obj, funct) is wrapped in impl::parfor_verbose_helper, which
    is constructed with the range [begin, end), and the wrapper is then handed
    to parallel_for_blocked() with the same tp, begin, end and
    chunks_per_thread arguments.

    requires
        - begin <= end
        - chunks_per_thread > 0

    NOTE(review): impl::parfor_verbose_helper also invokes its stored member
    pointer with a single long argument in its per-index operator().  Its
    declaration is not visible from here, so confirm that it really accepts a
    void (T::*)(long,long) pointer.
!*/
template <typename T>
void parallel_for_blocked_verbose (
    thread_pool& tp,
    long begin,
    long end,
    T& obj,
    void (T::*funct)(long,long),
    long chunks_per_thread = 8
)
{
    // Enforce the requires clause before doing any work.
    DLIB_ASSERT(begin <= end && chunks_per_thread > 0,
        "\t void parallel_for_blocked_verbose()"
        << "\n\t Invalid inputs were given to this function"
        << "\n\t begin: " << begin
        << "\n\t end: " << end
        << "\n\t chunks_per_thread: " << chunks_per_thread
        );

    // Wrap the user's member function so each processed block updates the
    // progress display, then run the ordinary blocked parallel loop on it.
    typedef impl::parfor_verbose_helper<T> progress_wrapper;
    progress_wrapper reporting_funct(obj, funct, begin, end);
    parallel_for_blocked(tp, begin, end, reporting_funct, chunks_per_thread);
}
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for_blocked_verbose (
unsigned long num_threads,
long begin,
long end,
T& obj,
void (T::*funct)(long,long),
long chunks_per_thread = 8
)
{
// make sure requires clause is not broken
DLIB_ASSERT(begin <= end && chunks_per_thread > 0,
"\t void parallel_for_blocked_verbose()"
<< "\n\t Invalid inputs were given to this function"
<< "\n\t begin: " << begin
<< "\n\t end: " << end
<< "\n\t chunks_per_thread: " << chunks_per_thread
);
impl::parfor_verbose_helper<T> helper(obj, funct, begin, end);
parallel_for_blocked(num_threads, begin, end, helper, chunks_per_thread);
}
// ----------------------------------------------------------------------------------------
/*!
    Progress-reporting variant of parallel_for_blocked() for a function object
    that is dispatched through an existing thread_pool.

    funct is wrapped in impl::parfor_verbose_helper2, which is constructed with
    the range [begin, end), and the wrapper is then handed to
    parallel_for_blocked() with the same tp, begin, end and chunks_per_thread
    arguments.

    requires
        - begin <= end
        - chunks_per_thread > 0
!*/
template <typename T>
void parallel_for_blocked_verbose (
    thread_pool& tp,
    long begin,
    long end,
    const T& funct,
    long chunks_per_thread = 8
)
{
    // Enforce the requires clause before doing any work.
    DLIB_ASSERT(begin <= end && chunks_per_thread > 0,
        "\t void parallel_for_blocked_verbose()"
        << "\n\t Invalid inputs were given to this function"
        << "\n\t begin: " << begin
        << "\n\t end: " << end
        << "\n\t chunks_per_thread: " << chunks_per_thread
        );

    // Wrap the user's function object so each processed block updates the
    // progress display, then run the ordinary blocked parallel loop on it.
    typedef impl::parfor_verbose_helper2<T> progress_wrapper;
    progress_wrapper reporting_funct(funct, begin, end);
    parallel_for_blocked(tp, begin, end, reporting_funct, chunks_per_thread);
}
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for_blocked_verbose (
unsigned long num_threads,
long begin,
long end,
const T& funct,
long chunks_per_thread = 8
)
{
// make sure requires clause is not broken
DLIB_ASSERT(begin <= end && chunks_per_thread > 0,
"\t void parallel_for_blocked_verbose()"
<< "\n\t Invalid inputs were given to this function"
<< "\n\t begin: " << begin
<< "\n\t end: " << end
<< "\n\t chunks_per_thread: " << chunks_per_thread
);
impl::parfor_verbose_helper2<T> helper(funct, begin, end);
parallel_for_blocked(num_threads, begin, end, helper, chunks_per_thread);
}
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
} }
......
...@@ -24,8 +24,8 @@ namespace dlib ...@@ -24,8 +24,8 @@ namespace dlib
- begin <= end - begin <= end
- chunks_per_thread > 0 - chunks_per_thread > 0
ensures ensures
- This is a convenience function for submitting a block of jobs to a - This is a convenience function for submitting a block of jobs to a thread_pool.
thread_pool. In particular, given the range [begin, end), this function will In particular, given the half open range [begin, end), this function will
split the range into approximately tp.num_threads_in_pool()*chunks_per_thread split the range into approximately tp.num_threads_in_pool()*chunks_per_thread
blocks, which it will then submit to the thread_pool. The given thread_pool blocks, which it will then submit to the thread_pool. The given thread_pool
will then call (obj.*funct)() on each of the subranges. will then call (obj.*funct)() on each of the subranges.
...@@ -38,7 +38,8 @@ namespace dlib ...@@ -38,7 +38,8 @@ namespace dlib
- [begin[n], end[n]) - [begin[n], end[n])
Then parallel_for_blocked() submits each of these subranges to tp for Then parallel_for_blocked() submits each of these subranges to tp for
processing such that (obj.*funct)(begin[i], end[i]) is invoked for all valid processing such that (obj.*funct)(begin[i], end[i]) is invoked for all valid
values of i. values of i. Moreover, the subranges are non-overlapping and completely
cover the total range of [begin, end).
- This function will not perform any memory allocations or create any system - This function will not perform any memory allocations or create any system
resources such as mutex objects. resources such as mutex objects.
!*/ !*/
...@@ -299,6 +300,89 @@ namespace dlib ...@@ -299,6 +300,89 @@ namespace dlib
parallel for loop. parallel for loop.
!*/ !*/
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for_blocked_verbose (
thread_pool& tp,
long begin,
long end,
T& obj,
void (T::*funct)(long,long),
long chunks_per_thread = 8
);
/*!
requires
- begin <= end
- chunks_per_thread > 0
ensures
- This function is identical to the parallel_for_blocked() routine defined
above that takes the same arguments (a thread_pool, the half open range
[begin, end), an object, and a pointer to one of its member functions),
except that it will print messages to cout showing the progress in
executing the parallel for loop.
!*/
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for_blocked_verbose (
unsigned long num_threads,
long begin,
long end,
T& obj,
void (T::*funct)(long,long),
long chunks_per_thread = 8
);
/*!
requires
- begin <= end
- chunks_per_thread > 0
ensures
- This function is identical to the parallel_for_blocked() routine defined
above that takes the same arguments (a thread count, the half open range
[begin, end), an object, and a pointer to one of its member functions),
except that it will print messages to cout showing the progress in
executing the parallel for loop.
!*/
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for_blocked_verbose (
thread_pool& tp,
long begin,
long end,
const T& funct,
long chunks_per_thread = 8
);
/*!
requires
- begin <= end
- chunks_per_thread > 0
ensures
- This function is identical to the parallel_for_blocked() routine defined
above that takes the same arguments (a thread_pool, the half open range
[begin, end), and a function object), except that it will print messages
to cout showing the progress in executing the parallel for loop.
!*/
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for_blocked_verbose (
unsigned long num_threads,
long begin,
long end,
const T& funct,
long chunks_per_thread = 8
);
/*!
requires
- begin <= end
- chunks_per_thread > 0
ensures
- This function is identical to the parallel_for_blocked() routine defined
above that takes the same arguments (a thread count, the half open range
[begin, end), and a function object), except that it will print messages
to cout showing the progress in executing the parallel for loop.
!*/
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment