Commit da52cbb8 authored by Davis King

Added parallel_for() and parallel_for_blocked().

parent d2625a0d
@@ -16,6 +16,7 @@
#include "threads/thread_function_extension.h"
#include "threads/thread_pool_extension.h"
#include "threads/read_write_mutex_extension.h"
#include "threads/parallel_for_extension.h"
#endif // DLIB_THREADs_
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_PARALLEL_FoR_H__
#define DLIB_PARALLEL_FoR_H__
#include "parallel_for_extension_abstract.h"
#include "thread_pool_extension.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
namespace impl
{
template <typename T>
class helper_parallel_for
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            An adapter that turns a per-index member function, (obj.*funct)(i),
            into a per-range operation.  It stores only a reference to obj and
            the member function pointer, so the referenced object has to stay
            alive for as long as this helper is in use.
    !*/
public:
    T& obj;
    void (T::*funct)(long);

    helper_parallel_for (
        T& target,
        void (T::*per_index)(long)
    ) :
        obj(target),
        funct(per_index)
    {}

    // Calls (obj.*funct)(idx) for idx = begin, begin+1, ..., end-1 in that order.
    // Nothing is called when begin >= end.
    void process_block (long begin, long end)
    {
        long idx = begin;
        while (idx < end)
        {
            (obj.*funct)(idx);
            ++idx;
        }
    }
};
template <typename T>
class helper_parallel_for_funct
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            Wraps a function object so that calling it with a single index is
            available as the member function run(long).  Only a const reference
            to the function object is kept (no copy is made), so funct(i) must
            be callable through a const T and the function object must outlive
            this helper.
    !*/
public:
    const T& funct;

    helper_parallel_for_funct (
        const T& callable
    ) :
        funct(callable)
    {}

    // Forwards the index to the wrapped function object.
    void run(long i)
    {
        funct(i);
    }
};
template <typename T>
class helper_parallel_for_funct2
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            Wraps a function object so that calling it with a (begin, end) pair
            is available as the member function run(long, long).  Only a const
            reference to the function object is kept (no copy is made), so
            funct(begin, end) must be callable through a const T and the
            function object must outlive this helper.
    !*/
public:
    const T& funct;

    helper_parallel_for_funct2 (
        const T& callable
    ) :
        funct(callable)
    {}

    // Forwards the range bounds, unchanged, to the wrapped function object.
    void run(long begin, long end)
    {
        funct(begin, end);
    }
};
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Splits the range [begin, end) into blocks and has the thread pool tp invoke
// (obj.*funct)(block_begin, block_end) on each block, then waits for the pool
// via tp.wait_for_all_tasks() before returning.
//
//   tp                - thread pool the blocks are submitted to.
//   begin, end        - half open range to process.  Callers should pass begin <= end.
//   obj, funct        - object and member function invoked on each block.
//   chunks_per_thread - how many blocks to aim for per pool thread.  Must be > 0
//                       (it is used as a divisor).
//
// If tp has no threads then (obj.*funct)(begin, end) is called once, directly.
template <typename T>
void parallel_for_blocked (
    thread_pool& tp,
    long begin,
    long end,
    T& obj,
    void (T::*funct)(long, long),
    long chunks_per_thread = 4
)
{
    if (tp.num_threads_in_pool() != 0)
    {
        const long num = end-begin;
        const long num_workers = static_cast<long>(tp.num_threads_in_pool());
        // How many samples to process in a single task (aim for chunks_per_thread jobs per worker)
        const long block_size = std::max(1L, num/(num_workers*chunks_per_thread));
        for (long i = 0; i < num; i+=block_size)
        {
            // i counts elements from the start of the range, so both bounds have
            // to be shifted by begin.  Without the shift, any call with
            // begin != 0 would process [0, end-begin) instead of [begin, end).
            tp.add_task(obj, funct, begin+i, begin+std::min(i+block_size, num));
        }
        tp.wait_for_all_tasks();
    }
    else
    {
        // Since there aren't any threads in the pool we might as well just invoke
        // the function directly since that's all the thread_pool object would do.
        // But doing it ourselves skips a mutex lock.
        (obj.*funct)(begin, end);
    }
}
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for_blocked (
unsigned long num_threads,
long begin,
long end,
T& obj,
void (T::*funct)(long, long),
long chunks_per_thread = 4
)
{
thread_pool tp(num_threads);
parallel_for_blocked(tp, begin, end, obj, funct, chunks_per_thread);
}
// ----------------------------------------------------------------------------------------
// Function object version of parallel_for_blocked().  funct is wrapped (by
// reference) in an adapter whose run(long,long) member calls funct(begin, end),
// which lets the member function overload do the actual work.
template <typename T>
void parallel_for_blocked (
    thread_pool& tp,
    long begin,
    long end,
    const T& funct,
    long chunks_per_thread = 4
)
{
    typedef impl::helper_parallel_for_funct2<T> adapter_type;

    adapter_type adapter(funct);
    parallel_for_blocked(tp, begin, end, adapter, &adapter_type::run, chunks_per_thread);
}
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for_blocked (
unsigned long num_threads,
long begin,
long end,
const T& funct,
long chunks_per_thread = 4
)
{
thread_pool tp(num_threads);
parallel_for_blocked(tp, begin, end, funct, chunks_per_thread);
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Member function version of parallel_for().  The per-index call (obj.*funct)(i)
// is wrapped in an adapter whose process_block(begin, end) loops over a block of
// indices, and that adapter is handed to parallel_for_blocked().
template <typename T>
void parallel_for (
    thread_pool& tp,
    long begin,
    long end,
    T& obj,
    void (T::*funct)(long),
    long chunks_per_thread = 4
)
{
    typedef impl::helper_parallel_for<T> block_runner;

    block_runner runner(obj, funct);
    parallel_for_blocked(tp, begin, end, runner, &block_runner::process_block, chunks_per_thread);
}
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for (
unsigned long num_threads,
long begin,
long end,
T& obj,
void (T::*funct)(long),
long chunks_per_thread = 4
)
{
thread_pool tp(num_threads);
parallel_for(tp, begin, end, obj, funct, chunks_per_thread);
}
// ----------------------------------------------------------------------------------------
// Function object version of parallel_for().  funct is wrapped (by reference) in
// an adapter whose run(long) member calls funct(i), which lets the member
// function overload of parallel_for() do the actual work.
template <typename T>
void parallel_for (
    thread_pool& tp,
    long begin,
    long end,
    const T& funct,
    long chunks_per_thread = 4
)
{
    typedef impl::helper_parallel_for_funct<T> adapter_type;

    adapter_type adapter(funct);
    parallel_for(tp, begin, end, adapter, &adapter_type::run, chunks_per_thread);
}
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for (
unsigned long num_threads,
long begin,
long end,
const T& funct,
long chunks_per_thread = 4
)
{
thread_pool tp(num_threads);
parallel_for(tp, begin, end, funct, chunks_per_thread);
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_PARALLEL_FoR_H__
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_PARALLEL_FoR_ABSTRACT_H__
#ifdef DLIB_PARALLEL_FoR_ABSTRACT_H__
#include "thread_pool_extension_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for_blocked (
thread_pool& tp,
long begin,
long end,
T& obj,
void (T::*funct)(long, long),
long chunks_per_thread = 4
);
/*!
    requires
        - begin <= end
        - chunks_per_thread > 0
    ensures
        - This is a convenience function for submitting a block of jobs to a
          thread_pool.  In particular, given the range [begin, end), this function
          will split the range into approximately
          tp.num_threads_in_pool()*chunks_per_thread blocks, which it will then
          submit to the thread_pool.  The given thread_pool will then call
          (obj.*funct)() on each of the subranges.
        - To be precise, suppose we have broken the range [begin, end) into the
          following subranges:
            - [begin[0], end[0])
            - [begin[1], end[1])
            - [begin[2], end[2])
            ...
            - [begin[n], end[n])
          Then parallel_for_blocked() submits each of these subranges to tp for
          processing such that (obj.*funct)(begin[i], end[i]) is invoked for all
          valid values of i.
        - A larger chunks_per_thread results in more, smaller, subranges.  A
          subrange always contains at least one element.
        - This function calls tp.wait_for_all_tasks() after submitting the
          subranges and only then returns.
        - if (tp.num_threads_in_pool() == 0) then
            - The range is not split.  Instead, (obj.*funct)(begin, end) is called
              exactly once, directly by this function.
        - Since the subranges are handed to a thread_pool, (obj.*funct) should be
          prepared to run on several subranges at the same time.
!*/
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for_blocked (
unsigned long num_threads,
long begin,
long end,
T& obj,
void (T::*funct)(long, long),
long chunks_per_thread = 4
);
/*!
    requires
        - begin <= end
        - chunks_per_thread > 0
    ensures
        - This function is equivalent to the following block of code:
            thread_pool tp(num_threads);
            parallel_for_blocked(tp, begin, end, obj, funct, chunks_per_thread);
        - Note that this means a new thread_pool is constructed on every call.
          Code that calls this function repeatedly may prefer the overload that
          takes an already existing thread_pool.
!*/
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for_blocked (
thread_pool& tp,
long begin,
long end,
const T& funct,
long chunks_per_thread = 4
);
/*!
    requires
        - chunks_per_thread > 0
        - begin <= end
    ensures
        - This is a convenience function for submitting a block of jobs to a
          thread_pool.  In particular, given the range [begin, end), this function
          will split the range into approximately
          tp.num_threads_in_pool()*chunks_per_thread blocks, which it will then
          submit to the thread_pool.  The given thread_pool will then call funct()
          on each of the subranges.
        - To be precise, suppose we have broken the range [begin, end) into the
          following subranges:
            - [begin[0], end[0])
            - [begin[1], end[1])
            - [begin[2], end[2])
            ...
            - [begin[n], end[n])
          Then parallel_for_blocked() submits each of these subranges to tp for
          processing such that funct(begin[i], end[i]) is invoked for all valid
          values of i.
        - funct is held by const reference and is not copied.  Therefore,
          funct(long, long) must be callable on a const T (e.g. a function object
          with a const operator()).
        - Since the subranges are handed to a thread_pool, funct should be prepared
          to run on several subranges at the same time.
!*/
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for_blocked (
unsigned long num_threads,
long begin,
long end,
const T& funct,
long chunks_per_thread = 4
);
/*!
    requires
        - begin <= end
        - chunks_per_thread > 0
    ensures
        - This function is equivalent to the following block of code:
            thread_pool tp(num_threads);
            parallel_for_blocked(tp, begin, end, funct, chunks_per_thread);
        - Note that this means a new thread_pool is constructed on every call.
          Code that calls this function repeatedly may prefer the overload that
          takes an already existing thread_pool.
!*/
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for (
thread_pool& tp,
long begin,
long end,
T& obj,
void (T::*funct)(long),
long chunks_per_thread = 4
);
/*!
    requires
        - begin <= end
        - chunks_per_thread > 0
    ensures
        - This function is equivalent to the following function call:
            parallel_for_blocked(tp, begin, end, [&](long begin_sub, long end_sub)
            {
                for (long i = begin_sub; i < end_sub; ++i)
                    (obj.*funct)(i);
            }, chunks_per_thread);
        - Therefore, this routine invokes (obj.*funct)(i) for all i in the range
          [begin, end).  However, it does so using tp.num_threads_in_pool() parallel
          threads.
        - The indices that fall into the same subrange are visited one after
          another in increasing order.  No ordering should be assumed between
          indices that fall into different subranges.
!*/
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for (
unsigned long num_threads,
long begin,
long end,
T& obj,
void (T::*funct)(long),
long chunks_per_thread = 4
);
/*!
    requires
        - begin <= end
        - chunks_per_thread > 0
    ensures
        - This function is equivalent to the following block of code:
            thread_pool tp(num_threads);
            parallel_for(tp, begin, end, obj, funct, chunks_per_thread);
        - Note that this means a new thread_pool is constructed on every call.
          Code that calls this function repeatedly may prefer the overload that
          takes an already existing thread_pool.
!*/
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for (
thread_pool& tp,
long begin,
long end,
const T& funct,
long chunks_per_thread = 4
);
/*!
    requires
        - begin <= end
        - chunks_per_thread > 0
    ensures
        - This function is equivalent to the following function call:
            parallel_for_blocked(tp, begin, end, [&](long begin_sub, long end_sub)
            {
                for (long i = begin_sub; i < end_sub; ++i)
                    funct(i);
            }, chunks_per_thread);
        - Therefore, this routine invokes funct(i) for all i in the range
          [begin, end).  However, it does so using tp.num_threads_in_pool() parallel
          threads.
        - funct is held by const reference and is not copied.  Therefore,
          funct(long) must be callable on a const T (e.g. a function object with a
          const operator()).
!*/
// ----------------------------------------------------------------------------------------
template <typename T>
void parallel_for (
unsigned long num_threads,
long begin,
long end,
const T& funct,
long chunks_per_thread = 4
);
/*!
    requires
        - begin <= end
        - chunks_per_thread > 0
    ensures
        - This function is equivalent to the following block of code:
            thread_pool tp(num_threads);
            parallel_for(tp, begin, end, funct, chunks_per_thread);
        - Note that this means a new thread_pool is constructed on every call.
          Code that calls this function repeatedly may prefer the overload that
          takes an already existing thread_pool.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_PARALLEL_FoR_ABSTRACT_H__
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment