Commit 491cbe1a authored by Davis King's avatar Davis King

Refactored and greatly simplified the BSP implementation. This has fixed a few
subtle race conditions, and now the tool seems to work robustly.
parent aa230458
This diff is collapsed.
...@@ -19,7 +19,7 @@ namespace dlib ...@@ -19,7 +19,7 @@ namespace dlib
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
namespace impl namespace impl1
{ {
struct bsp_con struct bsp_con
{ {
...@@ -71,6 +71,82 @@ namespace dlib ...@@ -71,6 +71,82 @@ namespace dlib
map_id_to_con& cons, map_id_to_con& cons,
unsigned short port unsigned short port
); );
// A single queued message: the payload together with bookkeeping about where it
// came from and what kind of message it is.  Instances are stored by value in
// thread_safe_deque.
struct msg_data
{
// Shared handle to the message payload bytes.  May be reset (made empty) by
// thread_safe_deque::push_and_consume() once the message has been queued.
shared_ptr<std::string> data;
// Identifier of the sender (presumably the BSP node id of the sending node --
// confirm against the code that fills this in).
unsigned long sender_id;
// One-byte tag identifying the kind of message.  The set of valid values is
// defined elsewhere and is not visible here.
char msg_type;
};
class thread_safe_deque
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This is a queue of msg_data objects that can be shared between
            threads.  Every member function takes class_mutex before touching
            the underlying std::deque, and pop() blocks until either an item
            is available or the queue has been disabled.

            Once disable() has been called, pop() always returns false, even
            if items are still stored in the queue.
    !*/
public:
    thread_safe_deque() : sig(class_mutex), disabled(false) {}

    ~thread_safe_deque()
    {
        // Wake up anything still blocked in pop() before the members go away.
        disable();
    }

    void disable()
    /*!
        ensures
            - marks this object as disabled and wakes all threads blocked in
              pop() so they can return false.
    !*/
    {
        auto_mutex lock(class_mutex);
        disabled = true;
        sig.broadcast();
    }

    unsigned long size() const
    /*!
        ensures
            - returns the number of items currently in the queue.
    !*/
    {
        // Take the lock like every other member does; reading the deque's size
        // while another thread is pushing or popping would be a data race.
        auto_mutex lock(class_mutex);
        return data.size();
    }

    void push_front( const msg_data& item)
    /*!
        ensures
            - inserts a copy of item at the front of the queue, so it will be
              the next thing returned by pop().
            - signals a thread waiting in pop(), if there is one.
    !*/
    {
        auto_mutex lock(class_mutex);
        data.push_front(item);
        sig.signal();
    }

    void push_and_consume( msg_data& item)
    /*!
        ensures
            - appends a copy of item to the back of the queue.
            - #item.data is reset (i.e. item no longer refers to the payload).
            - signals a thread waiting in pop(), if there is one.
    !*/
    {
        auto_mutex lock(class_mutex);
        data.push_back(item);
        // do this here so that we don't have to worry about different threads touching the shared_ptr.
        item.data.reset();
        sig.signal();
    }

    bool pop (
        msg_data& item
    )
    /*!
        ensures
            - blocks until the queue is non-empty or this object is disabled.
            - if (this function returns true) then
                - #item == the next thing from the queue
            - else
                - this object is disabled
    !*/
    {
        auto_mutex lock(class_mutex);
        while (data.size() == 0 && !disabled)
            sig.wait();

        // Being disabled takes priority over any items still in the queue.
        if (disabled)
            return false;

        item = data.front();
        data.pop_front();
        return true;
    }

private:
    std::deque<msg_data> data;
    // class_mutex must be declared before sig, since sig is constructed from it.
    dlib::mutex class_mutex;
    dlib::signaler sig;
    bool disabled;
};
} }
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
...@@ -159,7 +235,7 @@ namespace dlib ...@@ -159,7 +235,7 @@ namespace dlib
bsp_context( bsp_context(
unsigned long node_id_, unsigned long node_id_,
impl::map_id_to_con& cons_ impl1::map_id_to_con& cons_
); );
void close_all_connections_gracefully(); void close_all_connections_gracefully();
...@@ -175,28 +251,16 @@ namespace dlib ...@@ -175,28 +251,16 @@ namespace dlib
unsigned long& sending_node_id unsigned long& sending_node_id
); );
void send_to_master_node (
char msg
);
void notify_everyone_if_all_blocked( void send_byte (
char val,
unsigned long target_node_id
); );
/*!
requires
- class_mutex is locked
ensures
- sends out notifications to all the nodes if we are all blocked on receive. This
will cause all receive calls to unblock and return false.
!*/
void read_thread ( void broadcast_byte (
impl::bsp_con* con, char val
unsigned long sender_id
); );
void check_for_errors();
void send_data( void send_data(
const std::string& item, const std::string& item,
unsigned long target_node_id unsigned long target_node_id
...@@ -211,18 +275,14 @@ namespace dlib ...@@ -211,18 +275,14 @@ namespace dlib
rmutex class_mutex; // used to lock any class members touched from more than one thread.
std::string error_message;
bool read_thread_terminated_improperly; // true if any of our connections goes down.
unsigned long outstanding_messages; unsigned long outstanding_messages;
unsigned long num_waiting_nodes; unsigned long num_waiting_nodes;
unsigned long num_terminated_nodes; unsigned long num_terminated_nodes;
rsignaler buf_not_empty; // used to signal when msg_buffer isn't empty
rsignaler terminated_signal;
std::deque<shared_ptr<std::string> > msg_buffer;
std::deque<unsigned long> msg_sender_id;
impl::map_id_to_con& _cons; impl1::thread_safe_deque msg_buffer;
impl1::map_id_to_con& _cons;
const unsigned long _node_id; const unsigned long _node_id;
array<scoped_ptr<thread_function> > threads; array<scoped_ptr<thread_function> > threads;
...@@ -366,7 +426,7 @@ namespace dlib ...@@ -366,7 +426,7 @@ namespace dlib
funct_type& funct funct_type& funct
) )
{ {
impl::map_id_to_con cons; impl1::map_id_to_con cons;
const unsigned long node_id = 0; const unsigned long node_id = 0;
connect_all(cons, hosts, node_id); connect_all(cons, hosts, node_id);
send_out_connection_orders(cons, hosts); send_out_connection_orders(cons, hosts);
...@@ -387,7 +447,7 @@ namespace dlib ...@@ -387,7 +447,7 @@ namespace dlib
ARG1 arg1 ARG1 arg1
) )
{ {
impl::map_id_to_con cons; impl1::map_id_to_con cons;
const unsigned long node_id = 0; const unsigned long node_id = 0;
connect_all(cons, hosts, node_id); connect_all(cons, hosts, node_id);
send_out_connection_orders(cons, hosts); send_out_connection_orders(cons, hosts);
...@@ -410,7 +470,7 @@ namespace dlib ...@@ -410,7 +470,7 @@ namespace dlib
ARG2 arg2 ARG2 arg2
) )
{ {
impl::map_id_to_con cons; impl1::map_id_to_con cons;
const unsigned long node_id = 0; const unsigned long node_id = 0;
connect_all(cons, hosts, node_id); connect_all(cons, hosts, node_id);
send_out_connection_orders(cons, hosts); send_out_connection_orders(cons, hosts);
...@@ -435,7 +495,7 @@ namespace dlib ...@@ -435,7 +495,7 @@ namespace dlib
ARG3 arg3 ARG3 arg3
) )
{ {
impl::map_id_to_con cons; impl1::map_id_to_con cons;
const unsigned long node_id = 0; const unsigned long node_id = 0;
connect_all(cons, hosts, node_id); connect_all(cons, hosts, node_id);
send_out_connection_orders(cons, hosts); send_out_connection_orders(cons, hosts);
...@@ -462,7 +522,7 @@ namespace dlib ...@@ -462,7 +522,7 @@ namespace dlib
ARG4 arg4 ARG4 arg4
) )
{ {
impl::map_id_to_con cons; impl1::map_id_to_con cons;
const unsigned long node_id = 0; const unsigned long node_id = 0;
connect_all(cons, hosts, node_id); connect_all(cons, hosts, node_id);
send_out_connection_orders(cons, hosts); send_out_connection_orders(cons, hosts);
...@@ -483,7 +543,7 @@ namespace dlib ...@@ -483,7 +543,7 @@ namespace dlib
funct_type& funct funct_type& funct
) )
{ {
impl::map_id_to_con cons; impl1::map_id_to_con cons;
unsigned long node_id; unsigned long node_id;
listen_and_connect_all(node_id, cons, listening_port); listen_and_connect_all(node_id, cons, listening_port);
bsp_context obj(node_id, cons); bsp_context obj(node_id, cons);
...@@ -503,7 +563,7 @@ namespace dlib ...@@ -503,7 +563,7 @@ namespace dlib
ARG1 arg1 ARG1 arg1
) )
{ {
impl::map_id_to_con cons; impl1::map_id_to_con cons;
unsigned long node_id; unsigned long node_id;
listen_and_connect_all(node_id, cons, listening_port); listen_and_connect_all(node_id, cons, listening_port);
bsp_context obj(node_id, cons); bsp_context obj(node_id, cons);
...@@ -525,7 +585,7 @@ namespace dlib ...@@ -525,7 +585,7 @@ namespace dlib
ARG2 arg2 ARG2 arg2
) )
{ {
impl::map_id_to_con cons; impl1::map_id_to_con cons;
unsigned long node_id; unsigned long node_id;
listen_and_connect_all(node_id, cons, listening_port); listen_and_connect_all(node_id, cons, listening_port);
bsp_context obj(node_id, cons); bsp_context obj(node_id, cons);
...@@ -549,7 +609,7 @@ namespace dlib ...@@ -549,7 +609,7 @@ namespace dlib
ARG3 arg3 ARG3 arg3
) )
{ {
impl::map_id_to_con cons; impl1::map_id_to_con cons;
unsigned long node_id; unsigned long node_id;
listen_and_connect_all(node_id, cons, listening_port); listen_and_connect_all(node_id, cons, listening_port);
bsp_context obj(node_id, cons); bsp_context obj(node_id, cons);
...@@ -575,7 +635,7 @@ namespace dlib ...@@ -575,7 +635,7 @@ namespace dlib
ARG4 arg4 ARG4 arg4
) )
{ {
impl::map_id_to_con cons; impl1::map_id_to_con cons;
unsigned long node_id; unsigned long node_id;
listen_and_connect_all(node_id, cons, listening_port); listen_and_connect_all(node_id, cons, listening_port);
bsp_context obj(node_id, cons); bsp_context obj(node_id, cons);
......
...@@ -15,6 +15,11 @@ namespace dlib ...@@ -15,6 +15,11 @@ namespace dlib
{ {
/*! /*!
WHAT THIS OBJECT REPRESENTS WHAT THIS OBJECT REPRESENTS
THREAD SAFETY
This object is not thread-safe. This means you must serialize all access
to it using an appropriate mutex or other synchronization mechanism if it
is to be accessed from multiple threads.
!*/ !*/
public: public:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment