Commit 37278e99 authored by Davis King's avatar Davis King

cleaned up trainer a bit

parent 6d281c31
......@@ -1420,15 +1420,24 @@ namespace dlib
typedef typename net_type::label_type label_type;
typedef typename net_type::input_type input_type;
dnn_trainer()
{}
dnn_trainer(
)
{
init();
}
explicit dnn_trainer(const net_type& net_) : net(net_) {}
explicit dnn_trainer(const net_type& net_) : net(net_)
{
init();
}
dnn_trainer(
const net_type& net_,
const solver_type& solver_
) : net(net_), solvers(solver_) {}
) : net(net_), solvers(solver_)
{
init();
}
const net_type& get_net (
) const { return net; }
......@@ -1447,6 +1456,29 @@ namespace dlib
solvers = solver_;
}
unsigned long get_mini_batch_size (
) const { return mini_batch_size; }
void set_mini_batch_size (
unsigned long batch_size
)
{
DLIB_CASSERT(batch_size > 0,"");
mini_batch_size = batch_size;
}
unsigned long get_num_epochs (
) const { return num_epochs; }
void set_num_epochs (
unsigned long num
) const
{
DLIB_CASSERT(num > 0,"");
num_epochs = num;
}
const sstack<solver_type,net_type::num_layers>& get_solvers (
) const { return solvers; }
......@@ -1458,19 +1490,50 @@ namespace dlib
const std::vector<label_type>& labels
)
{
DLIB_CASSERT(data.size() == labels.size(), "");
DLIB_CASSERT(data.size() == labels.size() && data.size() > 0, "");
resizable_tensor t1, t2;
const int batch_size = 11;
for (int iter = 0; iter < 300; ++iter)
for (unsigned long epoch_iteration = 0; epoch_iteration < num_epochs; ++epoch_iteration)
{
for (unsigned long i = 0; i < data.size(); i+=batch_size)
unsigned long j = 0;
// Load two tensors worth of data at once so we can overlap the computation
// and data transfer between the host and the device.
if (j < data.size())
{
net.to_tensor(data.begin()+j, data.begin()+std::min(j+mini_batch_size,data.size()), t1);
j += mini_batch_size;
}
if (j < data.size())
{
net.to_tensor(data.begin()+j, data.begin()+std::min(j+mini_batch_size,data.size()), t2);
j += mini_batch_size;
}
unsigned long i = 0;
while (i < data.size())
{
// TODO, move the contents of update() here and do the alternating tensor
// loading thing to hide GPU transfer latency.
std::cout << "loss: "<<net.update(data.begin()+i,
data.begin()+std::min(i+batch_size,data.size()),
labels.begin()+i,
solvers) << std::endl;
net.update(t1, labels.begin()+i, solvers);
i += mini_batch_size;
if (j < data.size())
{
net.to_tensor(data.begin()+j, data.begin()+std::min(j+mini_batch_size,data.size()), t1);
j += mini_batch_size;
}
if (i < data.size())
{
net.update(t2, labels.begin()+i, solvers);
i += mini_batch_size;
if (j < data.size())
{
net.to_tensor(data.begin()+j, data.begin()+std::min(j+mini_batch_size,data.size()), t2);
j += mini_batch_size;
}
}
}
}
return net;
......@@ -1480,20 +1543,53 @@ namespace dlib
const std::vector<input_type>& data
)
{
DLIB_CASSERT(data.size() > 0, "");
const bool has_unsupervised_loss = std::is_same<no_label_type, label_type>::value;
static_assert(has_unsupervised_loss,
"You can only call this version of train() when using an unsupervised loss.");
const int batch_size = 10;
for (int iter = 0; iter < 300; ++iter)
resizable_tensor t1, t2;
for (unsigned long epoch_iteration = 0; epoch_iteration < num_epochs; ++epoch_iteration)
{
for (unsigned long i = 0; i < data.size(); i+=batch_size)
unsigned long j = 0;
// Load two tensors worth of data at once so we can overlap the computation
// and data transfer between the host and the device.
if (j < data.size())
{
// TODO, move the contents of update() here and do the alternating tensor
// loading thing to hide GPU transfer latency.
std::cout << "loss: "<<net.update(data.begin()+i,
data.begin()+std::min(i+batch_size,data.size()),
solvers) << std::endl;
net.to_tensor(data.begin()+j, data.begin()+std::min(j+mini_batch_size,data.size()), t1);
j += mini_batch_size;
}
if (j < data.size())
{
net.to_tensor(data.begin()+j, data.begin()+std::min(j+mini_batch_size,data.size()), t2);
j += mini_batch_size;
}
unsigned long i = 0;
while (i < data.size())
{
net.update(t1, solvers);
i += mini_batch_size;
if (j < data.size())
{
net.to_tensor(data.begin()+j, data.begin()+std::min(j+mini_batch_size,data.size()), t1);
j += mini_batch_size;
}
if (i < data.size())
{
net.update(t2, solvers);
i += mini_batch_size;
if (j < data.size())
{
net.to_tensor(data.begin()+j, data.begin()+std::min(j+mini_batch_size,data.size()), t2);
j += mini_batch_size;
}
}
}
}
return net;
......@@ -1501,6 +1597,15 @@ namespace dlib
private:
void init()
{
num_epochs = 300;
mini_batch_size = 11;
}
unsigned long num_epochs;
unsigned long mini_batch_size;
net_type net;
sstack<solver_type,net_type::num_layers> solvers;
};
......
......@@ -975,6 +975,20 @@ namespace dlib
sstack<solver_type,net_type::num_layers>& get_solvers (
);
unsigned long get_mini_batch_size (
) const;
void set_mini_batch_size (
unsigned long batch_size
);
unsigned long get_num_epochs (
) const;
void set_num_epochs (
unsigned long num
) const;
const net_type& train (
const std::vector<input_type>& data,
const std::vector<label_type>& labels
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment