Commit ecc00ce7 authored by Davis King's avatar Davis King

merged

parents 573fce15 99ce564b
......@@ -240,7 +240,7 @@ namespace dlib
return;
CHECK_CUDNN(cudnnSetTensor(context(),
descriptor(t),
t.device(),
t.device_write_only(),
&value));
}
......
......@@ -95,6 +95,13 @@ namespace dlib
return data_host.get();
}
// Returns a writable host pointer WITHOUT copying data back from the device.
// The device-side copy is marked stale, so callers must overwrite every
// element of the returned block before reading from it.
float* host_write_only()
{
    // Flip the currency flags: discard the device copy, trust the host copy.
    device_current = false;
    host_current = true;
    return data_host.get();
}
const float* device() const
{
#ifndef DLIB_USE_CUDA
......@@ -116,6 +123,18 @@ namespace dlib
return data_device.get();
}
// Returns a writable device pointer WITHOUT copying data up from the host.
// The host-side copy is marked stale, so callers must overwrite every
// element of the returned block before reading from it.
float* device_write_only()
{
#ifndef DLIB_USE_CUDA
    DLIB_CASSERT(false, "CUDA NOT ENABLED");
#endif
    // Don't hand out the buffer while an async copy may still be touching it.
    wait_for_transfer_to_finish();
    // Discard the host copy and mark the device copy as authoritative.
    device_current = true;
    device_in_use = true;
    host_current = false;
    return data_device.get();
}
// True when the host-side copy of the data is up to date (i.e. calling
// host() would not trigger a device-to-host transfer).
bool host_ready (
) const
{
    return host_current;
}
......
......@@ -125,6 +125,20 @@ namespace dlib
calling host().
!*/
float* host_write_only(
);
/*!
ensures
- This function returns the same pointer as host(), except that it never
performs a device to host memory copy. Instead, it immediately marks the
device side data as out of date, effectively discarding it. Therefore,
the values in the data pointed to by host_write_only() are undefined and
you should only call host_write_only() if you are going to assign to
every memory location in the returned memory block.
- #host_ready() == true
- #device_ready() == false
!*/
const float* device(
) const;
/*!
......@@ -154,6 +168,23 @@ namespace dlib
- #device_ready() == true
!*/
float* device_write_only(
);
/*!
requires
- DLIB_USE_CUDA is #defined
ensures
- This function returns the same pointer as device(), except that it never
performs a host to device memory copy. Instead, it immediately marks the
host side data as out of date, effectively discarding it. Therefore, the
values in the data pointed to by device_write_only() are undefined and
you should only call device_write_only() if you are going to assign to
every memory location in the returned memory block.
- #host_ready() == false
- #device_ready() == true
!*/
size_t size(
) const;
/*!
......
......@@ -68,7 +68,7 @@ namespace dlib
const double scale = 1.0/output_tensor.num_samples();
double loss = 0;
const float* out_data = output_tensor.host();
float* g = grad.host();
float* g = grad.host_write_only();
for (long i = 0; i < output_tensor.num_samples(); ++i)
{
const float y = *truth++;
......@@ -79,6 +79,10 @@ namespace dlib
loss += scale*temp;
g[i] = -scale*y;
}
else
{
g[i] = 0;
}
}
return loss;
}
......
......@@ -8,6 +8,7 @@
#include "../matrix.h"
#include "cudnn_dlibapi.h"
#include "gpu_data.h"
#include "../byte_orderer.h"
#include <memory>
namespace dlib
......@@ -46,8 +47,10 @@ namespace dlib
virtual const float* host() const = 0;
virtual float* host() = 0;
virtual float* host_write_only() = 0;
virtual const float* device() const = 0;
virtual float* device() = 0;
virtual float* device_write_only() = 0;
tensor& operator= (float val)
{
......@@ -62,8 +65,9 @@ namespace dlib
return *this;
}
#endif
for (auto& d : *this)
d = val;
auto d = host_write_only();
for (size_t i = 0; i < size(); ++i)
d[i] = val;
return *this;
}
......@@ -95,7 +99,7 @@ namespace dlib
static_assert((is_same_type<float, typename EXP::type>::value == true),
"To assign a matrix to a tensor the matrix must contain float values");
set_ptrm(host(), m_n, m_nr*m_nc*m_k) = item;
set_ptrm(host_write_only(), m_n, m_nr*m_nc*m_k) = item;
return *this;
}
......@@ -279,8 +283,10 @@ namespace dlib
virtual const float* host() const { return data_instance.host(); }
virtual float* host() { return data_instance.host(); }
virtual float* host_write_only() { return data_instance.host_write_only(); }
virtual const float* device() const { return data_instance.device(); }
virtual float* device() { return data_instance.device(); }
virtual float* device_write_only() { return data_instance.device_write_only(); }
void clear(
)
......@@ -373,21 +379,33 @@ namespace dlib
/*!
    Serializes a tensor to out using serialization format version 2: the
    dimensions are written with dlib's normal serialization, then the float
    data is written raw as 4-byte little endian IEEE floats.
!*/
inline void serialize(const tensor& item, std::ostream& out)
{
    int version = 2;
    serialize(version, out);
    serialize(item.num_samples(), out);
    serialize(item.k(), out);
    serialize(item.nr(), out);
    serialize(item.nc(), out);
    byte_orderer bo;
    auto sbuf = out.rdbuf();
    for (auto d : item)
    {
        // Write out our data as 4byte little endian IEEE floats rather than using
        // dlib's default float serialization.  We do this because it will result in
        // more compact outputs.  It's slightly less portable but it seems doubtful
        // that any CUDA enabled platform isn't going to use IEEE floats.  But if one
        // does we can just update the serialization code here to handle it if such a
        // platform is encountered.
        bo.host_to_little(d);
        static_assert(sizeof(d)==4, "This serialization code assumes we are writing 4 byte floats");
        sbuf->sputn((char*)&d, sizeof(d));
    }
}
/*!
    Deserializes a tensor written by the serialize() routine above (format
    version 2).  Throws serialization_error on a version mismatch or a
    truncated/failed stream read.
!*/
inline void deserialize(resizable_tensor& item, std::istream& in)
{
    int version;
    deserialize(version, in);
    if (version != 2)
        throw serialization_error("Unexpected version found while deserializing dlib::resizable_tensor.");

    long num_samples=0, k=0, nr=0, nc=0;
    deserialize(num_samples, in);
    deserialize(k, in);
    deserialize(nr, in);
    deserialize(nc, in);
    item.set_size(num_samples, k, nr, nc);
    byte_orderer bo;
    auto sbuf = in.rdbuf();
    for (auto& d : item)
    {
        // The data was written as raw 4-byte little endian IEEE floats, so read
        // it back the same way and convert to the host's byte order.
        static_assert(sizeof(d)==4, "This serialization code assumes we are writing 4 byte floats");
        if (sbuf->sgetn((char*)&d,sizeof(d)) != sizeof(d))
        {
            in.setstate(std::ios::badbit);
            throw serialization_error("Error reading data while deserializing dlib::resizable_tensor.");
        }
        bo.little_to_host(d);
    }
}
// ----------------------------------------------------------------------------------------
......@@ -441,8 +469,10 @@ namespace dlib
virtual const float* host() const { return data_instance->host()+data_offset; }
virtual float* host() { return data_instance->host()+data_offset; }
virtual float* host_write_only() { return data_instance->host()+data_offset; }
virtual const float* device() const { return data_instance->device()+data_offset; }
virtual float* device() { return data_instance->device()+data_offset; }
virtual float* device_write_only() { return data_instance->device()+data_offset; }
#ifdef DLIB_USE_CUDA
......
......@@ -133,6 +133,18 @@ namespace dlib
calling host().
!*/
virtual float* host_write_only(
) = 0;
/*!
ensures
- This function returns the same pointer as host(), except that it never
performs a device to host memory copy. Instead, it immediately marks the
device side data as out of date, effectively discarding it. Therefore,
the values in the data pointed to by host_write_only() are undefined and
you should only call host_write_only() if you are going to assign to
every memory location in the returned memory block.
!*/
virtual const float* device(
) const = 0;
/*!
......@@ -161,6 +173,20 @@ namespace dlib
host() will perform a device to host transfer.
!*/
virtual float* device_write_only(
) = 0;
/*!
requires
- DLIB_USE_CUDA is #defined
ensures
- This function returns the same pointer as device(), except that it never
performs a host to device memory copy. Instead, it immediately marks the
host side data as out of date, effectively discarding it. Therefore, the
values in the data pointed to by device_write_only() are undefined and
you should only call device_write_only() if you are going to assign to
every memory location in the returned memory block.
!*/
tensor& operator= (
float val
);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment