Commit ecc00ce7 authored by Davis King

merged

parents 573fce15 99ce564b
dlib/dnn/cudnn_dlibapi.cpp
@@ -240,7 +240,7 @@ namespace dlib
             return;
         CHECK_CUDNN(cudnnSetTensor(context(),
                                    descriptor(t),
-                                   t.device(),
+                                   t.device_write_only(),
                                    &value));
     }
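The change above is safe because cudnnSetTensor() assigns the given value to every element of the tensor, so whatever the device buffer held beforehand is irrelevant and the host-to-device synchronization that device() would perform can be skipped. A minimal sketch of the same pattern using gpu_data directly (assumes DLIB_USE_CUDA; zero_on_device is an illustrative helper, not dlib API):

    #include <dlib/dnn.h>
    #include <cuda_runtime.h>

    // Overwrite an entire gpu_data buffer on the device without syncing it first.
    void zero_on_device(dlib::gpu_data& buf)
    {
        // device() would pay for a host-to-device copy whenever the host side
        // is newer. device_write_only() skips that copy because we promise to
        // assign every element, which cudaMemset below does.
        float* d = buf.device_write_only();
        cudaMemset(d, 0, buf.size()*sizeof(float));
    }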
dlib/dnn/gpu_data.h
@@ -95,6 +95,13 @@ namespace dlib
             return data_host.get();
         }

+        float* host_write_only()
+        {
+            host_current = true;
+            device_current = false;
+            return data_host.get();
+        }
+
         const float* device() const
         {
 #ifndef DLIB_USE_CUDA
@@ -116,6 +123,18 @@ namespace dlib
             return data_device.get();
         }

+        float* device_write_only()
+        {
+#ifndef DLIB_USE_CUDA
+            DLIB_CASSERT(false, "CUDA NOT ENABLED");
+#endif
+            wait_for_transfer_to_finish();
+            host_current = false;
+            device_current = true;
+            device_in_use = true;
+            return data_device.get();
+        }
+
         bool host_ready (
         ) const { return host_current; }
dlib/dnn/gpu_data_abstract.h
@@ -125,6 +125,20 @@ namespace dlib
               calling host().
         !*/

+        float* host_write_only(
+        );
+        /*!
+            ensures
+                - This function returns the same pointer as host(), except that it never
+                  performs a device to host memory copy. Instead, it immediately marks the
+                  device side data as out of date, effectively discarding it. Therefore,
+                  the values in the data pointed to by host_write_only() are undefined and
+                  you should only call host_write_only() if you are going to assign to
+                  every memory location in the returned memory block.
+                - #host_ready() == true
+                - #device_ready() == false
+        !*/
+
         const float* device(
         ) const;
         /*!
@@ -154,6 +168,23 @@ namespace dlib
                 - #device_ready() == true
         !*/

+        float* device_write_only(
+        );
+        /*!
+            requires
+                - DLIB_USE_CUDA is #defined
+            ensures
+                - This function returns the same pointer as device(), except that it never
+                  performs a host to device memory copy. Instead, it immediately marks the
+                  host side data as out of date, effectively discarding it. Therefore, the
+                  values in the data pointed to by device_write_only() are undefined and
+                  you should only call device_write_only() if you are going to assign to
+                  every memory location in the returned memory block.
+                - #host_ready() == false
+                - #device_ready() == true
+        !*/
+
         size_t size(
         ) const;
         /*!
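A minimal host-side sketch exercising the contract documented above: host_write_only() is pure bookkeeping, so the caller must treat the returned block as uninitialized and assign all of it.

    #include <dlib/dnn.h>
    #include <cassert>

    int main()
    {
        dlib::gpu_data buf;
        buf.set_size(100);

        // The returned block has undefined contents, so assign every element.
        float* p = buf.host_write_only();
        for (size_t i = 0; i < buf.size(); ++i)
            p[i] = 1;

        // The postconditions stated in the spec:
        assert(buf.host_ready());
        assert(!buf.device_ready());
        return 0;
    }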
dlib/dnn/loss.h
@@ -68,7 +68,7 @@ namespace dlib
             const double scale = 1.0/output_tensor.num_samples();
             double loss = 0;
             const float* out_data = output_tensor.host();
-            float* g = grad.host();
+            float* g = grad.host_write_only();
             for (long i = 0; i < output_tensor.num_samples(); ++i)
             {
                 const float y = *truth++;
@@ -79,6 +79,10 @@ namespace dlib
                     loss += scale*temp;
                     g[i] = -scale*y;
                 }
+                else
+                {
+                    g[i] = 0;
+                }
             }
             return loss;
         }
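The new else branch is forced by the switch from grad.host() to grad.host_write_only(): the gradient buffer's contents are now undefined, so the loop has to assign every element instead of leaving off-margin samples untouched. A standalone sketch of the resulting hinge-gradient pattern (the function and parameter names are illustrative, not dlib API):

    // Hinge loss gradient over n samples; g points at write-only memory, so
    // the loop must write every element. Returns the mean loss.
    double hinge_loss_and_gradient(const float* out, const float* truth,
                                   float* g, long n)
    {
        const double scale = 1.0/n;
        double loss = 0;
        for (long i = 0; i < n; ++i)
        {
            const float y = truth[i];
            const float temp = 1 - y*out[i];
            if (temp > 0)
            {
                loss += scale*temp;
                g[i] = -scale*y;   // inside the margin: non-zero gradient
            }
            else
            {
                g[i] = 0;          // must be written explicitly: g is uninitialized
            }
        }
        return loss;
    }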
dlib/dnn/tensor.h
@@ -8,6 +8,7 @@
 #include "../matrix.h"
 #include "cudnn_dlibapi.h"
 #include "gpu_data.h"
+#include "../byte_orderer.h"
 #include <memory>

 namespace dlib
@@ -46,8 +47,10 @@ namespace dlib
         virtual const float* host() const = 0;
         virtual float* host() = 0;
+        virtual float* host_write_only() = 0;
         virtual const float* device() const = 0;
         virtual float* device() = 0;
+        virtual float* device_write_only() = 0;

         tensor& operator= (float val)
         {
@@ -62,8 +65,9 @@ namespace dlib
                 return *this;
             }
 #endif
-            for (auto& d : *this)
-                d = val;
+            auto d = host_write_only();
+            for (size_t i = 0; i < size(); ++i)
+                d[i] = val;
             return *this;
         }
@@ -95,7 +99,7 @@ namespace dlib
             static_assert((is_same_type<float, typename EXP::type>::value == true),
                 "To assign a matrix to a tensor the matrix must contain float values");
-            set_ptrm(host(), m_n, m_nr*m_nc*m_k) = item;
+            set_ptrm(host_write_only(), m_n, m_nr*m_nc*m_k) = item;
             return *this;
         }
@@ -279,8 +283,10 @@ namespace dlib
         virtual const float* host() const { return data_instance.host(); }
         virtual float* host() { return data_instance.host(); }
+        virtual float* host_write_only() { return data_instance.host_write_only(); }
         virtual const float* device() const { return data_instance.device(); }
         virtual float* device() { return data_instance.device(); }
+        virtual float* device_write_only() { return data_instance.device_write_only(); }

         void clear(
         )
@@ -373,21 +379,33 @@ namespace dlib
     inline void serialize(const tensor& item, std::ostream& out)
     {
-        int version = 1;
+        int version = 2;
         serialize(version, out);
         serialize(item.num_samples(), out);
         serialize(item.k(), out);
         serialize(item.nr(), out);
         serialize(item.nc(), out);
-        for (auto& d : item)
-            serialize(d, out);
+        byte_orderer bo;
+        auto sbuf = out.rdbuf();
+        for (auto d : item)
+        {
+            // Write out our data as 4byte little endian IEEE floats rather than using
+            // dlib's default float serialization. We do this because it will result in
+            // more compact outputs. It's slightly less portable but it seems doubtful
+            // that any CUDA enabled platform isn't going to use IEEE floats. But if one
+            // does we can just update the serialization code here to handle it if such a
+            // platform is encountered.
+            bo.host_to_little(d);
+            static_assert(sizeof(d)==4, "This serialization code assumes we are writing 4 byte floats");
+            sbuf->sputn((char*)&d, sizeof(d));
+        }
     }

     inline void deserialize(resizable_tensor& item, std::istream& in)
     {
         int version;
         deserialize(version, in);
-        if (version != 1)
+        if (version != 2)
             throw serialization_error("Unexpected version found while deserializing dlib::resizable_tensor.");

         long num_samples=0, k=0, nr=0, nc=0;
@@ -396,8 +414,18 @@ namespace dlib
         deserialize(nr, in);
         deserialize(nc, in);
         item.set_size(num_samples, k, nr, nc);
+        byte_orderer bo;
+        auto sbuf = in.rdbuf();
         for (auto& d : item)
-            deserialize(d, in);
+        {
+            static_assert(sizeof(d)==4, "This serialization code assumes we are writing 4 byte floats");
+            if (sbuf->sgetn((char*)&d,sizeof(d)) != sizeof(d))
+            {
+                in.setstate(std::ios::badbit);
+                throw serialization_error("Error reading data while deserializing dlib::resizable_tensor.");
+            }
+            bo.little_to_host(d);
+        }
     }

 // ----------------------------------------------------------------------------------------
@@ -441,8 +469,10 @@ namespace dlib
         virtual const float* host() const { return data_instance->host()+data_offset; }
         virtual float* host() { return data_instance->host()+data_offset; }
+        virtual float* host_write_only() { return data_instance->host()+data_offset; }
         virtual const float* device() const { return data_instance->device()+data_offset; }
         virtual float* device() { return data_instance->device()+data_offset; }
+        virtual float* device_write_only() { return data_instance->device()+data_offset; }

 #ifdef DLIB_USE_CUDA
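Since the wire format changed (version 1 serialized each float through dlib's serializer; version 2 writes the header fields as before, then the elements as raw 4-byte little-endian IEEE floats), a round trip makes a quick sanity check. A minimal sketch:

    #include <dlib/dnn.h>
    #include <sstream>
    #include <cassert>

    int main()
    {
        dlib::resizable_tensor a(2, 3, 4, 5);

        // Fill through the new write-only accessor; the block starts undefined.
        float* p = a.host_write_only();
        for (size_t i = 0; i < a.size(); ++i)
            p[i] = 3.5f;

        // serialize() emits the version 2 format described above.
        std::ostringstream out;
        serialize(a, out);

        dlib::resizable_tensor b;
        std::istringstream in(out.str());
        deserialize(b, in);

        assert(b.size() == a.size());
        assert(b.host()[0] == 3.5f);
        return 0;
    }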
dlib/dnn/tensor_abstract.h
@@ -133,6 +133,18 @@ namespace dlib
               calling host().
         !*/

+        virtual float* host_write_only(
+        ) = 0;
+        /*!
+            ensures
+                - This function returns the same pointer as host(), except that it never
+                  performs a device to host memory copy. Instead, it immediately marks the
+                  device side data as out of date, effectively discarding it. Therefore,
+                  the values in the data pointed to by host_write_only() are undefined and
+                  you should only call host_write_only() if you are going to assign to
+                  every memory location in the returned memory block.
+        !*/
+
         virtual const float* device(
         ) const = 0;
         /*!
@@ -161,6 +173,20 @@ namespace dlib
               host() will perform a device to host transfer.
         !*/

+        virtual float* device_write_only(
+        ) = 0;
+        /*!
+            requires
+                - DLIB_USE_CUDA is #defined
+            ensures
+                - This function returns the same pointer as device(), except that it never
+                  performs a host to device memory copy. Instead, it immediately marks the
+                  host side data as out of date, effectively discarding it. Therefore, the
+                  values in the data pointed to by device_write_only() are undefined and
+                  you should only call device_write_only() if you are going to assign to
+                  every memory location in the returned memory block.
+        !*/
+
         tensor& operator= (
             float val
         );
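To make the host()/host_write_only() distinction concrete at the tensor level, a brief sketch following the contracts documented above (contrast is an illustrative helper, not dlib API):

    #include <dlib/dnn.h>

    void contrast(dlib::tensor& t)
    {
        // host(): read-modify-write. The returned values are guaranteed to be
        // current, copying from the device first if necessary.
        float* a = t.host();
        a[0] += 1;

        // host_write_only(): write-only. No device-to-host copy is performed
        // and the contents are undefined, so every element must be assigned.
        float* b = t.host_write_only();
        for (size_t i = 0; i < t.size(); ++i)
            b[i] = 0;
    }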