Commit 30c4ee54 authored by Davis King

Added gpu_data spec and also optimized it a little bit.

parent fe8b99f1
......@@ -34,16 +34,10 @@ namespace dlib
void gpu_data::
copy_to_device() const
// We want transfers to the device to always be concurrent with any device
// computation. So we use our non-default stream to do the transfer.
if (!device_current)
std::cout << "cudaMemcpy to device" << std::endl;
CHECK_CUDA(cudaMemcpy(data_device.get(), data_host.get(), data_size*sizeof(float), cudaMemcpyHostToDevice));
device_current = true;
// Check for errors. These calls to cudaGetLastError() are what help us find
// out if our kernel launches have been failing.
void gpu_data::
......@@ -3,6 +3,7 @@
#ifndef DLIB_GPU_DaTA_H_
#define DLIB_GPU_DaTA_H_
#include "gpu_data_abstract.h"
#include <memory>
#include "cuda_errors.h"
#include "../serialize.h"
......@@ -29,10 +30,6 @@ namespace dlib
modified the data and it hasn't been copied to the device yet then
host_current==true and device_current==false.
This object is not thread-safe. Don't touch it from multiple threads at the
same time.
// Copyright (C) 2015 Davis E. King
// License: Boost Software License. See LICENSE.txt for the full license.
#include "cuda_errors.h"
#include "../serialize.h"
namespace dlib
// ----------------------------------------------------------------------------------------
class gpu_data
This object is a block of size() floats, all stored contiguously in memory.
In particular, it keeps two copies of the floats, one on the host CPU side
and another on the GPU device side. It automatically performs the necessary
host/device transfers to keep these two copies of the data in sync.
All transfers to the device happen asynchronously so that CUDA kernel
computations can overlap with data transfers. However, any transfers from
the device to the host happen synchronously in the default CUDA stream.
Therefore, you should perform all your CUDA kernel launches on the default
stream so that transfers back to the host do not happen before the
computations have completed.
If DLIB_USE_CUDA is not #defined then this object will not use CUDA at all.
Instead, it will simply store one host side memory block of floats.
This object is not thread-safe. Don't touch it from multiple threads at
the same time.
- #size() == 0
- #host() == nullptr
- #device() == nullptr
// This object is not copyable, however, it is movable.
gpu_data(const gpu_data&) = delete;
gpu_data& operator=(const gpu_data&) = delete;
gpu_data(gpu_data&& item);
gpu_data& operator=(gpu_data&& item);
void async_copy_to_device(
- This function does not block.
- if (the host version of the data is newer than the device's copy) then
- Begins asynchronously copying host data to the device.
- A call to device() that happens before the transfer completes will
block until the transfer is complete. That is, it is safe to call
async_copy_to_device() and then immediately call device().
void set_size(
size_t new_size
- #size() == new_size
const float* host(
) const;
- returns a pointer to the host memory block of size() contiguous float
values or nullptr if size()==0.
- if (the host's copy of the data is out of date) then
- copies the data from the device to the host. The call to host()
blocks while this is happening.
float* host(
- returns a pointer to the host memory block of size() contiguous float
values or nullptr if size()==0.
- if (the host's copy of the data is out of date) then
- copies the data from the device to the host. The call to host()
blocks while this is happening.
- Marks the device side data as out of date so that the next call to
device() will perform a host to device transfer. If you want to begin
the transfer immediately then you can call async_copy_to_device() after
calling host().
const float* device(
) const;
- DLIB_USE_CUDA is #defined
- returns a pointer to the device memory block of size() contiguous float
values or nullptr if size()==0.
- if (the device's copy of the data is out of date) then
- copies the data from the host to the device. The call to device()
blocks while this is happening.
float* device(
- DLIB_USE_CUDA is #defined
- returns a pointer to the device memory block of size() contiguous float
values or nullptr if size()==0.
- if (the device's copy of the data is out of date) then
- copies the data from the host to the device. The call to device()
blocks while this is happening.
- Marks the host side data as out of date so that the next call to
host() will perform a device to host transfer.
size_t size(
) const;
- returns the number of floats contained in this object.
void swap (
gpu_data& item
- swaps the state of *this and item
void serialize(const gpu_data& item, std::ostream& out);
void deserialize(gpu_data& item, std::istream& in);
provides serialization support
// ----------------------------------------------------------------------------------------
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment