1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
#include "DeviceMemory.h"
#include <list>
#include <memory>
#include <string>
#include <unordered_map>
namespace faiss { namespace gpu {
/// Device memory manager that provides temporary memory allocations
/// out of a region of memory.
///
/// The manager is non-copyable: it holds a raw pointer to a region that it
/// may own (see Stack::isOwner_), so a copy would leave two objects that
/// both believe they are responsible for the same region.
class StackDeviceMemory : public DeviceMemory {
 public:
  /// Allocate a new region of memory that we manage
  /// @param device          device the region is associated with
  /// @param allocPerDevice  size of the region to allocate, in bytes
  explicit StackDeviceMemory(int device, size_t allocPerDevice);

  /// Manage a region of memory for a particular device, with or
  /// without ownership
  /// @param device   device the region is associated with
  /// @param p        start of the pre-allocated region
  /// @param size     size of the region, in bytes
  /// @param isOwner  whether this object takes ownership of the region
  StackDeviceMemory(int device, void* p, size_t size, bool isOwner);

  ~StackDeviceMemory() override;

  /// Copying is disabled; see the class comment.
  StackDeviceMemory(const StackDeviceMemory&) = delete;
  StackDeviceMemory& operator=(const StackDeviceMemory&) = delete;

  /// Returns the device this manager was constructed for
  int getDevice() const override;

  /// Obtains a temporary reservation of `size` bytes for use on `stream`
  DeviceMemoryReservation getMemory(cudaStream_t stream,
                                    size_t size) override;

  /// Returns how much of the managed region is still available
  size_t getSizeAvailable() const override;

  /// Returns a human-readable description of the manager state
  std::string toString() const override;

  /// Returns the high-water mark of cudaMalloc activity
  size_t getHighWaterCudaMalloc() const override;

 protected:
  /// Hands a reservation previously obtained from getMemory() back to
  /// this manager
  void returnAllocation(DeviceMemoryReservation& m) override;

  /// Previous allocation ranges and the streams for which
  /// synchronization is required
  struct Range {
    Range(char* s, char* e, cudaStream_t str)
        : start_(s), end_(e), stream_(str) {
    }

    // References a memory range [start, end)
    char* start_;
    char* end_;

    // Stream associated with this range
    cudaStream_t stream_;
  };

  struct Stack {
    /// Constructor that allocates memory via cudaMalloc
    Stack(int device, size_t size);

    /// Constructor that references a pre-allocated region of memory
    Stack(int device, void* p, size_t size, bool isOwner);

    // NOTE(review): the destructor body is not visible in this header;
    // presumably it releases [start_, end_) when isOwner_ is set -- confirm
    // against the implementation file.
    ~Stack();

    /// A Stack holds raw pointers into a region it may own, and it has a
    /// user-declared destructor; an implicit member-wise copy would alias
    /// that region between two Stack objects. Copying is therefore
    /// disabled.
    Stack(const Stack&) = delete;
    Stack& operator=(const Stack&) = delete;

    /// Returns how much size is available for an allocation without
    /// calling cudaMalloc
    size_t getSizeAvailable() const;

    /// Obtains an allocation; all allocations are guaranteed to be 16
    /// byte aligned
    char* getAlloc(size_t size, cudaStream_t stream);

    /// Returns an allocation
    void returnAlloc(char* p, size_t size, cudaStream_t stream);

    /// Returns the stack state
    std::string toString() const;

    /// Returns the high-water mark of cudaMalloc activity
    size_t getHighWaterCudaMalloc() const;

    /// Device this allocation is on
    int device_;

    /// Do we own our region of memory?
    bool isOwner_;

    /// Where our allocation begins and ends
    /// [start_, end_) is valid
    char* start_;
    char* end_;

    /// Total size end_ - start_
    size_t size_;

    /// Stack head within [start, end)
    char* head_;

    /// List of previous last users of allocations on our stack, for
    /// possible synchronization purposes
    std::list<Range> lastUsers_;

    /// How much cudaMalloc memory is currently outstanding?
    size_t mallocCurrent_;

    /// What's the high water mark in terms of memory used from the
    /// temporary buffer?
    size_t highWaterMemoryUsed_;

    /// What's the high water mark in terms of memory allocated via
    /// cudaMalloc?
    size_t highWaterMalloc_;
  };

  /// Our device
  int device_;

  /// Memory stack
  Stack stack_;
};
} } // namespace