-
Notifications
You must be signed in to change notification settings - Fork 145
/
Copy pathmachine.h
317 lines (233 loc) · 11.3 KB
/
machine.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
/* Copyright 2024 Stanford University, NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// machine model for Realm
#ifndef REALM_MACHINE_H
#define REALM_MACHINE_H
#include "realm/processor.h"
#include "realm/memory.h"
#include <iterator>
namespace Realm {
// forward declaration - Runtime is named as a friend of Machine, which lets
//  it use Machine's protected constructor
class Runtime;
class REALM_PUBLIC_API Machine {
protected:
friend class Runtime;
explicit Machine(void *_impl) : impl(_impl) {}
public:
Machine(const Machine& m) : impl(m.impl) {}
Machine& operator=(const Machine& m) { impl = m.impl; return *this; }
~Machine(void) {}
static Machine get_machine(void);
class ProcessorQuery;
class MemoryQuery;
struct AffinityDetails {
unsigned bandwidth;
unsigned latency;
};
bool has_affinity(Processor p, Memory m, AffinityDetails *details = 0) const;
bool has_affinity(Memory m1, Memory m2, AffinityDetails *details = 0) const;
// older queries, to be deprecated
void get_all_memories(std::set<Memory>& mset) const;
void get_all_processors(std::set<Processor>& pset) const;
void get_local_processors(std::set<Processor>& pset) const;
void get_local_processors_by_kind(std::set<Processor>& pset,
Processor::Kind kind) const;
// Return the set of memories visible from a processor
void get_visible_memories(Processor p, std::set<Memory>& mset,
bool local_only = true) const;
// Return the set of memories visible from a memory
void get_visible_memories(Memory m, std::set<Memory>& mset,
bool local_only = true) const;
// Return the set of processors which can all see a given memory
void get_shared_processors(Memory m, std::set<Processor>& pset,
bool local_only = true) const;
size_t get_address_space_count(void) const;
// get information about the OS process in which tasks for a given
// processor run - note that the uniqueness of any/all of the provided
// information depends on the underlying OS and any container runtimes
struct ProcessInfo {
static const size_t MAX_HOSTNAME_LENGTH = 256;
char hostname[MAX_HOSTNAME_LENGTH]; // always null-terminated
uint64_t hostid; // gethostid on posix, hash of hostname on windows
uint32_t processid;
};
// populates the `info` struct with information about the processor `p`'s
// containing process, returning true if successful, false if the
// processor is unknown or the information is unavailable
bool get_process_info(Processor p, ProcessInfo *info) const;
public:
struct ProcessorMemoryAffinity {
Processor p; // accessing processor
Memory m; // target memory
unsigned bandwidth; // in MB/s
unsigned latency; // in nanoseconds
};
struct MemoryMemoryAffinity {
Memory m1; // source memory
Memory m2; // destination memory
unsigned bandwidth; // in MB/s
unsigned latency; // in nanoseconds
};
int get_proc_mem_affinity(std::vector<ProcessorMemoryAffinity>& result,
Processor restrict_proc = Processor::NO_PROC,
Memory restrict_memory = Memory::NO_MEMORY,
bool local_only = true) const;
int get_mem_mem_affinity(std::vector<MemoryMemoryAffinity>& result,
Memory restrict_mem1 = Memory::NO_MEMORY,
Memory restrict_mem2 = Memory::NO_MEMORY,
bool local_only = true) const;
// subscription interface for dynamic machine updates
class MachineUpdateSubscriber {
public:
virtual ~MachineUpdateSubscriber(void) {}
enum UpdateType { THING_ADDED,
THING_REMOVED,
THING_UPDATED
};
// callbacks occur on a thread that belongs to the runtime - please defer any
// complicated processing if possible
virtual void processor_updated(Processor p, UpdateType update_type,
const void *payload, size_t payload_size) = 0;
virtual void memory_updated(Memory m, UpdateType update_type,
const void *payload, size_t payload_size) = 0;
};
// subscriptions are encouraged to use a query which filters which processors or
// memories cause notifications
void add_subscription(MachineUpdateSubscriber *subscriber);
void add_subscription(MachineUpdateSubscriber *subscriber,
const ProcessorQuery &query);
void add_subscription(MachineUpdateSubscriber *subscriber,
const MemoryQuery &query);
void remove_subscription(MachineUpdateSubscriber *subscriber);
void *impl; // hidden internal implementation - this is NOT a transferrable handle
};
// forward declaration - the query classes name this template in their
//  `iterator` type aliases before the template itself is defined
template <class QT, class RT>
class MachineQueryIterator;
/**
 * Query over the processors of a Machine.
 *
 * Filter predicates return a reference to the query itself so that they can
 * be chained; when several predicates are used, a processor has to match all
 * of them (i.e. the intersection is returned).
 */
class REALM_PUBLIC_API Machine::ProcessorQuery {
public:
  explicit ProcessorQuery(const Machine& m);
  ProcessorQuery(const ProcessorQuery& q);
  ~ProcessorQuery();

  ProcessorQuery& operator=(const ProcessorQuery& q);

  bool operator==(const ProcessorQuery& compare_to) const;
  bool operator!=(const ProcessorQuery& compare_to) const;

  // ---- filter predicates (chainable) ----

  // keep only processors of the specified 'kind'
  ProcessorQuery& only_kind(Processor::Kind kind);

  // keep only processors managed by this address space
  ProcessorQuery& local_address_space();

  // keep only processors in the same address space as the given Processor
  //  or Memory
  ProcessorQuery& same_address_space_as(Processor p);
  ProcessorQuery& same_address_space_as(Memory m);

  // keep only processors that have affinity to the given memory
  ProcessorQuery& has_affinity_to(Memory m, unsigned min_bandwidth = 0,
                                  unsigned max_latency = 0);

  // keep only processors whose best affinity is to the given memory
  ProcessorQuery& best_affinity_to(Memory m, int bandwidth_weight = 1,
                                   int latency_weight = 0);

  // ---- results ----
  // a query may be executed multiple times - when the machine model is
  //  dynamic, nothing guarantees that the results of any two executions are
  //  consistent with each other

  // number of matched processors
  size_t count() const;

  // first matched processor, or NO_PROC
  Processor first() const;

  // matched processor following 'after', or NO_PROC
  Processor next(Processor after) const;

  // a random matched processor, or NO_PROC if none exist
  Processor random() const;

  using iterator = MachineQueryIterator<ProcessorQuery, Processor>;

  // iterator range for enumerating all matched processors
  iterator begin() const;
  iterator end() const;

protected:
  void *impl;
};
/**
 * Query over the memories of a Machine.
 *
 * Filter predicates return a reference to the query itself so that they can
 * be chained; when several predicates are used, a memory has to match all of
 * them (i.e. the intersection is returned).
 */
class REALM_PUBLIC_API Machine::MemoryQuery {
public:
  explicit MemoryQuery(const Machine& m);
  MemoryQuery(const MemoryQuery& q);
  ~MemoryQuery();

  MemoryQuery& operator=(const MemoryQuery& q);

  bool operator==(const MemoryQuery& compare_to) const;
  bool operator!=(const MemoryQuery& compare_to) const;

  // ---- filter predicates (chainable) ----

  // keep only memories of the specified 'kind'
  MemoryQuery& only_kind(Memory::Kind kind);

  // keep only memories managed by this address space
  MemoryQuery& local_address_space();

  // keep only memories in the same address space as the given Processor or
  //  Memory
  MemoryQuery& same_address_space_as(Processor p);
  MemoryQuery& same_address_space_as(Memory m);

  // keep only memories that have affinity to the given processor or memory
  MemoryQuery& has_affinity_to(Processor p, unsigned min_bandwidth = 0,
                               unsigned max_latency = 0);
  MemoryQuery& has_affinity_to(Memory m, unsigned min_bandwidth = 0,
                               unsigned max_latency = 0);

  // keep only memories whose best affinity is to the given processor or
  //  memory
  MemoryQuery& best_affinity_to(Processor p, int bandwidth_weight = 1,
                                int latency_weight = 0);
  MemoryQuery& best_affinity_to(Memory m, int bandwidth_weight = 1,
                                int latency_weight = 0);

  // keep only memories whose total capacity is at least 'min_bytes' bytes
  MemoryQuery& has_capacity(size_t min_bytes);

  // ---- results ----
  // a query may be executed multiple times - when the machine model is
  //  dynamic, nothing guarantees that the results of any two executions are
  //  consistent with each other

  // number of matched memories
  size_t count() const;

  // first matched memory, or NO_MEMORY
  Memory first() const;

  // matched memory following 'after', or NO_MEMORY
  Memory next(Memory after) const;

  // a random matched memory, or NO_MEMORY if none exist
  Memory random() const;

  using iterator = MachineQueryIterator<MemoryQuery, Memory>;

  // iterator range for enumerating all matched memories
  iterator begin() const;
  iterator end() const;

protected:
  void *impl;
};
template <typename QT, typename RT>
class REALM_PUBLIC_API MachineQueryIterator {
public:
// explicitly set iterator traits
typedef std::input_iterator_tag iterator_category;
typedef RT value_type;
typedef std::ptrdiff_t difference_type;
typedef RT *pointer;
typedef RT& reference;
// would like this constructor to be protected and have QT be a friend.
// The CUDA compiler also seems to be a little dense here as well
#if(!defined(__CUDACC__) && !defined(__HIPCC__))
protected:
friend QT;
#else
public:
#endif
MachineQueryIterator(const QT& _query, RT _result);
protected:
QT query;
RT result;
public:
MachineQueryIterator(const MachineQueryIterator<QT,RT>& copy_from);
~MachineQueryIterator(void);
MachineQueryIterator<QT,RT>& operator=(const MachineQueryIterator<QT,RT>& copy_from);
bool operator==(const MachineQueryIterator<QT,RT>& compare_to) const;
bool operator!=(const MachineQueryIterator<QT,RT>& compare_to) const;
RT operator*(void);
const RT *operator->(void);
MachineQueryIterator<QT,RT>& operator++(/*prefix*/);
MachineQueryIterator<QT,RT> operator++(int /*postfix*/);
// in addition to testing an iterator against .end(), you can also cast to bool, allowing
// for(iterator it = q.begin(); q; ++q) ...
operator bool(void) const;
};
}; // namespace Realm
#include "realm/machine.inl"
#endif // ifndef REALM_MACHINE_H