-
Notifications
You must be signed in to change notification settings - Fork 1
/
CG.h
295 lines (247 loc) · 8.5 KB
/
CG.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
// SPDX-License-Identifier: GPL-3.0-or-later
#ifndef CG_H
#define CG_H
#include <cassert>
#include <chrono>
#include <memory>
#include "Matrix.h"
#include "Preconditioner.h"
#include "WorkDistribution.h"
#include "def.h"
/// @brief The base class implementing the conjugate gradients method.
///
/// It is used to solve the equation system Ax = k. A is a sparse matrix
/// stored either COO, CRS or ELLPACK format.
class CG {
public:
/// Different vectors used to solve the equation system.
enum Vector {
/// LHS of the equation system.
VectorK,
/// Computed solution of the equation system.
VectorX,
/// Temporary vector for the search direction.
VectorP,
/// Temporary vector holding the result of the matrix vector multiplication.
VectorQ,
/// Temporary vector for the residual.
VectorR,
/// Temporary vector in use with the preconditioner.
VectorZ,
};
/// Different formats used to store the sparse matrix.
enum MatrixFormat {
/// %Matrix is represented by CG#matrixCOO.
MatrixFormatCOO,
/// %Matrix is represented by either CG#matrixCRS, CG#splitMatrixCRS, or
/// CG#partitionedMatrixCRS.
MatrixFormatCRS,
/// %Matrix is represented by either CG#matrixELL, CG#splitMatrixELL, or
/// CG#partitionedMatrixELL.
MatrixFormatELL,
};
/// Different preconditioners to use.
enum Preconditioner {
/// Use no preconditioner.
PreconditionerNone,
/// Use a Jacobi preconditioner.
PreconditionerJacobi,
};
/// Different calculations of #workDistribution.
enum WorkDistributionCalc {
/// @see WorkDistribution.calculateByRow()
WorkDistributionByRow,
/// @see WorkDistribution.calculateByNz()
WorkDistributionByNz,
};
private:
int iteration;
int maxIterations = 1000;
floatType residual;
floatType tolerance = 1e-9;
floatType checkTolerance = 1e-5;
/// Struct holding timing information for IO, converting, the total solve time
/// and for each kernel.
struct Timing {
using clock = std::chrono::steady_clock;
using duration = std::chrono::duration<double>;
duration io{0};
duration converting{0};
duration transferTo{0};
duration transferFrom{0};
duration check{0};
duration solve{0};
duration matvec{0};
duration axpy{0};
duration xpay{0};
duration vectorDot{0};
duration preconditioner{0};
};
Timing timing;
using time_point = Timing::clock::time_point;
time_point now() const { return Timing::clock::now(); }
void matvec(Vector in, Vector out) {
time_point start = now();
matvecKernel(in, out);
timing.matvec += now() - start;
}
void axpy(floatType a, Vector x, Vector y) {
time_point start = now();
axpyKernel(a, x, y);
timing.axpy += now() - start;
}
void xpay(Vector x, floatType a, Vector y) {
time_point start = now();
xpayKernel(x, a, y);
timing.xpay += now() - start;
}
floatType vectorDot(Vector a, Vector b) {
time_point start = now();
floatType res = vectorDotKernel(a, b);
timing.vectorDot += now() - start;
return res;
}
void applyPreconditioner(Vector x, Vector y) {
time_point start = now();
applyPreconditionerKernel(x, y);
timing.preconditioner += now() - start;
}
protected:
/// Dimension of the matrix.
int N;
/// Nonzeros in the matrix.
int nz;
/// Way to calculate #workDistribution.
WorkDistributionCalc workDistributionCalc = WorkDistributionByRow;
/// How the work is distributed into multiple chunks.
std::unique_ptr<WorkDistribution> workDistribution;
/// Whether to overlap the gather with some computation of matvec().
bool overlappedGather = false;
/// Format to store the matrix.
MatrixFormat matrixFormat;
/// Matrix in cooridinate format.
std::unique_ptr<MatrixCOO> matrixCOO;
/// Matrix in CRS format.
std::unique_ptr<MatrixCRS> matrixCRS;
/// Matrix in ELLPACK format.
std::unique_ptr<MatrixELL> matrixELL;
/// Matrix in CRS format, split for #workDistribution.
std::unique_ptr<SplitMatrixCRS> splitMatrixCRS;
/// Matrix in ELLPACK format, split for #workDistribution.
std::unique_ptr<SplitMatrixELL> splitMatrixELL;
/// Matrix in CRS format, partitioned for #workDistribution.
std::unique_ptr<PartitionedMatrixCRS> partitionedMatrixCRS;
/// Matrix in ELLPACK format, partitioned for #workDistribution.
std::unique_ptr<PartitionedMatrixELL> partitionedMatrixELL;
/// The preconditioner to use.
Preconditioner preconditioner;
/// Jacobi preconditioner.
std::unique_ptr<Jacobi> jacobi;
/// #VectorK
floatType *k = nullptr;
/// #VectorX
floatType *x = nullptr;
/// Construct a new object with a \a defaultMatrixFormat to store tha matrix
/// and a \a defaultPreconditioner to use.
CG(MatrixFormat defaultMatrixFormat,
Preconditioner defaultPreconditioner = PreconditionerNone,
bool overlappedGather = false)
: overlappedGather(overlappedGather), matrixFormat(defaultMatrixFormat),
preconditioner(defaultPreconditioner) {}
/// @return \a true if this implementation supports \a format to store the matrix.
virtual bool supportsMatrixFormat(MatrixFormat format) = 0;
/// @return \a true if this implementation supports the \a preconditioner.
virtual bool supportsPreconditioner(Preconditioner preconditioner) {
return false;
}
/// @return the number of chunks that the work should be split into, or -1
/// if no work distributition is necessary.
virtual int getNumberOfChunks() { return -1; }
/// @return \a true if this implementation supports overlapping the gather
/// with some computation of matvec().
virtual bool supportsOverlappedGather() { return false; }
/// Allocate MatrixCRS.
virtual void allocateMatrixCRS();
/// Allocate MatrixELL.
virtual void allocateMatrixELL();
/// Allocate SplitMatrixCRS.
virtual void allocateSplitMatrixCRS();
/// Allocate SplitMatrixELL.
virtual void allocateSplitMatrixELL();
/// Allocate PartitionedMatrixCRS.
virtual void allocatePartitionedMatrixCRS();
/// Allocate PartitionedMatrixELL.
virtual void allocatePartitionedMatrixELL();
/// Initialize the Jacobi preconditioner.
virtual void allocateJacobi();
/// Allocate #k.
virtual void allocateK();
/// Deallocate #k.
virtual void deallocateK();
/// Allocate #x.
virtual void allocateX();
/// Deallocate #x.
virtual void deallocateX();
/// Do transfer data before calling #solve().
virtual void doTransferTo() {}
/// Do transfer data after calling #solve().
virtual void doTransferFrom() {}
/// Copy vector \a _src to \a _dst.
virtual void cpy(Vector _dst, Vector _src) = 0;
/// \a _y = A * \a _x.
virtual void matvecKernel(Vector _x, Vector _y) = 0;
/// \a _y = \a a * \a _x + \a _y.
virtual void axpyKernel(floatType a, Vector _x, Vector _y) = 0;
/// \a _y = \a _y + \a a * \a _y.
virtual void xpayKernel(Vector _x, floatType a, Vector _y) = 0;
/// @return vector dot product <\a _a, \a _b>
virtual floatType vectorDotKernel(Vector _a, Vector _b) = 0;
/// \a _y = B * \a _x
virtual void applyPreconditionerKernel(Vector _x, Vector _y) {
assert(0 && "Preconditioner not implemented!");
}
/// Print \a label (padded to a constant number of characters) and \a value.
static void printPadded(const char *label, const std::string &value);
public:
virtual ~CG() { }
/// Parse and validate environment variables.
virtual void parseEnvironment();
/// Init data by reading matrix from \a matrixFile.
virtual void init(const char *matrixFile);
/// Reset values in x vector.
void resetX();
/// Reset all timings except for initialization.
void resetNonInitTimings() {
Timing old = timing;
// Create new object with zero values.
timing = Timing{};
// Copy init timings.
timing.io = old.io;
timing.converting = old.converting;
}
/// @return true if this implementation needs to transfer data for solving.
virtual bool needsTransfer() { return false; }
/// Transfer data before calling #solve().
void transferTo() {
auto start = now();
doTransferTo();
timing.transferTo = now() - start;
}
/// Solve sparse equation system.
void solve();
/// Transfer data after calling #solve().
void transferFrom() {
auto start = now();
doTransferFrom();
timing.transferFrom = now() - start;
}
/// Print summary after system has been solved.
virtual void printSummary();
/// Check the computed solution.
bool check();
/// Cleanup allocated memory.
virtual void cleanup();
/// @return new instance of a CG implementation.
static CG *getInstance();
};
#endif