1 #include <petscsys.h>
2 #include <../src/ksp/ksp/impls/gcr/pipegcr/pipegcrimpl.h> /*I "petscksp.h" I*/
3
4 static PetscBool cited = PETSC_FALSE;
5 static const char citation[] = "@article{SSM2016,\n"
6 " author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
7 " title = {Pipelined, Flexible Krylov Subspace Methods},\n"
8 " journal = {SIAM Journal on Scientific Computing},\n"
9 " volume = {38},\n"
10 " number = {5},\n"
11 " pages = {C441-C470},\n"
12 " year = {2016},\n"
13 " doi = {10.1137/15M1049130},\n"
14 " URL = {http://dx.doi.org/10.1137/15M1049130},\n"
15 " eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
16 "}\n";
17
18 #define KSPPIPEGCR_DEFAULT_MMAX 15
19 #define KSPPIPEGCR_DEFAULT_NPREALLOC 5
20 #define KSPPIPEGCR_DEFAULT_VECB 5
21 #define KSPPIPEGCR_DEFAULT_TRUNCSTRAT KSP_FCD_TRUNC_TYPE_NOTAY
22 #define KSPPIPEGCR_DEFAULT_UNROLL_W PETSC_TRUE
23
24 #include <petscksp.h>
25
KSPAllocateVectors_PIPEGCR(KSP ksp,PetscInt nvecsneeded,PetscInt chunksize)26 static PetscErrorCode KSPAllocateVectors_PIPEGCR(KSP ksp, PetscInt nvecsneeded, PetscInt chunksize)
27 {
28 PetscInt i;
29 KSP_PIPEGCR *pipegcr;
30 PetscInt nnewvecs, nvecsprev;
31
32 PetscFunctionBegin;
33 pipegcr = (KSP_PIPEGCR *)ksp->data;
34
35 /* Allocate enough new vectors to add chunksize new vectors, reach nvecsneedtotal, or to reach mmax+1, whichever is smallest */
36 if (pipegcr->nvecs < PetscMin(pipegcr->mmax + 1, nvecsneeded)) {
37 nvecsprev = pipegcr->nvecs;
38 nnewvecs = PetscMin(PetscMax(nvecsneeded - pipegcr->nvecs, chunksize), pipegcr->mmax + 1 - pipegcr->nvecs);
39 PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->ppvecs[pipegcr->nchunks], 0, NULL));
40 PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->psvecs[pipegcr->nchunks], 0, NULL));
41 PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->pqvecs[pipegcr->nchunks], 0, NULL));
42 if (pipegcr->unroll_w) PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->ptvecs[pipegcr->nchunks], 0, NULL));
43 pipegcr->nvecs += nnewvecs;
44 for (i = 0; i < nnewvecs; i++) {
45 pipegcr->qvecs[nvecsprev + i] = pipegcr->pqvecs[pipegcr->nchunks][i];
46 pipegcr->pvecs[nvecsprev + i] = pipegcr->ppvecs[pipegcr->nchunks][i];
47 pipegcr->svecs[nvecsprev + i] = pipegcr->psvecs[pipegcr->nchunks][i];
48 if (pipegcr->unroll_w) pipegcr->tvecs[nvecsprev + i] = pipegcr->ptvecs[pipegcr->nchunks][i];
49 }
50 pipegcr->chunksizes[pipegcr->nchunks] = nnewvecs;
51 pipegcr->nchunks++;
52 }
53 PetscFunctionReturn(PETSC_SUCCESS);
54 }
55
KSPSolve_PIPEGCR_cycle(KSP ksp)56 static PetscErrorCode KSPSolve_PIPEGCR_cycle(KSP ksp)
57 {
58 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
59 Mat A, B;
60 Vec x, r, b, z, w, m, n, p, s, q, t, *redux;
61 PetscInt i, j, k, idx, kdx, mi;
62 PetscScalar alpha = 0.0, gamma, *betas, *dots;
63 PetscReal rnorm = 0.0, delta, *eta, *etas;
64
65 PetscFunctionBegin;
66 /* !!PS We have not checked these routines for use with complex numbers. The inner products
67 are likely not defined correctly for that case */
68 PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars");
69
70 PetscCall(KSPGetOperators(ksp, &A, &B));
71 x = ksp->vec_sol;
72 b = ksp->vec_rhs;
73 r = ksp->work[0];
74 z = ksp->work[1];
75 w = ksp->work[2]; /* w = Az = AB(r) (pipelining intermediate) */
76 m = ksp->work[3]; /* m = B(w) = B(Az) = B(AB(r)) (pipelining intermediate) */
77 n = ksp->work[4]; /* n = AB(w) = AB(Az) = AB(AB(r)) (pipelining intermediate) */
78 p = pipegcr->pvecs[0];
79 s = pipegcr->svecs[0];
80 q = pipegcr->qvecs[0];
81 t = pipegcr->unroll_w ? pipegcr->tvecs[0] : NULL;
82
83 redux = pipegcr->redux;
84 dots = pipegcr->dots;
85 etas = pipegcr->etas;
86 betas = dots; /* dots takes the result of all dot products of which the betas are a subset */
87
88 /* cycle initial residual */
89 PetscCall(KSP_MatMult(ksp, A, x, r));
90 PetscCall(VecAYPX(r, -1.0, b)); /* r <- b - Ax */
91 PetscCall(KSP_PCApply(ksp, r, z)); /* z <- B(r) */
92 PetscCall(KSP_MatMult(ksp, A, z, w)); /* w <- Az */
93
94 /* initialization of other variables and pipelining intermediates */
95 PetscCall(VecCopy(z, p));
96 PetscCall(KSP_MatMult(ksp, A, p, s));
97
98 /* overlap initial computation of delta, gamma */
99 redux[0] = w;
100 redux[1] = r;
101 PetscCall(VecMDotBegin(w, 2, redux, dots)); /* Start split reductions for gamma = (w,r), delta = (w,w) */
102 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)s))); /* perform asynchronous reduction */
103 PetscCall(KSP_PCApply(ksp, s, q)); /* q = B(s) */
104 if (pipegcr->unroll_w) PetscCall(KSP_MatMult(ksp, A, q, t)); /* t = Aq */
105 PetscCall(VecMDotEnd(w, 2, redux, dots)); /* Finish split reduction */
106 delta = PetscRealPart(dots[0]);
107 etas[0] = delta;
108 gamma = dots[1];
109 alpha = gamma / delta;
110
111 i = 0;
112 do {
113 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
114 ksp->its++;
115 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
116
117 /* update solution, residuals, .. */
118 PetscCall(VecAXPY(x, +alpha, p));
119 PetscCall(VecAXPY(r, -alpha, s));
120 PetscCall(VecAXPY(z, -alpha, q));
121 if (pipegcr->unroll_w) {
122 PetscCall(VecAXPY(w, -alpha, t));
123 } else {
124 PetscCall(KSP_MatMult(ksp, A, z, w));
125 }
126
127 /* Computations of current iteration done */
128 i++;
129
130 if (pipegcr->modifypc) PetscCall((*pipegcr->modifypc)(ksp, ksp->its, 0 /* unused argument */, ksp->rnorm, pipegcr->modifypc_ctx));
131
132 /* If needbe, allocate a new chunk of vectors */
133 PetscCall(KSPAllocateVectors_PIPEGCR(ksp, i + 1, pipegcr->vecb));
134
135 /* Note that we wrap around and start clobbering old vectors */
136 idx = i % (pipegcr->mmax + 1);
137 p = pipegcr->pvecs[idx];
138 s = pipegcr->svecs[idx];
139 q = pipegcr->qvecs[idx];
140 if (pipegcr->unroll_w) t = pipegcr->tvecs[idx];
141 eta = pipegcr->etas + idx;
142
143 /* number of old directions to orthogonalize against */
144 switch (pipegcr->truncstrat) {
145 case KSP_FCD_TRUNC_TYPE_STANDARD:
146 mi = pipegcr->mmax;
147 break;
148 case KSP_FCD_TRUNC_TYPE_NOTAY:
149 mi = ((i - 1) % pipegcr->mmax) + 1;
150 break;
151 default:
152 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unrecognized Truncation Strategy");
153 }
154
155 /* Pick old p,s,q,zeta in a way suitable for VecMDot */
156 for (k = PetscMax(0, i - mi), j = 0; k < i; j++, k++) {
157 kdx = k % (pipegcr->mmax + 1);
158 pipegcr->pold[j] = pipegcr->pvecs[kdx];
159 pipegcr->sold[j] = pipegcr->svecs[kdx];
160 pipegcr->qold[j] = pipegcr->qvecs[kdx];
161 if (pipegcr->unroll_w) pipegcr->told[j] = pipegcr->tvecs[kdx];
162 redux[j] = pipegcr->svecs[kdx];
163 }
164 /* If the above loop is not run redux contains only r and w => all beta_k = 0, only gamma, delta != 0 */
165 redux[j] = r;
166 redux[j + 1] = w;
167
168 /* Dot products */
169 /* Start split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
170 PetscCall(VecMDotBegin(w, j + 2, redux, dots));
171 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)w)));
172
173 /* B(w-r) + u stabilization */
174 PetscCall(VecWAXPY(n, -1.0, r, w)); /* m = u + B(w-r): (a) ntmp = w-r */
175 PetscCall(KSP_PCApply(ksp, n, m)); /* m = u + B(w-r): (b) mtmp = B(ntmp) = B(w-r) */
176 PetscCall(VecAXPY(m, 1.0, z)); /* m = u + B(w-r): (c) m = z + mtmp */
177 if (pipegcr->unroll_w) PetscCall(KSP_MatMult(ksp, A, m, n)); /* n = Am */
178
179 /* Finish split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
180 PetscCall(VecMDotEnd(w, j + 2, redux, dots));
181 gamma = dots[j];
182 delta = PetscRealPart(dots[j + 1]);
183
184 /* compute new residual norm.
185 this cannot be done before this point so that the natural norm
186 is available for free and the communication involved is overlapped */
187 switch (ksp->normtype) {
188 case KSP_NORM_PRECONDITIONED:
189 PetscCall(VecNorm(z, NORM_2, &rnorm)); /* ||r|| <- sqrt(z'*z) */
190 break;
191 case KSP_NORM_UNPRECONDITIONED:
192 PetscCall(VecNorm(r, NORM_2, &rnorm)); /* ||r|| <- sqrt(r'*r) */
193 break;
194 case KSP_NORM_NATURAL:
195 rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w) */
196 break;
197 case KSP_NORM_NONE:
198 rnorm = 0.0;
199 break;
200 default:
201 SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
202 }
203
204 /* Check for convergence */
205 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
206 ksp->rnorm = rnorm;
207 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
208 PetscCall(KSPLogResidualHistory(ksp, rnorm));
209 PetscCall(KSPMonitor(ksp, ksp->its, rnorm));
210 PetscCall((*ksp->converged)(ksp, ksp->its, rnorm, &ksp->reason, ksp->cnvP));
211 if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
212
213 /* compute new eta and scale beta */
214 *eta = 0.;
215 for (k = PetscMax(0, i - mi), j = 0; k < i; j++, k++) {
216 kdx = k % (pipegcr->mmax + 1);
217 betas[j] /= -etas[kdx]; /* betak /= etak */
218 *eta -= PetscAbsScalar(betas[j]) * PetscAbsScalar(betas[j]) * etas[kdx];
219 /* etaitmp = -betaik^2 * etak */
220 }
221 *eta += delta; /* etai = delta -betaik^2 * etak */
222
223 /* check breakdown of eta = (s,s) */
224 if (*eta < 0.) {
225 pipegcr->norm_breakdown = PETSC_TRUE;
226 PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT "\n", ksp->its));
227 break;
228 } else {
229 alpha = gamma / (*eta); /* alpha = gamma/etai */
230 }
231
232 /* project out stored search directions using classical G-S */
233 PetscCall(VecCopy(z, p));
234 PetscCall(VecCopy(w, s));
235 PetscCall(VecCopy(m, q));
236 if (pipegcr->unroll_w) {
237 PetscCall(VecCopy(n, t));
238 PetscCall(VecMAXPY(t, j, betas, pipegcr->told)); /* ti <- n - sum_k beta_k t_k */
239 }
240 PetscCall(VecMAXPY(p, j, betas, pipegcr->pold)); /* pi <- ui - sum_k beta_k p_k */
241 PetscCall(VecMAXPY(s, j, betas, pipegcr->sold)); /* si <- wi - sum_k beta_k s_k */
242 PetscCall(VecMAXPY(q, j, betas, pipegcr->qold)); /* qi <- m - sum_k beta_k q_k */
243
244 } while (ksp->its < ksp->max_it);
245 if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
246 PetscFunctionReturn(PETSC_SUCCESS);
247 }
248
KSPSolve_PIPEGCR(KSP ksp)249 static PetscErrorCode KSPSolve_PIPEGCR(KSP ksp)
250 {
251 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
252 Mat A, B;
253 Vec x, b, r, z, w;
254 PetscScalar gamma;
255 PetscReal rnorm = 0.0;
256 PetscBool issym;
257
258 PetscFunctionBegin;
259 PetscCall(PetscCitationsRegister(citation, &cited));
260
261 PetscCall(KSPGetOperators(ksp, &A, &B));
262 x = ksp->vec_sol;
263 b = ksp->vec_rhs;
264 r = ksp->work[0];
265 z = ksp->work[1];
266 w = ksp->work[2]; /* w = Az = AB(r) (pipelining intermediate) */
267
268 /* compute initial residual */
269 if (!ksp->guess_zero) {
270 PetscCall(KSP_MatMult(ksp, A, x, r));
271 PetscCall(VecAYPX(r, -1.0, b)); /* r <- b - Ax */
272 } else {
273 PetscCall(VecCopy(b, r)); /* r <- b */
274 }
275
276 /* initial residual norm */
277 PetscCall(KSP_PCApply(ksp, r, z)); /* z <- B(r) */
278 PetscCall(KSP_MatMult(ksp, A, z, w)); /* w <- Az */
279 PetscCall(VecDot(r, w, &gamma)); /* gamma = (r,w) */
280
281 switch (ksp->normtype) {
282 case KSP_NORM_PRECONDITIONED:
283 PetscCall(VecNorm(z, NORM_2, &rnorm)); /* ||r|| <- sqrt(z'*z) */
284 break;
285 case KSP_NORM_UNPRECONDITIONED:
286 PetscCall(VecNorm(r, NORM_2, &rnorm)); /* ||r|| <- sqrt(r'*r) */
287 break;
288 case KSP_NORM_NATURAL:
289 rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w) */
290 break;
291 case KSP_NORM_NONE:
292 rnorm = 0.0;
293 break;
294 default:
295 SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
296 }
297
298 /* Is A symmetric? */
299 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &issym, MATSBAIJ, MATSEQSBAIJ, MATMPISBAIJ, ""));
300 if (!issym) PetscCall(PetscInfo(A, "Matrix type is not any of MATSBAIJ,MATSEQSBAIJ,MATMPISBAIJ. Is matrix A symmetric (as required by CR methods)?\n"));
301
302 /* logging */
303 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
304 ksp->its = 0;
305 ksp->rnorm0 = rnorm;
306 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
307 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm0));
308 PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm0));
309 PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm0, &ksp->reason, ksp->cnvP));
310 if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
311
312 do {
313 PetscCall(KSPSolve_PIPEGCR_cycle(ksp));
314 if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
315 if (pipegcr->norm_breakdown) {
316 pipegcr->n_restarts++;
317 pipegcr->norm_breakdown = PETSC_FALSE;
318 }
319 } while (ksp->its < ksp->max_it);
320
321 if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
322 PetscFunctionReturn(PETSC_SUCCESS);
323 }
324
KSPView_PIPEGCR(KSP ksp,PetscViewer viewer)325 static PetscErrorCode KSPView_PIPEGCR(KSP ksp, PetscViewer viewer)
326 {
327 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
328 PetscBool isascii, isstring;
329 const char *truncstr;
330
331 PetscFunctionBegin;
332 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
333 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
334
335 if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_STANDARD) {
336 truncstr = "Using standard truncation strategy";
337 } else if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_NOTAY) {
338 truncstr = "Using Notay's truncation strategy";
339 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Undefined FCD truncation strategy");
340
341 if (isascii) {
342 PetscCall(PetscViewerASCIIPrintf(viewer, " max previous directions = %" PetscInt_FMT "\n", pipegcr->mmax));
343 PetscCall(PetscViewerASCIIPrintf(viewer, " preallocated %" PetscInt_FMT " directions\n", PetscMin(pipegcr->nprealloc, pipegcr->mmax + 1)));
344 PetscCall(PetscViewerASCIIPrintf(viewer, " %s\n", truncstr));
345 PetscCall(PetscViewerASCIIPrintf(viewer, " w unrolling = %s \n", PetscBools[pipegcr->unroll_w]));
346 PetscCall(PetscViewerASCIIPrintf(viewer, " restarts performed = %" PetscInt_FMT " \n", pipegcr->n_restarts));
347 } else if (isstring) {
348 PetscCall(PetscViewerStringSPrintf(viewer, "max previous directions = %" PetscInt_FMT ", preallocated %" PetscInt_FMT " directions, %s truncation strategy", pipegcr->mmax, pipegcr->nprealloc, truncstr));
349 }
350 PetscFunctionReturn(PETSC_SUCCESS);
351 }
352
KSPSetUp_PIPEGCR(KSP ksp)353 static PetscErrorCode KSPSetUp_PIPEGCR(KSP ksp)
354 {
355 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
356 Mat A;
357 PetscBool diagonalscale;
358 const PetscInt nworkstd = 5;
359
360 PetscFunctionBegin;
361 PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
362 PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);
363
364 PetscCall(KSPGetOperators(ksp, &A, NULL));
365
366 /* Allocate "standard" work vectors */
367 PetscCall(KSPSetWorkVecs(ksp, nworkstd));
368
369 /* Allocated space for pointers to additional work vectors
370 note that mmax is the number of previous directions, so we add 1 for the current direction */
371 PetscCall(PetscMalloc6(pipegcr->mmax + 1, &pipegcr->pvecs, pipegcr->mmax + 1, &pipegcr->ppvecs, pipegcr->mmax + 1, &pipegcr->svecs, pipegcr->mmax + 1, &pipegcr->psvecs, pipegcr->mmax + 1, &pipegcr->qvecs, pipegcr->mmax + 1, &pipegcr->pqvecs));
372 if (pipegcr->unroll_w) PetscCall(PetscMalloc3(pipegcr->mmax + 1, &pipegcr->tvecs, pipegcr->mmax + 1, &pipegcr->ptvecs, pipegcr->mmax + 2, &pipegcr->told));
373 PetscCall(PetscMalloc4(pipegcr->mmax + 2, &pipegcr->pold, pipegcr->mmax + 2, &pipegcr->sold, pipegcr->mmax + 2, &pipegcr->qold, pipegcr->mmax + 2, &pipegcr->chunksizes));
374 PetscCall(PetscMalloc3(pipegcr->mmax + 2, &pipegcr->dots, pipegcr->mmax + 1, &pipegcr->etas, pipegcr->mmax + 2, &pipegcr->redux));
375 /* If the requested number of preallocated vectors is greater than mmax reduce nprealloc */
376 if (pipegcr->nprealloc > pipegcr->mmax + 1) PetscCall(PetscInfo(NULL, "Requested nprealloc=%" PetscInt_FMT " is greater than m_max+1=%" PetscInt_FMT ". Resetting nprealloc = m_max+1.\n", pipegcr->nprealloc, pipegcr->mmax + 1));
377
378 /* Preallocate additional work vectors */
379 PetscCall(KSPAllocateVectors_PIPEGCR(ksp, pipegcr->nprealloc, pipegcr->nprealloc));
380 PetscFunctionReturn(PETSC_SUCCESS);
381 }
382
KSPReset_PIPEGCR(KSP ksp)383 static PetscErrorCode KSPReset_PIPEGCR(KSP ksp)
384 {
385 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
386
387 PetscFunctionBegin;
388 if (pipegcr->modifypc_destroy) PetscCall((*pipegcr->modifypc_destroy)(&pipegcr->modifypc_ctx));
389 PetscFunctionReturn(PETSC_SUCCESS);
390 }
391
KSPDestroy_PIPEGCR(KSP ksp)392 static PetscErrorCode KSPDestroy_PIPEGCR(KSP ksp)
393 {
394 PetscInt i;
395 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
396
397 PetscFunctionBegin;
398 PetscCall(VecDestroyVecs(ksp->nwork, &ksp->work)); /* Destroy "standard" work vecs */
399
400 /* Destroy vectors for old directions and the arrays that manage pointers to them */
401 if (pipegcr->nvecs) {
402 for (i = 0; i < pipegcr->nchunks; i++) {
403 PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->ppvecs[i]));
404 PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->psvecs[i]));
405 PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->pqvecs[i]));
406 if (pipegcr->unroll_w) PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->ptvecs[i]));
407 }
408 }
409
410 PetscCall(PetscFree6(pipegcr->pvecs, pipegcr->ppvecs, pipegcr->svecs, pipegcr->psvecs, pipegcr->qvecs, pipegcr->pqvecs));
411 PetscCall(PetscFree4(pipegcr->pold, pipegcr->sold, pipegcr->qold, pipegcr->chunksizes));
412 PetscCall(PetscFree3(pipegcr->dots, pipegcr->etas, pipegcr->redux));
413 if (pipegcr->unroll_w) PetscCall(PetscFree3(pipegcr->tvecs, pipegcr->ptvecs, pipegcr->told));
414
415 PetscCall(KSPReset_PIPEGCR(ksp));
416 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPFlexibleSetModifyPC_C", NULL));
417 PetscCall(KSPDestroyDefault(ksp));
418 PetscFunctionReturn(PETSC_SUCCESS);
419 }
420
421 /*@
422 KSPPIPEGCRSetUnrollW - Set to `PETSC_TRUE` to use `KSPPIPEGCR` with unrolling of the w vector
423
424 Logically Collective
425
426 Input Parameters:
427 + ksp - the Krylov space context
428 - unroll_w - use unrolling
429
430 Level: intermediate
431
432 Options Database Key:
433 . -ksp_pipegcr_unroll_w <bool> - use unrolling
434
435 .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`, `KSPPIPEGCRGetUnrollW()`
436 @*/
KSPPIPEGCRSetUnrollW(KSP ksp,PetscBool unroll_w)437 PetscErrorCode KSPPIPEGCRSetUnrollW(KSP ksp, PetscBool unroll_w)
438 {
439 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
440
441 PetscFunctionBegin;
442 PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
443 PetscValidLogicalCollectiveBool(ksp, unroll_w, 2);
444 pipegcr->unroll_w = unroll_w;
445 PetscFunctionReturn(PETSC_SUCCESS);
446 }
447
448 /*@
449 KSPPIPEGCRGetUnrollW - Get information on `KSPPIPEGCR` if it uses unrolling the w vector
450
451 Logically Collective
452
453 Input Parameter:
454 . ksp - the Krylov space context
455
456 Output Parameter:
457 . unroll_w - `KSPPIPEGCR` uses unrolling (bool)
458
459 Level: intermediate
460
461 .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRSetUnrollW()`
462 @*/
KSPPIPEGCRGetUnrollW(KSP ksp,PetscBool * unroll_w)463 PetscErrorCode KSPPIPEGCRGetUnrollW(KSP ksp, PetscBool *unroll_w)
464 {
465 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
466
467 PetscFunctionBegin;
468 PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
469 *unroll_w = pipegcr->unroll_w;
470 PetscFunctionReturn(PETSC_SUCCESS);
471 }
472
473 /*@
474 KSPPIPEGCRSetMmax - set the maximum number of previous directions `KSPPIPEGCR` will store for orthogonalization
475
476 Logically Collective
477
478 Input Parameters:
479 + ksp - the Krylov space context
480 - mmax - the maximum number of previous directions to orthogonalize against
481
482 Options Database Key:
483 . -ksp_pipegcr_mmax <mmax> - maximum number of previous directions
484
485 Level: intermediate
486
487 Note:
488 `mmax` + 1 directions are stored (`mmax` previous ones along with a current one)
489 and whether all are used in each iteration also depends on the truncation strategy
490 (see `KSPPIPEGCRSetTruncationType`)
491
492 .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`
493 @*/
KSPPIPEGCRSetMmax(KSP ksp,PetscInt mmax)494 PetscErrorCode KSPPIPEGCRSetMmax(KSP ksp, PetscInt mmax)
495 {
496 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
497
498 PetscFunctionBegin;
499 PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
500 PetscValidLogicalCollectiveInt(ksp, mmax, 2);
501 pipegcr->mmax = mmax;
502 PetscFunctionReturn(PETSC_SUCCESS);
503 }
504
505 /*@
506 KSPPIPEGCRGetMmax - get the maximum number of previous directions `KSPPIPEGCR` will store
507
508 Not Collective
509
510 Input Parameter:
511 . ksp - the Krylov space context
512
513 Output Parameter:
514 . mmax - the maximum number of previous directions allowed for orthogonalization
515
516 Level: intermediate
517
518 .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRSetMmax()`
519 @*/
KSPPIPEGCRGetMmax(KSP ksp,PetscInt * mmax)520 PetscErrorCode KSPPIPEGCRGetMmax(KSP ksp, PetscInt *mmax)
521 {
522 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
523
524 PetscFunctionBegin;
525 PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
526 *mmax = pipegcr->mmax;
527 PetscFunctionReturn(PETSC_SUCCESS);
528 }
529
530 /*@
531 KSPPIPEGCRSetNprealloc - set the number of directions to preallocate with `KSPPIPEGCR`
532
533 Logically Collective
534
535 Input Parameters:
536 + ksp - the Krylov space context
537 - nprealloc - the number of vectors to preallocate
538
539 Level: advanced
540
541 Options Database Key:
542 . -ksp_pipegcr_nprealloc <N> - number of vectors to preallocate
543
544 .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`
545 @*/
KSPPIPEGCRSetNprealloc(KSP ksp,PetscInt nprealloc)546 PetscErrorCode KSPPIPEGCRSetNprealloc(KSP ksp, PetscInt nprealloc)
547 {
548 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
549
550 PetscFunctionBegin;
551 PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
552 PetscValidLogicalCollectiveInt(ksp, nprealloc, 2);
553 pipegcr->nprealloc = nprealloc;
554 PetscFunctionReturn(PETSC_SUCCESS);
555 }
556
557 /*@
558 KSPPIPEGCRGetNprealloc - get the number of directions preallocate by `KSPPIPEGCR`
559
560 Not Collective
561
562 Input Parameter:
563 . ksp - the Krylov space context
564
565 Output Parameter:
566 . nprealloc - the number of directions preallocated
567
568 Level: advanced
569
570 .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRSetNprealloc()`
571 @*/
KSPPIPEGCRGetNprealloc(KSP ksp,PetscInt * nprealloc)572 PetscErrorCode KSPPIPEGCRGetNprealloc(KSP ksp, PetscInt *nprealloc)
573 {
574 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
575
576 PetscFunctionBegin;
577 PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
578 *nprealloc = pipegcr->nprealloc;
579 PetscFunctionReturn(PETSC_SUCCESS);
580 }
581
582 /*@
583 KSPPIPEGCRSetTruncationType - specify how many of its stored previous directions `KSPPIPEGCR` uses during orthogonalization
584
585 Logically Collective
586
587 Input Parameters:
588 + ksp - the Krylov space context
589 - truncstrat - the choice of strategy
590 .vb
591 KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to `mmax`) stored directions
592 KSP_FCD_TRUNC_TYPE_NOTAY uses the last `max(1,mod(i,mmax))` directions at iteration i = 0, 1, ..
593 .ve
594
595 Options Database Key:
596 . -ksp_pipegcr_truncation_type <standard,notay> - which stored basis vectors to orthogonalize against
597
598 Level: intermediate
599
600 .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPFCDTruncationType`, `KSPPIPEGCRGetTruncationType()`, `KSP_FCD_TRUNC_TYPE_STANDARD`, `KSP_FCD_TRUNC_TYPE_NOTAY`
601 @*/
KSPPIPEGCRSetTruncationType(KSP ksp,KSPFCDTruncationType truncstrat)602 PetscErrorCode KSPPIPEGCRSetTruncationType(KSP ksp, KSPFCDTruncationType truncstrat)
603 {
604 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
605
606 PetscFunctionBegin;
607 PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
608 PetscValidLogicalCollectiveEnum(ksp, truncstrat, 2);
609 pipegcr->truncstrat = truncstrat;
610 PetscFunctionReturn(PETSC_SUCCESS);
611 }
612
613 /*@
614 KSPPIPEGCRGetTruncationType - get the truncation strategy employed by `KSPPIPEGCR`
615
616 Not Collective
617
618 Input Parameter:
619 . ksp - the Krylov space context
620
621 Output Parameter:
622 . truncstrat - the strategy type
623 .vb
624 KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to `mmax`) stored directions
625 KSP_FCD_TRUNC_TYPE_NOTAY uses the last `max(1,mod(i,mmax))` directions at iteration i =0, 1, ..
626 .ve
627
628 Level: intermediate
629
630 .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPFCDTruncationType`, `KSP_FCD_TRUNC_TYPE_STANDARD`, `KSP_FCD_TRUNC_TYPE_NOTAY`
631 @*/
KSPPIPEGCRGetTruncationType(KSP ksp,KSPFCDTruncationType * truncstrat)632 PetscErrorCode KSPPIPEGCRGetTruncationType(KSP ksp, KSPFCDTruncationType *truncstrat)
633 {
634 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
635
636 PetscFunctionBegin;
637 PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
638 *truncstrat = pipegcr->truncstrat;
639 PetscFunctionReturn(PETSC_SUCCESS);
640 }
641
KSPSetFromOptions_PIPEGCR(KSP ksp,PetscOptionItems PetscOptionsObject)642 static PetscErrorCode KSPSetFromOptions_PIPEGCR(KSP ksp, PetscOptionItems PetscOptionsObject)
643 {
644 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
645 PetscInt mmax, nprealloc;
646 PetscBool flg;
647
648 PetscFunctionBegin;
649 PetscOptionsHeadBegin(PetscOptionsObject, "KSP PIPEGCR options");
650 PetscCall(PetscOptionsInt("-ksp_pipegcr_mmax", "Number of search directions to storue", "KSPPIPEGCRSetMmax", pipegcr->mmax, &mmax, &flg));
651 if (flg) PetscCall(KSPPIPEGCRSetMmax(ksp, mmax));
652 PetscCall(PetscOptionsInt("-ksp_pipegcr_nprealloc", "Number of directions to preallocate", "KSPPIPEGCRSetNprealloc", pipegcr->nprealloc, &nprealloc, &flg));
653 if (flg) PetscCall(KSPPIPEGCRSetNprealloc(ksp, nprealloc));
654 PetscCall(PetscOptionsEnum("-ksp_pipegcr_truncation_type", "Truncation approach for directions", "KSPFCGSetTruncationType", KSPFCDTruncationTypes, (PetscEnum)pipegcr->truncstrat, (PetscEnum *)&pipegcr->truncstrat, NULL));
655 PetscCall(PetscOptionsBool("-ksp_pipegcr_unroll_w", "Use unrolling of w", "KSPPIPEGCRSetUnrollW", pipegcr->unroll_w, &pipegcr->unroll_w, NULL));
656 PetscOptionsHeadEnd();
657 PetscFunctionReturn(PETSC_SUCCESS);
658 }
659
KSPFlexibleSetModifyPC_PIPEGCR(KSP ksp,KSPFlexibleModifyPCFn * function,PetscCtx ctx,PetscCtxDestroyFn * destroy)660 static PetscErrorCode KSPFlexibleSetModifyPC_PIPEGCR(KSP ksp, KSPFlexibleModifyPCFn *function, PetscCtx ctx, PetscCtxDestroyFn *destroy)
661 {
662 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
663
664 PetscFunctionBegin;
665 PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
666 pipegcr->modifypc = function;
667 pipegcr->modifypc_destroy = destroy;
668 pipegcr->modifypc_ctx = ctx;
669 PetscFunctionReturn(PETSC_SUCCESS);
670 }
671
672 /*MC
673 KSPPIPEGCR - Implements a Pipelined Generalized Conjugate Residual method {cite}`sananschneppmay2016`. [](sec_flexibleksp). [](sec_pipelineksp)
674
675 Options Database Keys:
676 + -ksp_pipegcr_mmax <N> - the max number of Krylov directions to orthogonalize against
677 . -ksp_pipegcr_unroll_w - unroll w at the storage cost of a maximum of (mmax+1) extra vectors with the benefit of better pipelining (default: `PETSC_TRUE`)
678 . -ksp_pipegcr_nprealloc <N> - the number of vectors to preallocated for storing Krylov directions. Once exhausted new directions are allocated blockwise (default: 5)
679 - -ksp_pipegcr_truncation_type <standard,notay> - which previous search directions to orthogonalize against
680
681 Level: intermediate
682
683 Notes:
684 Pipelined version of `KSPGCR` that overlaps communication (global reductions) with computation (preconditioner application and matrix-vector products) to reduce the impact of communication latency on parallel performance.
685
686 The `KSPPIPEGCR` Krylov method supports non-symmetric matrices and permits the use of a preconditioner
687 which may vary from one iteration to the next. Users can define a method to vary the
688 preconditioner between iterates via `KSPFlexibleSetModifyPC()`.
689 Restarts are solves with x0 not equal to zero. When a restart occurs, the initial starting
690 solution is given by the current estimate for `x` which was obtained by the last restart
691 iterations of the `KSPPIPEGCR` algorithm.
692 The method implemented requires at most the storage of 4 x mmax + 5 vectors, roughly twice as much as `KSPGCR`.
693
694 Only supports left preconditioning.
695
696 The natural "norm" for this method is $(u,Au)$, where $u$ is the preconditioned residual. This norm is available at no additional computational cost, as with standard
697 `KSPCG`. Choosing preconditioned or unpreconditioned norm types involves a blocking reduction which prevents any benefit from pipelining.
698
699 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
700 See [](doc_faq_pipelined)
701
702 Contributed by:
703 Sascha M. Schnepp and Patrick Sanan
704
705 .seealso: [](ch_ksp), [](sec_flexibleksp), [](sec_pipelineksp), [](doc_faq_pipelined), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`,
706 `KSPFlexibleSetModifyPC()`, `KSPPIPEFGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPIPEFCG`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`, `KSPPIPEGCRSetUnrollW()`, `KSPPIPEGCRSetMmax()`,
707 `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRGetMmax()`, `KSPGCR`, `KSPPIPEGCRGetUnrollW()`
708 M*/
KSPCreate_PIPEGCR(KSP ksp)709 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEGCR(KSP ksp)
710 {
711 KSP_PIPEGCR *pipegcr;
712
713 PetscFunctionBegin;
714 PetscCall(PetscNew(&pipegcr));
715 pipegcr->mmax = KSPPIPEGCR_DEFAULT_MMAX;
716 pipegcr->nprealloc = KSPPIPEGCR_DEFAULT_NPREALLOC;
717 pipegcr->nvecs = 0;
718 pipegcr->vecb = KSPPIPEGCR_DEFAULT_VECB;
719 pipegcr->nchunks = 0;
720 pipegcr->truncstrat = KSPPIPEGCR_DEFAULT_TRUNCSTRAT;
721 pipegcr->n_restarts = 0;
722 pipegcr->unroll_w = KSPPIPEGCR_DEFAULT_UNROLL_W;
723
724 ksp->data = (void *)pipegcr;
725
726 /* natural norm is for free, precond+unprecond norm require non-overlapped reduction */
727 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
728 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 1));
729 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 1));
730 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));
731
732 ksp->ops->setup = KSPSetUp_PIPEGCR;
733 ksp->ops->solve = KSPSolve_PIPEGCR;
734 ksp->ops->reset = KSPReset_PIPEGCR;
735 ksp->ops->destroy = KSPDestroy_PIPEGCR;
736 ksp->ops->view = KSPView_PIPEGCR;
737 ksp->ops->setfromoptions = KSPSetFromOptions_PIPEGCR;
738 ksp->ops->buildsolution = KSPBuildSolutionDefault;
739 ksp->ops->buildresidual = KSPBuildResidualDefault;
740
741 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPFlexibleSetModifyPC_C", KSPFlexibleSetModifyPC_PIPEGCR));
742 PetscFunctionReturn(PETSC_SUCCESS);
743 }
744