xref: /petsc/src/ksp/ksp/impls/gcr/pipegcr/pipegcr.c (revision 65c6dbde5b77e518f6ed8bf109ce6c9ab9061e55)
1 #include <petscsys.h>
2 #include <../src/ksp/ksp/impls/gcr/pipegcr/pipegcrimpl.h> /*I  "petscksp.h"  I*/
3 
4 static PetscBool  cited      = PETSC_FALSE;
5 static const char citation[] = "@article{SSM2016,\n"
6                                "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
7                                "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
8                                "  journal = {SIAM Journal on Scientific Computing},\n"
9                                "  volume = {38},\n"
10                                "  number = {5},\n"
11                                "  pages = {C441-C470},\n"
12                                "  year = {2016},\n"
13                                "  doi = {10.1137/15M1049130},\n"
14                                "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
15                                "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
16                                "}\n";
17 
18 #define KSPPIPEGCR_DEFAULT_MMAX       15
19 #define KSPPIPEGCR_DEFAULT_NPREALLOC  5
20 #define KSPPIPEGCR_DEFAULT_VECB       5
21 #define KSPPIPEGCR_DEFAULT_TRUNCSTRAT KSP_FCD_TRUNC_TYPE_NOTAY
22 #define KSPPIPEGCR_DEFAULT_UNROLL_W   PETSC_TRUE
23 
24 #include <petscksp.h>
25 
KSPAllocateVectors_PIPEGCR(KSP ksp,PetscInt nvecsneeded,PetscInt chunksize)26 static PetscErrorCode KSPAllocateVectors_PIPEGCR(KSP ksp, PetscInt nvecsneeded, PetscInt chunksize)
27 {
28   PetscInt     i;
29   KSP_PIPEGCR *pipegcr;
30   PetscInt     nnewvecs, nvecsprev;
31 
32   PetscFunctionBegin;
33   pipegcr = (KSP_PIPEGCR *)ksp->data;
34 
35   /* Allocate enough new vectors to add chunksize new vectors, reach nvecsneedtotal, or to reach mmax+1, whichever is smallest */
36   if (pipegcr->nvecs < PetscMin(pipegcr->mmax + 1, nvecsneeded)) {
37     nvecsprev = pipegcr->nvecs;
38     nnewvecs  = PetscMin(PetscMax(nvecsneeded - pipegcr->nvecs, chunksize), pipegcr->mmax + 1 - pipegcr->nvecs);
39     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->ppvecs[pipegcr->nchunks], 0, NULL));
40     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->psvecs[pipegcr->nchunks], 0, NULL));
41     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->pqvecs[pipegcr->nchunks], 0, NULL));
42     if (pipegcr->unroll_w) PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->ptvecs[pipegcr->nchunks], 0, NULL));
43     pipegcr->nvecs += nnewvecs;
44     for (i = 0; i < nnewvecs; i++) {
45       pipegcr->qvecs[nvecsprev + i] = pipegcr->pqvecs[pipegcr->nchunks][i];
46       pipegcr->pvecs[nvecsprev + i] = pipegcr->ppvecs[pipegcr->nchunks][i];
47       pipegcr->svecs[nvecsprev + i] = pipegcr->psvecs[pipegcr->nchunks][i];
48       if (pipegcr->unroll_w) pipegcr->tvecs[nvecsprev + i] = pipegcr->ptvecs[pipegcr->nchunks][i];
49     }
50     pipegcr->chunksizes[pipegcr->nchunks] = nnewvecs;
51     pipegcr->nchunks++;
52   }
53   PetscFunctionReturn(PETSC_SUCCESS);
54 }
55 
KSPSolve_PIPEGCR_cycle(KSP ksp)56 static PetscErrorCode KSPSolve_PIPEGCR_cycle(KSP ksp)
57 {
58   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
59   Mat          A, B;
60   Vec          x, r, b, z, w, m, n, p, s, q, t, *redux;
61   PetscInt     i, j, k, idx, kdx, mi;
62   PetscScalar  alpha = 0.0, gamma, *betas, *dots;
63   PetscReal    rnorm = 0.0, delta, *eta, *etas;
64 
65   PetscFunctionBegin;
66   /* !!PS We have not checked these routines for use with complex numbers. The inner products
67      are likely not defined correctly for that case */
68   PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars");
69 
70   PetscCall(KSPGetOperators(ksp, &A, &B));
71   x = ksp->vec_sol;
72   b = ksp->vec_rhs;
73   r = ksp->work[0];
74   z = ksp->work[1];
75   w = ksp->work[2]; /* w = Az = AB(r)                 (pipelining intermediate) */
76   m = ksp->work[3]; /* m = B(w) = B(Az) = B(AB(r))    (pipelining intermediate) */
77   n = ksp->work[4]; /* n = AB(w) = AB(Az) = AB(AB(r)) (pipelining intermediate) */
78   p = pipegcr->pvecs[0];
79   s = pipegcr->svecs[0];
80   q = pipegcr->qvecs[0];
81   t = pipegcr->unroll_w ? pipegcr->tvecs[0] : NULL;
82 
83   redux = pipegcr->redux;
84   dots  = pipegcr->dots;
85   etas  = pipegcr->etas;
86   betas = dots; /* dots takes the result of all dot products of which the betas are a subset */
87 
88   /* cycle initial residual */
89   PetscCall(KSP_MatMult(ksp, A, x, r));
90   PetscCall(VecAYPX(r, -1.0, b));       /* r <- b - Ax */
91   PetscCall(KSP_PCApply(ksp, r, z));    /* z <- B(r)   */
92   PetscCall(KSP_MatMult(ksp, A, z, w)); /* w <- Az     */
93 
94   /* initialization of other variables and pipelining intermediates */
95   PetscCall(VecCopy(z, p));
96   PetscCall(KSP_MatMult(ksp, A, p, s));
97 
98   /* overlap initial computation of delta, gamma */
99   redux[0] = w;
100   redux[1] = r;
101   PetscCall(VecMDotBegin(w, 2, redux, dots));                               /* Start split reductions for gamma = (w,r), delta = (w,w) */
102   PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)s))); /* perform asynchronous reduction */
103   PetscCall(KSP_PCApply(ksp, s, q));                                        /* q = B(s) */
104   if (pipegcr->unroll_w) PetscCall(KSP_MatMult(ksp, A, q, t));              /* t = Aq   */
105   PetscCall(VecMDotEnd(w, 2, redux, dots));                                 /* Finish split reduction */
106   delta   = PetscRealPart(dots[0]);
107   etas[0] = delta;
108   gamma   = dots[1];
109   alpha   = gamma / delta;
110 
111   i = 0;
112   do {
113     PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
114     ksp->its++;
115     PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
116 
117     /* update solution, residuals, .. */
118     PetscCall(VecAXPY(x, +alpha, p));
119     PetscCall(VecAXPY(r, -alpha, s));
120     PetscCall(VecAXPY(z, -alpha, q));
121     if (pipegcr->unroll_w) {
122       PetscCall(VecAXPY(w, -alpha, t));
123     } else {
124       PetscCall(KSP_MatMult(ksp, A, z, w));
125     }
126 
127     /* Computations of current iteration done */
128     i++;
129 
130     if (pipegcr->modifypc) PetscCall((*pipegcr->modifypc)(ksp, ksp->its, 0 /* unused argument */, ksp->rnorm, pipegcr->modifypc_ctx));
131 
132     /* If needbe, allocate a new chunk of vectors */
133     PetscCall(KSPAllocateVectors_PIPEGCR(ksp, i + 1, pipegcr->vecb));
134 
135     /* Note that we wrap around and start clobbering old vectors */
136     idx = i % (pipegcr->mmax + 1);
137     p   = pipegcr->pvecs[idx];
138     s   = pipegcr->svecs[idx];
139     q   = pipegcr->qvecs[idx];
140     if (pipegcr->unroll_w) t = pipegcr->tvecs[idx];
141     eta = pipegcr->etas + idx;
142 
143     /* number of old directions to orthogonalize against */
144     switch (pipegcr->truncstrat) {
145     case KSP_FCD_TRUNC_TYPE_STANDARD:
146       mi = pipegcr->mmax;
147       break;
148     case KSP_FCD_TRUNC_TYPE_NOTAY:
149       mi = ((i - 1) % pipegcr->mmax) + 1;
150       break;
151     default:
152       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unrecognized Truncation Strategy");
153     }
154 
155     /* Pick old p,s,q,zeta in a way suitable for VecMDot */
156     for (k = PetscMax(0, i - mi), j = 0; k < i; j++, k++) {
157       kdx              = k % (pipegcr->mmax + 1);
158       pipegcr->pold[j] = pipegcr->pvecs[kdx];
159       pipegcr->sold[j] = pipegcr->svecs[kdx];
160       pipegcr->qold[j] = pipegcr->qvecs[kdx];
161       if (pipegcr->unroll_w) pipegcr->told[j] = pipegcr->tvecs[kdx];
162       redux[j] = pipegcr->svecs[kdx];
163     }
164     /* If the above loop is not run redux contains only r and w => all beta_k = 0, only gamma, delta != 0 */
165     redux[j]     = r;
166     redux[j + 1] = w;
167 
168     /* Dot products */
169     /* Start split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
170     PetscCall(VecMDotBegin(w, j + 2, redux, dots));
171     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)w)));
172 
173     /* B(w-r) + u stabilization */
174     PetscCall(VecWAXPY(n, -1.0, r, w));                          /* m = u + B(w-r): (a) ntmp = w-r              */
175     PetscCall(KSP_PCApply(ksp, n, m));                           /* m = u + B(w-r): (b) mtmp = B(ntmp) = B(w-r) */
176     PetscCall(VecAXPY(m, 1.0, z));                               /* m = u + B(w-r): (c) m = z + mtmp            */
177     if (pipegcr->unroll_w) PetscCall(KSP_MatMult(ksp, A, m, n)); /* n = Am                                      */
178 
179     /* Finish split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
180     PetscCall(VecMDotEnd(w, j + 2, redux, dots));
181     gamma = dots[j];
182     delta = PetscRealPart(dots[j + 1]);
183 
184     /* compute new residual norm.
185        this cannot be done before this point so that the natural norm
186        is available for free and the communication involved is overlapped */
187     switch (ksp->normtype) {
188     case KSP_NORM_PRECONDITIONED:
189       PetscCall(VecNorm(z, NORM_2, &rnorm)); /* ||r|| <- sqrt(z'*z) */
190       break;
191     case KSP_NORM_UNPRECONDITIONED:
192       PetscCall(VecNorm(r, NORM_2, &rnorm)); /* ||r|| <- sqrt(r'*r) */
193       break;
194     case KSP_NORM_NATURAL:
195       rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w)  */
196       break;
197     case KSP_NORM_NONE:
198       rnorm = 0.0;
199       break;
200     default:
201       SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
202     }
203 
204     /* Check for convergence */
205     PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
206     ksp->rnorm = rnorm;
207     PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
208     PetscCall(KSPLogResidualHistory(ksp, rnorm));
209     PetscCall(KSPMonitor(ksp, ksp->its, rnorm));
210     PetscCall((*ksp->converged)(ksp, ksp->its, rnorm, &ksp->reason, ksp->cnvP));
211     if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
212 
213     /* compute new eta and scale beta */
214     *eta = 0.;
215     for (k = PetscMax(0, i - mi), j = 0; k < i; j++, k++) {
216       kdx = k % (pipegcr->mmax + 1);
217       betas[j] /= -etas[kdx]; /* betak  /= etak */
218       *eta -= PetscAbsScalar(betas[j]) * PetscAbsScalar(betas[j]) * etas[kdx];
219       /* etaitmp = -betaik^2 * etak */
220     }
221     *eta += delta; /* etai    = delta -betaik^2 * etak */
222 
223     /* check breakdown of eta = (s,s) */
224     if (*eta < 0.) {
225       pipegcr->norm_breakdown = PETSC_TRUE;
226       PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT "\n", ksp->its));
227       break;
228     } else {
229       alpha = gamma / (*eta); /* alpha = gamma/etai */
230     }
231 
232     /* project out stored search directions using classical G-S */
233     PetscCall(VecCopy(z, p));
234     PetscCall(VecCopy(w, s));
235     PetscCall(VecCopy(m, q));
236     if (pipegcr->unroll_w) {
237       PetscCall(VecCopy(n, t));
238       PetscCall(VecMAXPY(t, j, betas, pipegcr->told)); /* ti <- n  - sum_k beta_k t_k */
239     }
240     PetscCall(VecMAXPY(p, j, betas, pipegcr->pold)); /* pi <- ui - sum_k beta_k p_k */
241     PetscCall(VecMAXPY(s, j, betas, pipegcr->sold)); /* si <- wi - sum_k beta_k s_k */
242     PetscCall(VecMAXPY(q, j, betas, pipegcr->qold)); /* qi <- m  - sum_k beta_k q_k */
243 
244   } while (ksp->its < ksp->max_it);
245   if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
246   PetscFunctionReturn(PETSC_SUCCESS);
247 }
248 
KSPSolve_PIPEGCR(KSP ksp)249 static PetscErrorCode KSPSolve_PIPEGCR(KSP ksp)
250 {
251   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
252   Mat          A, B;
253   Vec          x, b, r, z, w;
254   PetscScalar  gamma;
255   PetscReal    rnorm = 0.0;
256   PetscBool    issym;
257 
258   PetscFunctionBegin;
259   PetscCall(PetscCitationsRegister(citation, &cited));
260 
261   PetscCall(KSPGetOperators(ksp, &A, &B));
262   x = ksp->vec_sol;
263   b = ksp->vec_rhs;
264   r = ksp->work[0];
265   z = ksp->work[1];
266   w = ksp->work[2]; /* w = Az = AB(r)                 (pipelining intermediate) */
267 
268   /* compute initial residual */
269   if (!ksp->guess_zero) {
270     PetscCall(KSP_MatMult(ksp, A, x, r));
271     PetscCall(VecAYPX(r, -1.0, b)); /* r <- b - Ax       */
272   } else {
273     PetscCall(VecCopy(b, r)); /* r <- b            */
274   }
275 
276   /* initial residual norm */
277   PetscCall(KSP_PCApply(ksp, r, z));    /* z <- B(r)         */
278   PetscCall(KSP_MatMult(ksp, A, z, w)); /* w <- Az           */
279   PetscCall(VecDot(r, w, &gamma));      /* gamma = (r,w)     */
280 
281   switch (ksp->normtype) {
282   case KSP_NORM_PRECONDITIONED:
283     PetscCall(VecNorm(z, NORM_2, &rnorm)); /* ||r|| <- sqrt(z'*z) */
284     break;
285   case KSP_NORM_UNPRECONDITIONED:
286     PetscCall(VecNorm(r, NORM_2, &rnorm)); /* ||r|| <- sqrt(r'*r) */
287     break;
288   case KSP_NORM_NATURAL:
289     rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w)  */
290     break;
291   case KSP_NORM_NONE:
292     rnorm = 0.0;
293     break;
294   default:
295     SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
296   }
297 
298   /* Is A symmetric? */
299   PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &issym, MATSBAIJ, MATSEQSBAIJ, MATMPISBAIJ, ""));
300   if (!issym) PetscCall(PetscInfo(A, "Matrix type is not any of MATSBAIJ,MATSEQSBAIJ,MATMPISBAIJ. Is matrix A symmetric (as required by CR methods)?\n"));
301 
302   /* logging */
303   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
304   ksp->its    = 0;
305   ksp->rnorm0 = rnorm;
306   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
307   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm0));
308   PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm0));
309   PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm0, &ksp->reason, ksp->cnvP));
310   if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
311 
312   do {
313     PetscCall(KSPSolve_PIPEGCR_cycle(ksp));
314     if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
315     if (pipegcr->norm_breakdown) {
316       pipegcr->n_restarts++;
317       pipegcr->norm_breakdown = PETSC_FALSE;
318     }
319   } while (ksp->its < ksp->max_it);
320 
321   if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
322   PetscFunctionReturn(PETSC_SUCCESS);
323 }
324 
KSPView_PIPEGCR(KSP ksp,PetscViewer viewer)325 static PetscErrorCode KSPView_PIPEGCR(KSP ksp, PetscViewer viewer)
326 {
327   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
328   PetscBool    isascii, isstring;
329   const char  *truncstr;
330 
331   PetscFunctionBegin;
332   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
333   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
334 
335   if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_STANDARD) {
336     truncstr = "Using standard truncation strategy";
337   } else if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_NOTAY) {
338     truncstr = "Using Notay's truncation strategy";
339   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Undefined FCD truncation strategy");
340 
341   if (isascii) {
342     PetscCall(PetscViewerASCIIPrintf(viewer, "  max previous directions = %" PetscInt_FMT "\n", pipegcr->mmax));
343     PetscCall(PetscViewerASCIIPrintf(viewer, "  preallocated %" PetscInt_FMT " directions\n", PetscMin(pipegcr->nprealloc, pipegcr->mmax + 1)));
344     PetscCall(PetscViewerASCIIPrintf(viewer, "  %s\n", truncstr));
345     PetscCall(PetscViewerASCIIPrintf(viewer, "  w unrolling = %s \n", PetscBools[pipegcr->unroll_w]));
346     PetscCall(PetscViewerASCIIPrintf(viewer, "  restarts performed = %" PetscInt_FMT " \n", pipegcr->n_restarts));
347   } else if (isstring) {
348     PetscCall(PetscViewerStringSPrintf(viewer, "max previous directions = %" PetscInt_FMT ", preallocated %" PetscInt_FMT " directions, %s truncation strategy", pipegcr->mmax, pipegcr->nprealloc, truncstr));
349   }
350   PetscFunctionReturn(PETSC_SUCCESS);
351 }
352 
KSPSetUp_PIPEGCR(KSP ksp)353 static PetscErrorCode KSPSetUp_PIPEGCR(KSP ksp)
354 {
355   KSP_PIPEGCR   *pipegcr = (KSP_PIPEGCR *)ksp->data;
356   Mat            A;
357   PetscBool      diagonalscale;
358   const PetscInt nworkstd = 5;
359 
360   PetscFunctionBegin;
361   PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
362   PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);
363 
364   PetscCall(KSPGetOperators(ksp, &A, NULL));
365 
366   /* Allocate "standard" work vectors */
367   PetscCall(KSPSetWorkVecs(ksp, nworkstd));
368 
369   /* Allocated space for pointers to additional work vectors
370     note that mmax is the number of previous directions, so we add 1 for the current direction */
371   PetscCall(PetscMalloc6(pipegcr->mmax + 1, &pipegcr->pvecs, pipegcr->mmax + 1, &pipegcr->ppvecs, pipegcr->mmax + 1, &pipegcr->svecs, pipegcr->mmax + 1, &pipegcr->psvecs, pipegcr->mmax + 1, &pipegcr->qvecs, pipegcr->mmax + 1, &pipegcr->pqvecs));
372   if (pipegcr->unroll_w) PetscCall(PetscMalloc3(pipegcr->mmax + 1, &pipegcr->tvecs, pipegcr->mmax + 1, &pipegcr->ptvecs, pipegcr->mmax + 2, &pipegcr->told));
373   PetscCall(PetscMalloc4(pipegcr->mmax + 2, &pipegcr->pold, pipegcr->mmax + 2, &pipegcr->sold, pipegcr->mmax + 2, &pipegcr->qold, pipegcr->mmax + 2, &pipegcr->chunksizes));
374   PetscCall(PetscMalloc3(pipegcr->mmax + 2, &pipegcr->dots, pipegcr->mmax + 1, &pipegcr->etas, pipegcr->mmax + 2, &pipegcr->redux));
375   /* If the requested number of preallocated vectors is greater than mmax reduce nprealloc */
376   if (pipegcr->nprealloc > pipegcr->mmax + 1) PetscCall(PetscInfo(NULL, "Requested nprealloc=%" PetscInt_FMT " is greater than m_max+1=%" PetscInt_FMT ". Resetting nprealloc = m_max+1.\n", pipegcr->nprealloc, pipegcr->mmax + 1));
377 
378   /* Preallocate additional work vectors */
379   PetscCall(KSPAllocateVectors_PIPEGCR(ksp, pipegcr->nprealloc, pipegcr->nprealloc));
380   PetscFunctionReturn(PETSC_SUCCESS);
381 }
382 
KSPReset_PIPEGCR(KSP ksp)383 static PetscErrorCode KSPReset_PIPEGCR(KSP ksp)
384 {
385   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
386 
387   PetscFunctionBegin;
388   if (pipegcr->modifypc_destroy) PetscCall((*pipegcr->modifypc_destroy)(&pipegcr->modifypc_ctx));
389   PetscFunctionReturn(PETSC_SUCCESS);
390 }
391 
KSPDestroy_PIPEGCR(KSP ksp)392 static PetscErrorCode KSPDestroy_PIPEGCR(KSP ksp)
393 {
394   PetscInt     i;
395   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
396 
397   PetscFunctionBegin;
398   PetscCall(VecDestroyVecs(ksp->nwork, &ksp->work)); /* Destroy "standard" work vecs */
399 
400   /* Destroy vectors for old directions and the arrays that manage pointers to them */
401   if (pipegcr->nvecs) {
402     for (i = 0; i < pipegcr->nchunks; i++) {
403       PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->ppvecs[i]));
404       PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->psvecs[i]));
405       PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->pqvecs[i]));
406       if (pipegcr->unroll_w) PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->ptvecs[i]));
407     }
408   }
409 
410   PetscCall(PetscFree6(pipegcr->pvecs, pipegcr->ppvecs, pipegcr->svecs, pipegcr->psvecs, pipegcr->qvecs, pipegcr->pqvecs));
411   PetscCall(PetscFree4(pipegcr->pold, pipegcr->sold, pipegcr->qold, pipegcr->chunksizes));
412   PetscCall(PetscFree3(pipegcr->dots, pipegcr->etas, pipegcr->redux));
413   if (pipegcr->unroll_w) PetscCall(PetscFree3(pipegcr->tvecs, pipegcr->ptvecs, pipegcr->told));
414 
415   PetscCall(KSPReset_PIPEGCR(ksp));
416   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPFlexibleSetModifyPC_C", NULL));
417   PetscCall(KSPDestroyDefault(ksp));
418   PetscFunctionReturn(PETSC_SUCCESS);
419 }
420 
421 /*@
422   KSPPIPEGCRSetUnrollW - Set to `PETSC_TRUE` to use `KSPPIPEGCR` with unrolling of the w vector
423 
424   Logically Collective
425 
426   Input Parameters:
427 + ksp      - the Krylov space context
428 - unroll_w - use unrolling
429 
430   Level: intermediate
431 
432   Options Database Key:
433 . -ksp_pipegcr_unroll_w <bool> - use unrolling
434 
435 .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`, `KSPPIPEGCRGetUnrollW()`
436 @*/
KSPPIPEGCRSetUnrollW(KSP ksp,PetscBool unroll_w)437 PetscErrorCode KSPPIPEGCRSetUnrollW(KSP ksp, PetscBool unroll_w)
438 {
439   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
440 
441   PetscFunctionBegin;
442   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
443   PetscValidLogicalCollectiveBool(ksp, unroll_w, 2);
444   pipegcr->unroll_w = unroll_w;
445   PetscFunctionReturn(PETSC_SUCCESS);
446 }
447 
448 /*@
449   KSPPIPEGCRGetUnrollW - Get information on `KSPPIPEGCR` if it uses unrolling the w vector
450 
451   Logically Collective
452 
453   Input Parameter:
454 . ksp - the Krylov space context
455 
456   Output Parameter:
457 . unroll_w - `KSPPIPEGCR` uses unrolling (bool)
458 
459   Level: intermediate
460 
461 .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRSetUnrollW()`
462 @*/
KSPPIPEGCRGetUnrollW(KSP ksp,PetscBool * unroll_w)463 PetscErrorCode KSPPIPEGCRGetUnrollW(KSP ksp, PetscBool *unroll_w)
464 {
465   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
466 
467   PetscFunctionBegin;
468   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
469   *unroll_w = pipegcr->unroll_w;
470   PetscFunctionReturn(PETSC_SUCCESS);
471 }
472 
473 /*@
474   KSPPIPEGCRSetMmax - set the maximum number of previous directions `KSPPIPEGCR` will store for orthogonalization
475 
476   Logically Collective
477 
478   Input Parameters:
479 + ksp  - the Krylov space context
480 - mmax - the maximum number of previous directions to orthogonalize against
481 
482   Options Database Key:
483 . -ksp_pipegcr_mmax <mmax> - maximum number of previous directions
484 
485   Level: intermediate
486 
487   Note:
488   `mmax` + 1 directions are stored (`mmax` previous ones along with a current one)
489   and whether all are used in each iteration also depends on the truncation strategy
490   (see `KSPPIPEGCRSetTruncationType`)
491 
492 .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`
493 @*/
KSPPIPEGCRSetMmax(KSP ksp,PetscInt mmax)494 PetscErrorCode KSPPIPEGCRSetMmax(KSP ksp, PetscInt mmax)
495 {
496   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
497 
498   PetscFunctionBegin;
499   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
500   PetscValidLogicalCollectiveInt(ksp, mmax, 2);
501   pipegcr->mmax = mmax;
502   PetscFunctionReturn(PETSC_SUCCESS);
503 }
504 
505 /*@
506   KSPPIPEGCRGetMmax - get the maximum number of previous directions `KSPPIPEGCR` will store
507 
508   Not Collective
509 
510   Input Parameter:
511 . ksp - the Krylov space context
512 
513   Output Parameter:
514 . mmax - the maximum number of previous directions allowed for orthogonalization
515 
516   Level: intermediate
517 
518 .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRSetMmax()`
519 @*/
KSPPIPEGCRGetMmax(KSP ksp,PetscInt * mmax)520 PetscErrorCode KSPPIPEGCRGetMmax(KSP ksp, PetscInt *mmax)
521 {
522   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
523 
524   PetscFunctionBegin;
525   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
526   *mmax = pipegcr->mmax;
527   PetscFunctionReturn(PETSC_SUCCESS);
528 }
529 
530 /*@
531   KSPPIPEGCRSetNprealloc - set the number of directions to preallocate with `KSPPIPEGCR`
532 
533   Logically Collective
534 
535   Input Parameters:
536 + ksp       - the Krylov space context
537 - nprealloc - the number of vectors to preallocate
538 
539   Level: advanced
540 
541   Options Database Key:
542 . -ksp_pipegcr_nprealloc <N> - number of vectors to preallocate
543 
544 .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`
545 @*/
KSPPIPEGCRSetNprealloc(KSP ksp,PetscInt nprealloc)546 PetscErrorCode KSPPIPEGCRSetNprealloc(KSP ksp, PetscInt nprealloc)
547 {
548   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
549 
550   PetscFunctionBegin;
551   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
552   PetscValidLogicalCollectiveInt(ksp, nprealloc, 2);
553   pipegcr->nprealloc = nprealloc;
554   PetscFunctionReturn(PETSC_SUCCESS);
555 }
556 
557 /*@
558   KSPPIPEGCRGetNprealloc - get the number of directions preallocate by `KSPPIPEGCR`
559 
560   Not Collective
561 
562   Input Parameter:
563 . ksp - the Krylov space context
564 
565   Output Parameter:
566 . nprealloc - the number of directions preallocated
567 
568   Level: advanced
569 
570 .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRSetNprealloc()`
571 @*/
KSPPIPEGCRGetNprealloc(KSP ksp,PetscInt * nprealloc)572 PetscErrorCode KSPPIPEGCRGetNprealloc(KSP ksp, PetscInt *nprealloc)
573 {
574   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
575 
576   PetscFunctionBegin;
577   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
578   *nprealloc = pipegcr->nprealloc;
579   PetscFunctionReturn(PETSC_SUCCESS);
580 }
581 
582 /*@
583   KSPPIPEGCRSetTruncationType - specify how many of its stored previous directions `KSPPIPEGCR` uses during orthogonalization
584 
585   Logically Collective
586 
587   Input Parameters:
588 + ksp        - the Krylov space context
589 - truncstrat - the choice of strategy
590 .vb
591   KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to `mmax`) stored directions
592   KSP_FCD_TRUNC_TYPE_NOTAY uses the last `max(1,mod(i,mmax))` directions at iteration i = 0, 1, ..
593 .ve
594 
595   Options Database Key:
596 . -ksp_pipegcr_truncation_type <standard,notay> - which stored basis vectors to orthogonalize against
597 
598   Level: intermediate
599 
600 .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPFCDTruncationType`, `KSPPIPEGCRGetTruncationType()`, `KSP_FCD_TRUNC_TYPE_STANDARD`, `KSP_FCD_TRUNC_TYPE_NOTAY`
601 @*/
KSPPIPEGCRSetTruncationType(KSP ksp,KSPFCDTruncationType truncstrat)602 PetscErrorCode KSPPIPEGCRSetTruncationType(KSP ksp, KSPFCDTruncationType truncstrat)
603 {
604   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
605 
606   PetscFunctionBegin;
607   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
608   PetscValidLogicalCollectiveEnum(ksp, truncstrat, 2);
609   pipegcr->truncstrat = truncstrat;
610   PetscFunctionReturn(PETSC_SUCCESS);
611 }
612 
613 /*@
614   KSPPIPEGCRGetTruncationType - get the truncation strategy employed by `KSPPIPEGCR`
615 
616   Not Collective
617 
618   Input Parameter:
619 . ksp - the Krylov space context
620 
621   Output Parameter:
622 . truncstrat - the strategy type
623 .vb
624   KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to `mmax`) stored directions
625   KSP_FCD_TRUNC_TYPE_NOTAY uses the last `max(1,mod(i,mmax))` directions at iteration i =0, 1, ..
626 .ve
627 
628   Level: intermediate
629 
630 .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPFCDTruncationType`, `KSP_FCD_TRUNC_TYPE_STANDARD`, `KSP_FCD_TRUNC_TYPE_NOTAY`
631 @*/
KSPPIPEGCRGetTruncationType(KSP ksp,KSPFCDTruncationType * truncstrat)632 PetscErrorCode KSPPIPEGCRGetTruncationType(KSP ksp, KSPFCDTruncationType *truncstrat)
633 {
634   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
635 
636   PetscFunctionBegin;
637   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
638   *truncstrat = pipegcr->truncstrat;
639   PetscFunctionReturn(PETSC_SUCCESS);
640 }
641 
KSPSetFromOptions_PIPEGCR(KSP ksp,PetscOptionItems PetscOptionsObject)642 static PetscErrorCode KSPSetFromOptions_PIPEGCR(KSP ksp, PetscOptionItems PetscOptionsObject)
643 {
644   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
645   PetscInt     mmax, nprealloc;
646   PetscBool    flg;
647 
648   PetscFunctionBegin;
649   PetscOptionsHeadBegin(PetscOptionsObject, "KSP PIPEGCR options");
650   PetscCall(PetscOptionsInt("-ksp_pipegcr_mmax", "Number of search directions to storue", "KSPPIPEGCRSetMmax", pipegcr->mmax, &mmax, &flg));
651   if (flg) PetscCall(KSPPIPEGCRSetMmax(ksp, mmax));
652   PetscCall(PetscOptionsInt("-ksp_pipegcr_nprealloc", "Number of directions to preallocate", "KSPPIPEGCRSetNprealloc", pipegcr->nprealloc, &nprealloc, &flg));
653   if (flg) PetscCall(KSPPIPEGCRSetNprealloc(ksp, nprealloc));
654   PetscCall(PetscOptionsEnum("-ksp_pipegcr_truncation_type", "Truncation approach for directions", "KSPFCGSetTruncationType", KSPFCDTruncationTypes, (PetscEnum)pipegcr->truncstrat, (PetscEnum *)&pipegcr->truncstrat, NULL));
655   PetscCall(PetscOptionsBool("-ksp_pipegcr_unroll_w", "Use unrolling of w", "KSPPIPEGCRSetUnrollW", pipegcr->unroll_w, &pipegcr->unroll_w, NULL));
656   PetscOptionsHeadEnd();
657   PetscFunctionReturn(PETSC_SUCCESS);
658 }
659 
KSPFlexibleSetModifyPC_PIPEGCR(KSP ksp,KSPFlexibleModifyPCFn * function,PetscCtx ctx,PetscCtxDestroyFn * destroy)660 static PetscErrorCode KSPFlexibleSetModifyPC_PIPEGCR(KSP ksp, KSPFlexibleModifyPCFn *function, PetscCtx ctx, PetscCtxDestroyFn *destroy)
661 {
662   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
663 
664   PetscFunctionBegin;
665   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
666   pipegcr->modifypc         = function;
667   pipegcr->modifypc_destroy = destroy;
668   pipegcr->modifypc_ctx     = ctx;
669   PetscFunctionReturn(PETSC_SUCCESS);
670 }
671 
672 /*MC
673   KSPPIPEGCR - Implements a Pipelined Generalized Conjugate Residual method {cite}`sananschneppmay2016`. [](sec_flexibleksp). [](sec_pipelineksp)
674 
675   Options Database Keys:
676 +   -ksp_pipegcr_mmax <N>                         - the max number of Krylov directions to orthogonalize against
677 .   -ksp_pipegcr_unroll_w                         - unroll w at the storage cost of a maximum of (mmax+1) extra vectors with the benefit of better pipelining (default: `PETSC_TRUE`)
678 .   -ksp_pipegcr_nprealloc <N>                    - the number of vectors to preallocated for storing Krylov directions. Once exhausted new directions are allocated blockwise (default: 5)
679 -   -ksp_pipegcr_truncation_type <standard,notay> - which previous search directions to orthogonalize against
680 
681   Level: intermediate
682 
683   Notes:
684   Pipelined version of `KSPGCR` that overlaps communication (global reductions) with computation (preconditioner application and matrix-vector products) to reduce the impact of communication latency on parallel performance.
685 
686   The `KSPPIPEGCR` Krylov method supports non-symmetric matrices and permits the use of a preconditioner
687   which may vary from one iteration to the next. Users can define a method to vary the
688   preconditioner between iterates via `KSPFlexibleSetModifyPC()`.
689   Restarts are solves with x0 not equal to zero. When a restart occurs, the initial starting
690   solution is given by the current estimate for `x` which was obtained by the last restart
691   iterations of the `KSPPIPEGCR` algorithm.
692   The method implemented requires at most the storage of 4 x mmax + 5 vectors, roughly twice as much as `KSPGCR`.
693 
694   Only supports left preconditioning.
695 
696   The natural "norm" for this method is $(u,Au)$, where $u$ is the preconditioned residual. This norm is available at no additional computational cost, as with standard
697   `KSPCG`.  Choosing preconditioned or unpreconditioned norm types involves a blocking reduction which prevents any benefit from pipelining.
698 
699   MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
700   See [](doc_faq_pipelined)
701 
702   Contributed by:
703   Sascha M. Schnepp and Patrick Sanan
704 
705 .seealso: [](ch_ksp), [](sec_flexibleksp), [](sec_pipelineksp), [](doc_faq_pipelined), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`,
706           `KSPFlexibleSetModifyPC()`, `KSPPIPEFGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPIPEFCG`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`, `KSPPIPEGCRSetUnrollW()`, `KSPPIPEGCRSetMmax()`,
707           `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRGetMmax()`, `KSPGCR`, `KSPPIPEGCRGetUnrollW()`
708 M*/
KSPCreate_PIPEGCR(KSP ksp)709 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEGCR(KSP ksp)
710 {
711   KSP_PIPEGCR *pipegcr;
712 
713   PetscFunctionBegin;
714   PetscCall(PetscNew(&pipegcr));
715   pipegcr->mmax       = KSPPIPEGCR_DEFAULT_MMAX;
716   pipegcr->nprealloc  = KSPPIPEGCR_DEFAULT_NPREALLOC;
717   pipegcr->nvecs      = 0;
718   pipegcr->vecb       = KSPPIPEGCR_DEFAULT_VECB;
719   pipegcr->nchunks    = 0;
720   pipegcr->truncstrat = KSPPIPEGCR_DEFAULT_TRUNCSTRAT;
721   pipegcr->n_restarts = 0;
722   pipegcr->unroll_w   = KSPPIPEGCR_DEFAULT_UNROLL_W;
723 
724   ksp->data = (void *)pipegcr;
725 
726   /* natural norm is for free, precond+unprecond norm require non-overlapped reduction */
727   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
728   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 1));
729   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 1));
730   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));
731 
732   ksp->ops->setup          = KSPSetUp_PIPEGCR;
733   ksp->ops->solve          = KSPSolve_PIPEGCR;
734   ksp->ops->reset          = KSPReset_PIPEGCR;
735   ksp->ops->destroy        = KSPDestroy_PIPEGCR;
736   ksp->ops->view           = KSPView_PIPEGCR;
737   ksp->ops->setfromoptions = KSPSetFromOptions_PIPEGCR;
738   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
739   ksp->ops->buildresidual  = KSPBuildResidualDefault;
740 
741   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPFlexibleSetModifyPC_C", KSPFlexibleSetModifyPC_PIPEGCR));
742   PetscFunctionReturn(PETSC_SUCCESS);
743 }
744