xref: /petsc/src/ksp/ksp/impls/fcg/pipefcg/pipefcg.c (revision 65c6dbde5b77e518f6ed8bf109ce6c9ab9061e55)
1 #include <../src/ksp/ksp/impls/fcg/pipefcg/pipefcgimpl.h> /*I  "petscksp.h"  I*/
2 
3 static PetscBool  cited      = PETSC_FALSE;
4 static const char citation[] = "@article{SSM2016,\n"
5                                "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
6                                "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
7                                "  journal = {SIAM Journal on Scientific Computing},\n"
8                                "  volume = {38},\n"
9                                "  number = {5},\n"
10                                "  pages = {C441-C470},\n"
11                                "  year = {2016},\n"
12                                "  doi = {10.1137/15M1049130},\n"
13                                "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
14                                "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
15                                "}\n";
16 
17 #define KSPPIPEFCG_DEFAULT_MMAX       15
18 #define KSPPIPEFCG_DEFAULT_NPREALLOC  5
19 #define KSPPIPEFCG_DEFAULT_VECB       5
20 #define KSPPIPEFCG_DEFAULT_TRUNCSTRAT KSP_FCD_TRUNC_TYPE_NOTAY
21 
KSPAllocateVectors_PIPEFCG(KSP ksp,PetscInt nvecsneeded,PetscInt chunksize)22 static PetscErrorCode KSPAllocateVectors_PIPEFCG(KSP ksp, PetscInt nvecsneeded, PetscInt chunksize)
23 {
24   PetscInt     i;
25   KSP_PIPEFCG *pipefcg;
26   PetscInt     nnewvecs, nvecsprev;
27 
28   PetscFunctionBegin;
29   pipefcg = (KSP_PIPEFCG *)ksp->data;
30 
31   /* Allocate enough new vectors to add chunksize new vectors, reach nvecsneedtotal, or to reach mmax+1, whichever is smallest */
32   if (pipefcg->nvecs < PetscMin(pipefcg->mmax + 1, nvecsneeded)) {
33     nvecsprev = pipefcg->nvecs;
34     nnewvecs  = PetscMin(PetscMax(nvecsneeded - pipefcg->nvecs, chunksize), pipefcg->mmax + 1 - pipefcg->nvecs);
35     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipefcg->pQvecs[pipefcg->nchunks], 0, NULL));
36     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipefcg->pZETAvecs[pipefcg->nchunks], 0, NULL));
37     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipefcg->pPvecs[pipefcg->nchunks], 0, NULL));
38     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipefcg->pSvecs[pipefcg->nchunks], 0, NULL));
39     pipefcg->nvecs += nnewvecs;
40     for (i = 0; i < nnewvecs; ++i) {
41       pipefcg->Qvecs[nvecsprev + i]    = pipefcg->pQvecs[pipefcg->nchunks][i];
42       pipefcg->ZETAvecs[nvecsprev + i] = pipefcg->pZETAvecs[pipefcg->nchunks][i];
43       pipefcg->Pvecs[nvecsprev + i]    = pipefcg->pPvecs[pipefcg->nchunks][i];
44       pipefcg->Svecs[nvecsprev + i]    = pipefcg->pSvecs[pipefcg->nchunks][i];
45     }
46     pipefcg->chunksizes[pipefcg->nchunks] = nnewvecs;
47     ++pipefcg->nchunks;
48   }
49   PetscFunctionReturn(PETSC_SUCCESS);
50 }
51 
KSPSetUp_PIPEFCG(KSP ksp)52 static PetscErrorCode KSPSetUp_PIPEFCG(KSP ksp)
53 {
54   KSP_PIPEFCG   *pipefcg;
55   const PetscInt nworkstd = 5;
56 
57   PetscFunctionBegin;
58   pipefcg = (KSP_PIPEFCG *)ksp->data;
59 
60   /* Allocate "standard" work vectors (not including the basis and transformed basis vectors) */
61   PetscCall(KSPSetWorkVecs(ksp, nworkstd));
62 
63   /* Allocated space for pointers to additional work vectors
64    note that mmax is the number of previous directions, so we add 1 for the current direction,
65    and an extra 1 for the prealloc (which might be empty) */
66   PetscCall(PetscMalloc4(pipefcg->mmax + 1, &pipefcg->Pvecs, pipefcg->mmax + 1, &pipefcg->pPvecs, pipefcg->mmax + 1, &pipefcg->Svecs, pipefcg->mmax + 1, &pipefcg->pSvecs));
67   PetscCall(PetscMalloc4(pipefcg->mmax + 1, &pipefcg->Qvecs, pipefcg->mmax + 1, &pipefcg->pQvecs, pipefcg->mmax + 1, &pipefcg->ZETAvecs, pipefcg->mmax + 1, &pipefcg->pZETAvecs));
68   PetscCall(PetscMalloc4(pipefcg->mmax + 1, &pipefcg->Pold, pipefcg->mmax + 1, &pipefcg->Sold, pipefcg->mmax + 1, &pipefcg->Qold, pipefcg->mmax + 1, &pipefcg->ZETAold));
69   PetscCall(PetscMalloc1(pipefcg->mmax + 1, &pipefcg->chunksizes));
70   PetscCall(PetscMalloc3(pipefcg->mmax + 2, &pipefcg->dots, pipefcg->mmax + 1, &pipefcg->etas, pipefcg->mmax + 2, &pipefcg->redux));
71 
72   /* If the requested number of preallocated vectors is greater than mmax reduce nprealloc */
73   if (pipefcg->nprealloc > pipefcg->mmax + 1) PetscCall(PetscInfo(NULL, "Requested nprealloc=%" PetscInt_FMT " is greater than m_max+1=%" PetscInt_FMT ". Resetting nprealloc = m_max+1.\n", pipefcg->nprealloc, pipefcg->mmax + 1));
74 
75   /* Preallocate additional work vectors */
76   PetscCall(KSPAllocateVectors_PIPEFCG(ksp, pipefcg->nprealloc, pipefcg->nprealloc));
77   PetscFunctionReturn(PETSC_SUCCESS);
78 }
79 
KSPSolve_PIPEFCG_cycle(KSP ksp)80 static PetscErrorCode KSPSolve_PIPEFCG_cycle(KSP ksp)
81 {
82   PetscInt     i, j, k, idx, kdx, mi;
83   KSP_PIPEFCG *pipefcg;
84   PetscScalar  alpha = 0.0, gamma, *betas, *dots;
85   PetscReal    dp    = 0.0, delta, *eta, *etas;
86   Vec          B, R, Z, X, Qcurr, W, ZETAcurr, M, N, Pcurr, Scurr, *redux;
87   Mat          Amat, Pmat;
88 
89   PetscFunctionBegin;
90   /* We have not checked these routines for use with complex numbers. The inner products
91      are likely not defined correctly for that case */
92   PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars");
93 
94 #define VecXDot(x, y, a)          (pipefcg->type == KSP_CG_HERMITIAN ? VecDot(x, y, a) : VecTDot(x, y, a))
95 #define VecXDotBegin(x, y, a)     (pipefcg->type == KSP_CG_HERMITIAN ? VecDotBegin(x, y, a) : VecTDotBegin(x, y, a))
96 #define VecXDotEnd(x, y, a)       (pipefcg->type == KSP_CG_HERMITIAN ? VecDotEnd(x, y, a) : VecTDotEnd(x, y, a))
97 #define VecMXDot(x, n, y, a)      (pipefcg->type == KSP_CG_HERMITIAN ? VecMDot(x, n, y, a) : VecMTDot(x, n, y, a))
98 #define VecMXDotBegin(x, n, y, a) (pipefcg->type == KSP_CG_HERMITIAN ? VecMDotBegin(x, n, y, a) : VecMTDotBegin(x, n, y, a))
99 #define VecMXDotEnd(x, n, y, a)   (pipefcg->type == KSP_CG_HERMITIAN ? VecMDotEnd(x, n, y, a) : VecMTDotEnd(x, n, y, a))
100 
101   pipefcg = (KSP_PIPEFCG *)ksp->data;
102   X       = ksp->vec_sol;
103   B       = ksp->vec_rhs;
104   R       = ksp->work[0];
105   Z       = ksp->work[1];
106   W       = ksp->work[2];
107   M       = ksp->work[3];
108   N       = ksp->work[4];
109 
110   redux = pipefcg->redux;
111   dots  = pipefcg->dots;
112   etas  = pipefcg->etas;
113   betas = dots; /* dots takes the result of all dot products of which the betas are a subset */
114 
115   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
116 
117   /* Compute cycle initial residual */
118   PetscCall(KSP_MatMult(ksp, Amat, X, R));
119   PetscCall(VecAYPX(R, -1.0, B));    /* r <- b - Ax */
120   PetscCall(KSP_PCApply(ksp, R, Z)); /* z <- Br     */
121 
122   Pcurr    = pipefcg->Pvecs[0];
123   Scurr    = pipefcg->Svecs[0];
124   Qcurr    = pipefcg->Qvecs[0];
125   ZETAcurr = pipefcg->ZETAvecs[0];
126   PetscCall(VecCopy(Z, Pcurr));
127   PetscCall(KSP_MatMult(ksp, Amat, Pcurr, Scurr)); /* S = Ap     */
128   PetscCall(VecCopy(Scurr, W));                    /* w = s = Az */
129 
130   /* Initial state of pipelining intermediates */
131   redux[0] = R;
132   redux[1] = W;
133   PetscCall(VecMXDotBegin(Z, 2, redux, dots));
134   PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Z))); /* perform asynchronous reduction */
135   PetscCall(KSP_PCApply(ksp, W, M));                                        /* m = B(w) */
136   PetscCall(KSP_MatMult(ksp, Amat, M, N));                                  /* n = Am   */
137   PetscCall(VecCopy(M, Qcurr));                                             /* q = m    */
138   PetscCall(VecCopy(N, ZETAcurr));                                          /* zeta = n */
139   PetscCall(VecMXDotEnd(Z, 2, redux, dots));
140   gamma   = dots[0];
141   delta   = PetscRealPart(dots[1]);
142   etas[0] = delta;
143   alpha   = gamma / delta;
144 
145   i = 0;
146   do {
147     ksp->its++;
148 
149     /* Update X, R, Z, W */
150     PetscCall(VecAXPY(X, +alpha, Pcurr));    /* x <- x + alpha * pi    */
151     PetscCall(VecAXPY(R, -alpha, Scurr));    /* r <- r - alpha * si    */
152     PetscCall(VecAXPY(Z, -alpha, Qcurr));    /* z <- z - alpha * qi    */
153     PetscCall(VecAXPY(W, -alpha, ZETAcurr)); /* w <- w - alpha * zetai */
154 
155     /* Compute norm for convergence check */
156     switch (ksp->normtype) {
157     case KSP_NORM_PRECONDITIONED:
158       PetscCall(VecNorm(Z, NORM_2, &dp)); /* dp <- sqrt(z'*z) = sqrt(e'*A'*B'*B*A*e) */
159       break;
160     case KSP_NORM_UNPRECONDITIONED:
161       PetscCall(VecNorm(R, NORM_2, &dp)); /* dp <- sqrt(r'*r) = sqrt(e'*A'*A*e)      */
162       break;
163     case KSP_NORM_NATURAL:
164       dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- sqrt(r'*z) = sqrt(e'*A'*B*A*e)    */
165       break;
166     case KSP_NORM_NONE:
167       dp = 0.0;
168       break;
169     default:
170       SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
171     }
172 
173     /* Check for convergence */
174     ksp->rnorm = dp;
175     PetscCall(KSPLogResidualHistory(ksp, dp));
176     PetscCall(KSPMonitor(ksp, ksp->its, dp));
177     PetscCall((*ksp->converged)(ksp, ksp->its, dp, &ksp->reason, ksp->cnvP));
178     if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
179 
180     /* Computations of current iteration done */
181     ++i;
182 
183     /* If needbe, allocate a new chunk of vectors in P and C */
184     PetscCall(KSPAllocateVectors_PIPEFCG(ksp, i + 1, pipefcg->vecb));
185 
186     /* Note that we wrap around and start clobbering old vectors */
187     idx      = i % (pipefcg->mmax + 1);
188     Pcurr    = pipefcg->Pvecs[idx];
189     Scurr    = pipefcg->Svecs[idx];
190     Qcurr    = pipefcg->Qvecs[idx];
191     ZETAcurr = pipefcg->ZETAvecs[idx];
192     eta      = pipefcg->etas + idx;
193 
194     /* number of old directions to orthogonalize against */
195     switch (pipefcg->truncstrat) {
196     case KSP_FCD_TRUNC_TYPE_STANDARD:
197       mi = pipefcg->mmax;
198       break;
199     case KSP_FCD_TRUNC_TYPE_NOTAY:
200       mi = ((i - 1) % pipefcg->mmax) + 1;
201       break;
202     default:
203       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unrecognized Truncation Strategy");
204     }
205 
206     /* Pick old p,s,q,zeta in a way suitable for VecMDot */
207     PetscCall(VecCopy(Z, Pcurr));
208     for (k = PetscMax(0, i - mi), j = 0; k < i; ++j, ++k) {
209       kdx                 = k % (pipefcg->mmax + 1);
210       pipefcg->Pold[j]    = pipefcg->Pvecs[kdx];
211       pipefcg->Sold[j]    = pipefcg->Svecs[kdx];
212       pipefcg->Qold[j]    = pipefcg->Qvecs[kdx];
213       pipefcg->ZETAold[j] = pipefcg->ZETAvecs[kdx];
214       redux[j]            = pipefcg->Svecs[kdx];
215     }
216     redux[j]     = R; /* If the above loop is not executed redux contains only R => all beta_k = 0, only gamma, delta != 0 */
217     redux[j + 1] = W;
218 
219     PetscCall(VecMXDotBegin(Z, j + 2, redux, betas));                         /* Start split reductions for beta_k = (z,s_k), gamma = (z,r), delta = (z,w) */
220     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Z))); /* perform asynchronous reduction */
221     PetscCall(VecWAXPY(N, -1.0, R, W));                                       /* m = u + B(w-r): (a) ntmp = w-r              */
222     PetscCall(KSP_PCApply(ksp, N, M));                                        /* m = u + B(w-r): (b) mtmp = B(ntmp) = B(w-r) */
223     PetscCall(VecAXPY(M, 1.0, Z));                                            /* m = u + B(w-r): (c) m = z + mtmp            */
224     PetscCall(KSP_MatMult(ksp, Amat, M, N));                                  /* n = Am                                      */
225     PetscCall(VecMXDotEnd(Z, j + 2, redux, betas));                           /* Finish split reductions */
226     gamma = betas[j];
227     delta = PetscRealPart(betas[j + 1]);
228 
229     *eta = 0.;
230     for (k = PetscMax(0, i - mi), j = 0; k < i; ++j, ++k) {
231       kdx = k % (pipefcg->mmax + 1);
232       betas[j] /= -etas[kdx]; /* betak  /= etak */
233       *eta -= PetscAbsScalar(betas[j]) * PetscAbsScalar(betas[j]) * etas[kdx];
234       /* etaitmp = -betaik^2 * etak */
235     }
236     *eta += delta; /* etai    = delta -betaik^2 * etak */
237     if (*eta < 0.) {
238       pipefcg->norm_breakdown = PETSC_TRUE;
239       PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT "\n", ksp->its));
240       break;
241     } else {
242       alpha = gamma / (*eta); /* alpha = gamma/etai */
243     }
244 
245     /* project out stored search directions using classical G-S */
246     PetscCall(VecCopy(Z, Pcurr));
247     PetscCall(VecCopy(W, Scurr));
248     PetscCall(VecCopy(M, Qcurr));
249     PetscCall(VecCopy(N, ZETAcurr));
250     PetscCall(VecMAXPY(Pcurr, j, betas, pipefcg->Pold));       /* pi    <- ui - sum_k beta_k p_k    */
251     PetscCall(VecMAXPY(Scurr, j, betas, pipefcg->Sold));       /* si    <- wi - sum_k beta_k s_k    */
252     PetscCall(VecMAXPY(Qcurr, j, betas, pipefcg->Qold));       /* qi    <- m  - sum_k beta_k q_k    */
253     PetscCall(VecMAXPY(ZETAcurr, j, betas, pipefcg->ZETAold)); /* zetai <- n  - sum_k beta_k zeta_k */
254 
255   } while (ksp->its < ksp->max_it);
256   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
257   PetscFunctionReturn(PETSC_SUCCESS);
258 }
259 
KSPSolve_PIPEFCG(KSP ksp)260 static PetscErrorCode KSPSolve_PIPEFCG(KSP ksp)
261 {
262   KSP_PIPEFCG *pipefcg;
263   PetscScalar  gamma;
264   PetscReal    dp = 0.0;
265   Vec          B, R, Z, X;
266   Mat          Amat, Pmat;
267 
268 #define VecXDot(x, y, a) (pipefcg->type == KSP_CG_HERMITIAN ? VecDot(x, y, a) : VecTDot(x, y, a))
269 
270   PetscFunctionBegin;
271   PetscCall(PetscCitationsRegister(citation, &cited));
272 
273   pipefcg = (KSP_PIPEFCG *)ksp->data;
274   X       = ksp->vec_sol;
275   B       = ksp->vec_rhs;
276   R       = ksp->work[0];
277   Z       = ksp->work[1];
278 
279   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
280 
281   /* Compute initial residual needed for convergence check*/
282   ksp->its = 0;
283   if (!ksp->guess_zero) {
284     PetscCall(KSP_MatMult(ksp, Amat, X, R));
285     PetscCall(VecAYPX(R, -1.0, B)); /* r <- b - Ax                             */
286   } else {
287     PetscCall(VecCopy(B, R)); /* r <- b (x is 0)                         */
288   }
289   switch (ksp->normtype) {
290   case KSP_NORM_PRECONDITIONED:
291     PetscCall(KSP_PCApply(ksp, R, Z));  /* z <- Br                                 */
292     PetscCall(VecNorm(Z, NORM_2, &dp)); /* dp <- dqrt(z'*z) = sqrt(e'*A'*B'*B*A*e) */
293     break;
294   case KSP_NORM_UNPRECONDITIONED:
295     PetscCall(VecNorm(R, NORM_2, &dp)); /* dp <- sqrt(r'*r) = sqrt(e'*A'*A*e)      */
296     break;
297   case KSP_NORM_NATURAL:
298     PetscCall(KSP_PCApply(ksp, R, Z)); /* z <- Br                                 */
299     PetscCall(VecXDot(Z, R, &gamma));
300     dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- sqrt(r'*z) = sqrt(e'*A'*B*A*e)    */
301     break;
302   case KSP_NORM_NONE:
303     dp = 0.0;
304     break;
305   default:
306     SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
307   }
308 
309   /* Initial Convergence Check */
310   PetscCall(KSPLogResidualHistory(ksp, dp));
311   PetscCall(KSPMonitor(ksp, 0, dp));
312   ksp->rnorm = dp;
313   PetscCall((*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP));
314   if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
315 
316   do {
317     /* A cycle is broken only if a norm breakdown occurs. If not the entire solve happens in a single cycle.
318        This is coded this way to allow both truncation and truncation-restart strategy
319        (see KSPFCDGetNumOldDirections()) */
320     PetscCall(KSPSolve_PIPEFCG_cycle(ksp));
321     if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
322     if (pipefcg->norm_breakdown) {
323       pipefcg->n_restarts++;
324       pipefcg->norm_breakdown = PETSC_FALSE;
325     }
326   } while (ksp->its < ksp->max_it);
327 
328   if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
329   PetscFunctionReturn(PETSC_SUCCESS);
330 }
331 
KSPDestroy_PIPEFCG(KSP ksp)332 static PetscErrorCode KSPDestroy_PIPEFCG(KSP ksp)
333 {
334   PetscInt     i;
335   KSP_PIPEFCG *pipefcg;
336 
337   PetscFunctionBegin;
338   pipefcg = (KSP_PIPEFCG *)ksp->data;
339 
340   /* Destroy "standard" work vecs */
341   PetscCall(VecDestroyVecs(ksp->nwork, &ksp->work));
342 
343   /* Destroy vectors of old directions and the arrays that manage pointers to them */
344   if (pipefcg->nvecs) {
345     for (i = 0; i < pipefcg->nchunks; ++i) {
346       PetscCall(VecDestroyVecs(pipefcg->chunksizes[i], &pipefcg->pPvecs[i]));
347       PetscCall(VecDestroyVecs(pipefcg->chunksizes[i], &pipefcg->pSvecs[i]));
348       PetscCall(VecDestroyVecs(pipefcg->chunksizes[i], &pipefcg->pQvecs[i]));
349       PetscCall(VecDestroyVecs(pipefcg->chunksizes[i], &pipefcg->pZETAvecs[i]));
350     }
351   }
352   PetscCall(PetscFree4(pipefcg->Pvecs, pipefcg->Svecs, pipefcg->pPvecs, pipefcg->pSvecs));
353   PetscCall(PetscFree4(pipefcg->Qvecs, pipefcg->ZETAvecs, pipefcg->pQvecs, pipefcg->pZETAvecs));
354   PetscCall(PetscFree4(pipefcg->Pold, pipefcg->Sold, pipefcg->Qold, pipefcg->ZETAold));
355   PetscCall(PetscFree(pipefcg->chunksizes));
356   PetscCall(PetscFree3(pipefcg->dots, pipefcg->etas, pipefcg->redux));
357   PetscCall(KSPDestroyDefault(ksp));
358   PetscFunctionReturn(PETSC_SUCCESS);
359 }
360 
KSPView_PIPEFCG(KSP ksp,PetscViewer viewer)361 static PetscErrorCode KSPView_PIPEFCG(KSP ksp, PetscViewer viewer)
362 {
363   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;
364   PetscBool    isascii, isstring;
365   const char  *truncstr;
366 
367   PetscFunctionBegin;
368   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
369   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
370 
371   if (pipefcg->truncstrat == KSP_FCD_TRUNC_TYPE_STANDARD) {
372     truncstr = "Using standard truncation strategy";
373   } else if (pipefcg->truncstrat == KSP_FCD_TRUNC_TYPE_NOTAY) {
374     truncstr = "Using Notay's truncation strategy";
375   } else {
376     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Undefined FCD truncation strategy");
377   }
378 
379   if (isascii) {
380     PetscCall(PetscViewerASCIIPrintf(viewer, "  max previous directions = %" PetscInt_FMT "\n", pipefcg->mmax));
381     PetscCall(PetscViewerASCIIPrintf(viewer, "  preallocated %" PetscInt_FMT " directions\n", PetscMin(pipefcg->nprealloc, pipefcg->mmax + 1)));
382     PetscCall(PetscViewerASCIIPrintf(viewer, "  %s\n", truncstr));
383     PetscCall(PetscViewerASCIIPrintf(viewer, "  restarts performed = %" PetscInt_FMT " \n", pipefcg->n_restarts));
384   } else if (isstring) {
385     PetscCall(PetscViewerStringSPrintf(viewer, "max previous directions = %" PetscInt_FMT ", preallocated %" PetscInt_FMT " directions, %s truncation strategy", pipefcg->mmax, pipefcg->nprealloc, truncstr));
386   }
387   PetscFunctionReturn(PETSC_SUCCESS);
388 }
389 
390 /*@
391   KSPPIPEFCGSetMmax - set the maximum number of previous directions `KSPPIPEFCG` will store for orthogonalization
392 
393   Logically Collective
394 
395   Input Parameters:
396 + ksp  - the Krylov space context
397 - mmax - the maximum number of previous directions to orthogonalize against
398 
399   Options Database Key:
400 . -ksp_pipefcg_mmax <N> - maximum number of previous directions
401 
402   Level: intermediate
403 
404   Note:
405   `mmax` + 1 directions are stored (`mmax` previous ones along with the current one)
406   and whether all are used in each iteration also depends on the truncation strategy, see `KSPPIPEFCGSetTruncationType()`
407 
408 .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGSetTruncationType()`, `KSPPIPEFCGSetNprealloc()`, `KSPFCGSetMmax()`, `KSPFCGGetMmax()`
409 @*/
KSPPIPEFCGSetMmax(KSP ksp,PetscInt mmax)410 PetscErrorCode KSPPIPEFCGSetMmax(KSP ksp, PetscInt mmax)
411 {
412   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;
413 
414   PetscFunctionBegin;
415   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
416   PetscValidLogicalCollectiveInt(ksp, mmax, 2);
417   pipefcg->mmax = mmax;
418   PetscFunctionReturn(PETSC_SUCCESS);
419 }
420 
421 /*@
422   KSPPIPEFCGGetMmax - get the maximum number of previous directions `KSPPIPEFCG` will store
423 
424   Not Collective
425 
426   Input Parameter:
427 . ksp - the Krylov space context
428 
429   Output Parameter:
430 . mmax - the maximum number of previous directions allowed for orthogonalization
431 
432   Level: intermediate
433 
434 .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGGetTruncationType()`, `KSPPIPEFCGGetNprealloc()`, `KSPPIPEFCGSetMmax()`, `KSPFCGGetMmax()`, `KSPFCGSetMmax()`
435 @*/
KSPPIPEFCGGetMmax(KSP ksp,PetscInt * mmax)436 PetscErrorCode KSPPIPEFCGGetMmax(KSP ksp, PetscInt *mmax)
437 {
438   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;
439 
440   PetscFunctionBegin;
441   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
442   *mmax = pipefcg->mmax;
443   PetscFunctionReturn(PETSC_SUCCESS);
444 }
445 
446 /*@
447   KSPPIPEFCGSetNprealloc - set the number of directions to preallocate with `KSPPIPEFCG`
448 
449   Logically Collective
450 
451   Input Parameters:
452 + ksp       - the Krylov space context
453 - nprealloc - the number of vectors to preallocate
454 
455   Options Database Key:
456 . -ksp_pipefcg_nprealloc <N> - the number of vectors to preallocate
457 
458   Level: advanced
459 
460 .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGSetTruncationType()`, `KSPPIPEFCGGetNprealloc()`, `KSPPIPEFCGSetMmax()`, `KSPPIPEFCGGetMmax()`
461 @*/
KSPPIPEFCGSetNprealloc(KSP ksp,PetscInt nprealloc)462 PetscErrorCode KSPPIPEFCGSetNprealloc(KSP ksp, PetscInt nprealloc)
463 {
464   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;
465 
466   PetscFunctionBegin;
467   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
468   PetscValidLogicalCollectiveInt(ksp, nprealloc, 2);
469   pipefcg->nprealloc = nprealloc;
470   PetscFunctionReturn(PETSC_SUCCESS);
471 }
472 
473 /*@
474   KSPPIPEFCGGetNprealloc - get the number of directions to preallocate by `KSPPIPEFCG`
475 
476   Not Collective
477 
478   Input Parameter:
479 . ksp - the Krylov space context
480 
481   Output Parameter:
482 . nprealloc - the number of directions preallocated
483 
484   Level: advanced
485 
486 .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGGetTruncationType()`, `KSPPIPEFCGSetNprealloc()`, `KSPPIPEFCGSetMmax()`, `KSPPIPEFCGGetMmax()`
487 @*/
KSPPIPEFCGGetNprealloc(KSP ksp,PetscInt * nprealloc)488 PetscErrorCode KSPPIPEFCGGetNprealloc(KSP ksp, PetscInt *nprealloc)
489 {
490   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;
491 
492   PetscFunctionBegin;
493   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
494   *nprealloc = pipefcg->nprealloc;
495   PetscFunctionReturn(PETSC_SUCCESS);
496 }
497 
498 /*@
499   KSPPIPEFCGSetTruncationType - specify how many of its stored previous directions `KSPPIPEFCG` uses during orthogonalization
500 
501   Logically Collective
502 
503   Input Parameters:
504 + ksp        - the Krylov space context
505 - truncstrat - the choice of strategy
506 .vb
507   KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to `mmax`) stored directions
508   KSP_FCD_TRUNC_TYPE_NOTAY uses `max(1,mod(i,mmax))` stored directions at iteration i = 0, 1, ...
509 .ve
510 
511   Options Database Key:
512 . -ksp_pipefcg_truncation_type <standard,notay> - which stored search directions to orthogonalize against
513 
514   Level: intermediate
515 
516 .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGGetTruncationType`, `KSPFCDTruncationType`, `KSP_FCD_TRUNC_TYPE_STANDARD`, `KSP_FCD_TRUNC_TYPE_NOTAY`
517 @*/
KSPPIPEFCGSetTruncationType(KSP ksp,KSPFCDTruncationType truncstrat)518 PetscErrorCode KSPPIPEFCGSetTruncationType(KSP ksp, KSPFCDTruncationType truncstrat)
519 {
520   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;
521 
522   PetscFunctionBegin;
523   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
524   PetscValidLogicalCollectiveEnum(ksp, truncstrat, 2);
525   pipefcg->truncstrat = truncstrat;
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
529 /*@
530   KSPPIPEFCGGetTruncationType - get the truncation strategy employed by `KSPPIPEFCG`
531 
532   Not Collective
533 
534   Input Parameter:
535 . ksp - the Krylov space context
536 
537   Output Parameter:
538 . truncstrat - the strategy type
539 
540   Level: intermediate
541 
542 .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGSetTruncationType()`, `KSPFCDTruncationType`, `KSP_FCD_TRUNC_TYPE_STANDARD`, `KSP_FCD_TRUNC_TYPE_NOTAY`
543 @*/
KSPPIPEFCGGetTruncationType(KSP ksp,KSPFCDTruncationType * truncstrat)544 PetscErrorCode KSPPIPEFCGGetTruncationType(KSP ksp, KSPFCDTruncationType *truncstrat)
545 {
546   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;
547 
548   PetscFunctionBegin;
549   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
550   *truncstrat = pipefcg->truncstrat;
551   PetscFunctionReturn(PETSC_SUCCESS);
552 }
553 
KSPSetFromOptions_PIPEFCG(KSP ksp,PetscOptionItems PetscOptionsObject)554 static PetscErrorCode KSPSetFromOptions_PIPEFCG(KSP ksp, PetscOptionItems PetscOptionsObject)
555 {
556   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;
557   PetscInt     mmax, nprealloc;
558   PetscBool    flg;
559 
560   PetscFunctionBegin;
561   PetscOptionsHeadBegin(PetscOptionsObject, "KSP PIPEFCG options");
562   PetscCall(PetscOptionsInt("-ksp_pipefcg_mmax", "Number of search directions to storue", "KSPPIPEFCGSetMmax", pipefcg->mmax, &mmax, &flg));
563   if (flg) PetscCall(KSPPIPEFCGSetMmax(ksp, mmax));
564   PetscCall(PetscOptionsInt("-ksp_pipefcg_nprealloc", "Number of directions to preallocate", "KSPPIPEFCGSetNprealloc", pipefcg->nprealloc, &nprealloc, &flg));
565   if (flg) PetscCall(KSPPIPEFCGSetNprealloc(ksp, nprealloc));
566   PetscCall(PetscOptionsEnum("-ksp_pipefcg_truncation_type", "Truncation approach for directions", "KSPFCGSetTruncationType", KSPFCDTruncationTypes, (PetscEnum)pipefcg->truncstrat, (PetscEnum *)&pipefcg->truncstrat, NULL));
567   PetscOptionsHeadEnd();
568   PetscFunctionReturn(PETSC_SUCCESS);
569 }
570 
571 /*MC
572   KSPPIPEFCG - Implements a Pipelined, Flexible Conjugate Gradient method {cite}`sananschneppmay2016`. [](sec_pipelineksp). [](sec_flexibleksp)
573 
574   Options Database Keys:
575 +   -ksp_pipefcg_mmax <N>                         - The number of previous search directions to store
576 .   -ksp_pipefcg_nprealloc <N>                    - The number of previous search directions to preallocate
577 -   -ksp_pipefcg_truncation_type <standard,notay> - which stored search directions to orthogonalize against
578 
579   Level: intermediate
580 
581   Notes:
582   Pipelined version of `KSPFCG` that overlaps communication (global reductions) with computation (preconditioner application and matrix-vector products) to reduce the impact of communication latency on parallel performance.
583 
584   Supports left preconditioning only.
585 
586   The natural "norm" for this method is $(u,Au)$, where $u$ is the preconditioned residual. As with standard `KSPCG`, this norm is available at no additional computational cost.
587   Choosing preconditioned or unpreconditioned norms involve an extra blocking global reduction, thus removing any benefit from pipelining.
588 
589   MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
590   See [](doc_faq_pipelined)
591 
592   Contributed by:
593   Patrick Sanan and Sascha M. Schnepp
594 
595 .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), [](sec_flexibleksp), `KSPFCG`, `KSPPIPECG`, `KSPPIPECR`, `KSPGCR`, `KSPPIPEGCR`, `KSPFGMRES`,
596           `KSPCG`, `KSPPIPEFCGSetMmax()`, `KSPPIPEFCGGetMmax()`, `KSPPIPEFCGSetNprealloc()`,
597           `KSPPIPEFCGGetNprealloc()`, `KSPPIPEFCGSetTruncationType()`, `KSPPIPEFCGGetTruncationType()`
598 M*/
KSPCreate_PIPEFCG(KSP ksp)599 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFCG(KSP ksp)
600 {
601   KSP_PIPEFCG *pipefcg;
602 
603   PetscFunctionBegin;
604   PetscCall(PetscNew(&pipefcg));
605   pipefcg->type       = !PetscDefined(USE_COMPLEX) ? KSP_CG_SYMMETRIC : KSP_CG_HERMITIAN;
606   pipefcg->mmax       = KSPPIPEFCG_DEFAULT_MMAX;
607   pipefcg->nprealloc  = KSPPIPEFCG_DEFAULT_NPREALLOC;
608   pipefcg->nvecs      = 0;
609   pipefcg->vecb       = KSPPIPEFCG_DEFAULT_VECB;
610   pipefcg->nchunks    = 0;
611   pipefcg->truncstrat = KSPPIPEFCG_DEFAULT_TRUNCSTRAT;
612   pipefcg->n_restarts = 0;
613 
614   ksp->data = (void *)pipefcg;
615 
616   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2));
617   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 1));
618   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 1));
619   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));
620 
621   ksp->ops->setup          = KSPSetUp_PIPEFCG;
622   ksp->ops->solve          = KSPSolve_PIPEFCG;
623   ksp->ops->destroy        = KSPDestroy_PIPEFCG;
624   ksp->ops->view           = KSPView_PIPEFCG;
625   ksp->ops->setfromoptions = KSPSetFromOptions_PIPEFCG;
626   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
627   ksp->ops->buildresidual  = KSPBuildResidualDefault;
628   PetscFunctionReturn(PETSC_SUCCESS);
629 }
630