xref: /petsc/src/ksp/ksp/impls/gmres/pipefgmres/pipefgmres.c (revision f6714f7e2f29a00d39a12212bd9dea2f7b8ed983)
1 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I  "petscksp.h"  I*/
2 
3 static PetscBool  cited      = PETSC_FALSE;
4 static const char citation[] = "@article{SSM2016,\n"
5                                "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
6                                "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
7                                "  journal = {SIAM Journal on Scientific Computing},\n"
8                                "  volume = {38},\n"
9                                "  number = {5},\n"
10                                "  pages = {C441-C470},\n"
11                                "  year = {2016},\n"
12                                "  doi = {10.1137/15M1049130},\n"
13                                "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
14                                "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
15                                "}\n";
16 
17 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP, PetscInt);
18 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP, PetscInt, PetscBool *, PetscReal *);
19 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt);
20 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP);
21 
KSPSetUp_PIPEFGMRES(KSP ksp)22 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp)
23 {
24   PetscInt        k;
25   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
26   const PetscInt  max_k      = pipefgmres->max_k;
27 
28   PetscFunctionBegin;
29   PetscCall(KSPSetUp_GMRES(ksp));
30 
31   PetscCall(PetscMalloc1(VEC_OFFSET + max_k, &pipefgmres->prevecs));
32   PetscCall(PetscMalloc1(VEC_OFFSET + max_k, &pipefgmres->prevecs_user_work));
33 
34   PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->prevecs_user_work[0], 0, NULL));
35   for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k];
36 
37   PetscCall(PetscMalloc1(VEC_OFFSET + max_k, &pipefgmres->zvecs));
38   PetscCall(PetscMalloc1(VEC_OFFSET + max_k, &pipefgmres->zvecs_user_work));
39 
40   PetscCall(PetscMalloc1(VEC_OFFSET + max_k, &pipefgmres->redux));
41 
42   PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->zvecs_user_work[0], 0, NULL));
43   for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k];
44   PetscFunctionReturn(PETSC_SUCCESS);
45 }
46 
KSPPIPEFGMRESCycle(PetscInt * itcount,KSP ksp)47 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount, KSP ksp)
48 {
49   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
50   PetscReal       res_norm;
51   PetscReal       hapbnd, tt;
52   PetscScalar    *hh, *hes, *lhh, shift = pipefgmres->shift;
53   PetscBool       hapend = PETSC_FALSE;      /* indicates happy breakdown ending */
54   PetscInt        loc_it;                    /* local count of # of dir. in Krylov space */
55   PetscInt        max_k = pipefgmres->max_k; /* max # of directions Krylov space */
56   PetscInt        i, j, k;
57   Mat             Amat, Pmat;
58   Vec             Q, W;                      /* Pipelining vectors */
59   Vec            *redux = pipefgmres->redux; /* workspace for single reduction */
60 
61   PetscFunctionBegin;
62   if (itcount) *itcount = 0;
63 
64   /* Assign simpler names to these vectors, allocated as pipelining workspace */
65   Q = VEC_Q;
66   W = VEC_W;
67 
68   /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/
69   /* Note that we add an extra value here to allow for a single reduction */
70   if (!pipefgmres->orthogwork) PetscCall(PetscMalloc1(pipefgmres->max_k + 2, &pipefgmres->orthogwork));
71   lhh = pipefgmres->orthogwork;
72 
73   /* Number of pseudo iterations since last restart is the number
74      of prestart directions */
75   loc_it = 0;
76 
77   /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in
78      KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln.
79      Note that when KSPPIPEFGMRESBuildSoln is called from this function,
80      (loc_it -1) is passed, so the two are equivalent */
81   pipefgmres->it = (loc_it - 1);
82 
83   /* initial residual is in VEC_VV(0)  - compute its norm*/
84   PetscCall(VecNorm(VEC_VV(0), NORM_2, &res_norm));
85 
86   /* first entry in the right-hand side of the Hessenberg system is just
87      the initial residual norm */
88   *RS(0) = res_norm;
89 
90   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
91   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
92   else ksp->rnorm = 0;
93   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
94   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
95   PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
96 
97   /* check for the convergence - maybe the current guess is good enough */
98   PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
99   if (ksp->reason) {
100     if (itcount) *itcount = 0;
101     PetscFunctionReturn(PETSC_SUCCESS);
102   }
103 
104   /* scale VEC_VV (the initial residual) */
105   PetscCall(VecScale(VEC_VV(0), 1.0 / res_norm));
106 
107   /* Fill the pipeline */
108   PetscCall(KSP_PCApply(ksp, VEC_VV(loc_it), PREVEC(loc_it)));
109   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
110   PetscCall(KSP_MatMult(ksp, Amat, PREVEC(loc_it), ZVEC(loc_it)));
111   PetscCall(VecAXPY(ZVEC(loc_it), -shift, VEC_VV(loc_it))); /* Note shift */
112 
113   /* MAIN ITERATION LOOP BEGINNING*/
114   /* keep iterating until we have converged OR generated the max number
115      of directions OR reached the max number of iterations for the method */
116   while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) {
117     if (loc_it) {
118       PetscCall(KSPLogResidualHistory(ksp, res_norm));
119       PetscCall(KSPMonitor(ksp, ksp->its, res_norm));
120     }
121     pipefgmres->it = (loc_it - 1);
122 
123     /* see if more space is needed for work vectors */
124     if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
125       PetscCall(KSPPIPEFGMRESGetNewVectors(ksp, loc_it + 1));
126       /* (loc_it+1) is passed in as number of the first vector that should be allocated */
127     }
128 
129     /* Note that these inner products are with "Z" now, so
130        in particular, lhh[loc_it] is the 'barred' or 'shifted' value,
131        not the value from the equivalent FGMRES run (even in exact arithmetic)
132        That is, the H we need for the Arnoldi relation is different from the
133        coefficients we use in the orthogonalization process,because of the shift */
134 
135     /* Do some local twiddling to allow for a single reduction */
136     for (i = 0; i < loc_it + 1; i++) redux[i] = VEC_VV(i);
137     redux[loc_it + 1] = ZVEC(loc_it);
138 
139     /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */
140     PetscCall(VecMDotBegin(ZVEC(loc_it), loc_it + 2, redux, lhh));
141 
142     /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/
143     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it))));
144 
145     /* The work to be overlapped with the inner products follows.
146        This is application of the preconditioner and the operator
147        to compute intermediate quantities which will be combined (locally)
148        with the results of the inner products.
149        */
150     PetscCall(KSP_PCApply(ksp, ZVEC(loc_it), Q));
151     PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
152     PetscCall(KSP_MatMult(ksp, Amat, Q, W));
153 
154     /* Compute inner products of the new direction with previous directions,
155        and the norm of the to-be-orthogonalized direction "Z".
156        This information is enough to build the required entries
157        of H. The inner product with VEC_VV(it_loc) is
158        *different* than in the standard FGMRES and need to be dealt with specially.
159        That is, for standard FGMRES the orthogonalization coefficients are the same
160        as the coefficients used in the Arnoldi relation to reconstruct, but here this
161        is not true (albeit only for the one entry of H which we "unshift" below. */
162 
163     /* Finish the dot product, retrieving the extra entry */
164     PetscCall(VecMDotEnd(ZVEC(loc_it), loc_it + 2, redux, lhh));
165     tt = PetscRealPart(lhh[loc_it + 1]);
166 
167     /* Hessenberg entries, and entries for (naive) classical Gram-Schmidt
168       Note that the Hessenberg entries require a shift, as these are for the
169       relation AU = VH, which is wrt unshifted basis vectors */
170     hh  = HH(0, loc_it);
171     hes = HES(0, loc_it);
172     for (j = 0; j < loc_it; j++) {
173       hh[j]  = lhh[j];
174       hes[j] = lhh[j];
175     }
176     hh[loc_it]  = lhh[loc_it] + shift;
177     hes[loc_it] = lhh[loc_it] + shift;
178 
179     /* we delay applying the shift here */
180     for (j = 0; j <= loc_it; j++) lhh[j] = -lhh[j]; /* flip sign */
181 
182     /* Compute the norm of the un-normalized new direction using the rearranged formula
183        Note that these are shifted ("barred") quantities */
184     for (k = 0; k <= loc_it; k++) tt -= PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k]);
185     /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */
186     if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0;
187     if (tt < 0.0) {
188       /* If we detect square root breakdown in the norm, we must restart the algorithm.
189          Here this means we simply break the current loop and reconstruct the solution
190          using the basis we have computed thus far. Note that by breaking immediately,
191          we do not update the iteration count, so computation done in this iteration
192          should be disregarded.
193          */
194       PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT ", tt=%g\n", ksp->its, (double)tt));
195       break;
196     } else {
197       tt = PetscSqrtReal(tt);
198     }
199 
200     /* new entry in hessenburg is the 2-norm of our new direction */
201     hh[loc_it + 1]  = tt;
202     hes[loc_it + 1] = tt;
203 
204     /* The recurred computation for the new direction
205        The division by tt is delayed to the happy breakdown check later
206        Note placement BEFORE the unshift
207        */
208     PetscCall(VecCopy(ZVEC(loc_it), VEC_VV(loc_it + 1)));
209     PetscCall(VecMAXPY(VEC_VV(loc_it + 1), loc_it + 1, lhh, &VEC_VV(0)));
210     /* (VEC_VV(loc_it+1) is not normalized yet) */
211 
212     /* The recurred computation for the preconditioned vector (u) */
213     PetscCall(VecCopy(Q, PREVEC(loc_it + 1)));
214     PetscCall(VecMAXPY(PREVEC(loc_it + 1), loc_it + 1, lhh, &PREVEC(0)));
215     if (tt) PetscCall(VecScale(PREVEC(loc_it + 1), 1.0 / tt));
216 
217     /* Unshift an entry in the GS coefficients ("removing the bar") */
218     lhh[loc_it] -= shift;
219 
220     /* The recurred computation for z (Au)
221        Note placement AFTER the "unshift" */
222     PetscCall(VecCopy(W, ZVEC(loc_it + 1)));
223     PetscCall(VecMAXPY(ZVEC(loc_it + 1), loc_it + 1, lhh, &ZVEC(0)));
224     if (tt) PetscCall(VecScale(ZVEC(loc_it + 1), 1.0 / tt));
225 
226     /* Happy Breakdown Check */
227     hapbnd = PetscAbsScalar((tt) / *RS(loc_it));
228     /* RS(loc_it) contains the res_norm from the last iteration  */
229     hapbnd = PetscMin(pipefgmres->haptol, hapbnd);
230     if (tt > hapbnd) {
231       /* scale new direction by its norm  */
232       PetscCall(VecScale(VEC_VV(loc_it + 1), 1.0 / tt));
233     } else {
234       /* This happens when the solution is exactly reached. */
235       /* So there is no new direction... */
236       PetscCall(VecSet(VEC_TEMP, 0.0)); /* set VEC_TEMP to 0 */
237       hapend = PETSC_TRUE;
238     }
239     /* note that for pipefgmres we could get HES(loc_it+1, loc_it)  = 0 and the
240        current solution would not be exact if HES was singular.  Note that
241        HH non-singular implies that HES is not singular, and HES is guaranteed
242        to be nonsingular when PREVECS are linearly independent and A is
243        nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity
244        of HES). So we should really add a check to verify that HES is nonsingular.*/
245 
246     /* Note that to be thorough, in debug mode, one could call a LAPACK routine
247        here to check that the Hessenberg matrix is indeed non-singular (since
248        FGMRES does not guarantee this) */
249 
250     /* Now apply rotations to new col of Hessenberg (and right side of system),
251        calculate new rotation, and get new residual norm at the same time*/
252     PetscCall(KSPPIPEFGMRESUpdateHessenberg(ksp, loc_it, &hapend, &res_norm));
253     if (ksp->reason) break;
254 
255     loc_it++;
256     pipefgmres->it = (loc_it - 1); /* Add this here in case it has converged */
257 
258     PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
259     ksp->its++;
260     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
261     else ksp->rnorm = 0;
262     PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
263 
264     PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
265 
266     /* Catch error in happy breakdown and signal convergence and break from loop */
267     if (hapend) {
268       if (!ksp->reason) {
269         PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "Reached happy break down, but convergence was not indicated. Residual norm = %g", (double)res_norm);
270         ksp->reason = KSP_DIVERGED_BREAKDOWN;
271         break;
272       }
273     }
274   }
275   /* END OF ITERATION LOOP */
276 
277   if (itcount) *itcount = loc_it;
278 
279   /*
280     Solve for the "best" coefficients of the Krylov
281     columns, add the solution values together, and possibly unwind the
282     preconditioning from the solution
283    */
284 
285   /* Form the solution (or the solution so far) */
286   /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln properly navigates */
287 
288   PetscCall(KSPPIPEFGMRESBuildSoln(RS(0), ksp->vec_sol, ksp->vec_sol, ksp, loc_it - 1));
289 
290   /*
291      Monitor if we know that we will not return for a restart
292   */
293   if (ksp->reason == KSP_CONVERGED_ITERATING && ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
294   if (loc_it && ksp->reason) {
295     PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
296     PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
297   }
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
KSPSolve_PIPEFGMRES(KSP ksp)301 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp)
302 {
303   PetscInt        its, itcount;
304   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
305   PetscBool       guess_zero = ksp->guess_zero;
306 
307   PetscFunctionBegin;
308   /* We have not checked these routines for use with complex numbers. The inner products are likely not defined correctly for that case */
309   PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars");
310 
311   PetscCall(PetscCitationsRegister(citation, &cited));
312 
313   PetscCheck(!ksp->calc_sings || pipefgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
314   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
315   ksp->its = 0;
316   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
317 
318   itcount     = 0;
319   ksp->reason = KSP_CONVERGED_ITERATING;
320   while (!ksp->reason) {
321     PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs));
322     PetscCall(KSPPIPEFGMRESCycle(&its, ksp));
323     itcount += its;
324     if (itcount >= ksp->max_it) {
325       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
326       break;
327     }
328     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
329   }
330   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
331   PetscFunctionReturn(PETSC_SUCCESS);
332 }
333 
KSPDestroy_PIPEFGMRES(KSP ksp)334 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp)
335 {
336   PetscFunctionBegin;
337   PetscCall(KSPReset_PIPEFGMRES(ksp));
338   PetscCall(KSPDestroy_GMRES(ksp));
339   PetscFunctionReturn(PETSC_SUCCESS);
340 }
341 
KSPPIPEFGMRESBuildSoln(PetscScalar * nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it)342 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs, Vec vguess, Vec vdest, KSP ksp, PetscInt it)
343 {
344   PetscScalar     tt;
345   PetscInt        k, j;
346   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
347 
348   PetscFunctionBegin;
349   if (it < 0) {                        /* no pipefgmres steps have been performed */
350     PetscCall(VecCopy(vguess, vdest)); /* VecCopy() is smart, exits immediately if vguess == vdest */
351     PetscFunctionReturn(PETSC_SUCCESS);
352   }
353 
354   /* Solve for solution vector that minimizes the residual */
355   /* solve the upper triangular system - RS is the right side and HH is
356      the upper triangular matrix  - put soln in nrs */
357   if (*HH(it, it) != 0.0) nrs[it] = *RS(it) / *HH(it, it);
358   else nrs[it] = 0.0;
359 
360   for (k = it - 1; k >= 0; k--) {
361     tt = *RS(k);
362     for (j = k + 1; j <= it; j++) tt -= *HH(k, j) * nrs[j];
363     nrs[k] = tt / *HH(k, k);
364   }
365 
366   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */
367   PetscCall(VecMAXPBY(VEC_TEMP, it + 1, nrs, 0, &PREVEC(0)));
368 
369   /* add solution to previous solution */
370   if (vdest == vguess) {
371     PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP));
372   } else {
373     PetscCall(VecWAXPY(vdest, 1.0, VEC_TEMP, vguess));
374   }
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
KSPPIPEFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool * hapend,PetscReal * res)378 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool *hapend, PetscReal *res)
379 {
380   PetscScalar    *hh, *cc, *ss, *rs;
381   PetscInt        j;
382   PetscReal       hapbnd;
383   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
384 
385   PetscFunctionBegin;
386   hh = HH(0, it); /* pointer to beginning of column to update */
387   cc = CC(0);     /* beginning of cosine rotations */
388   ss = SS(0);     /* beginning of sine rotations */
389   rs = RS(0);     /* right-hand side of least squares system */
390 
391   /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
392   for (j = 0; j <= it + 1; j++) *HES(j, it) = hh[j];
393 
394   /* check for the happy breakdown */
395   hapbnd = PetscMin(PetscAbsScalar(hh[it + 1] / rs[it]), pipefgmres->haptol);
396   if (PetscAbsScalar(hh[it + 1]) < hapbnd) {
397     PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n", (double)hapbnd, it + 1, it, (double)PetscAbsScalar(*HH(it + 1, it))));
398     *hapend = PETSC_TRUE;
399   }
400 
401   /* Apply all the previously computed plane rotations to the new column of the Hessenberg matrix */
402   /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
403      and some refs have [c   s ; -conj(s)  c] (don't be confused!) */
404 
405   for (j = 0; j < it; j++) {
406     PetscScalar hhj = hh[j];
407     hh[j]           = PetscConj(cc[j]) * hhj + ss[j] * hh[j + 1];
408     hh[j + 1]       = -ss[j] * hhj + cc[j] * hh[j + 1];
409   }
410 
411   /*
412     compute the new plane rotation, and apply it to:
413      1) the right-hand side of the Hessenberg system (RS)
414         note: it affects RS(it) and RS(it+1)
415      2) the new column of the Hessenberg matrix
416         note: it affects HH(it,it) which is currently pointed to
417         by hh and HH(it+1, it) (*(hh+1))
418     thus obtaining the updated value of the residual...
419   */
420 
421   /* compute new plane rotation */
422 
423   if (!*hapend) {
424     PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it + 1])));
425     if (delta == 0.0) {
426       ksp->reason = KSP_DIVERGED_NULL;
427       PetscFunctionReturn(PETSC_SUCCESS);
428     }
429 
430     cc[it] = hh[it] / delta;     /* new cosine value */
431     ss[it] = hh[it + 1] / delta; /* new sine value */
432 
433     hh[it]     = PetscConj(cc[it]) * hh[it] + ss[it] * hh[it + 1];
434     rs[it + 1] = -ss[it] * rs[it];
435     rs[it]     = PetscConj(cc[it]) * rs[it];
436     *res       = PetscAbsScalar(rs[it + 1]);
437   } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
438             another rotation matrix (so RH doesn't change).  The new residual is
439             always the new sine term times the residual from last time (RS(it)),
440             but now the new sine rotation would be zero...so the residual should
441             be zero...so we will multiply "zero" by the last residual.  This might
442             not be exactly what we want to do here -could just return "zero". */
443     *res = 0.0;
444   }
445   PetscFunctionReturn(PETSC_SUCCESS);
446 }
447 
KSPBuildSolution_PIPEFGMRES(KSP ksp,Vec ptr,Vec * result)448 static PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp, Vec ptr, Vec *result)
449 {
450   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
451 
452   PetscFunctionBegin;
453   if (!ptr) {
454     if (!pipefgmres->sol_temp) PetscCall(VecDuplicate(ksp->vec_sol, &pipefgmres->sol_temp));
455     ptr = pipefgmres->sol_temp;
456   }
457   if (!pipefgmres->nrs) {
458     /* allocate the work area */
459     PetscCall(PetscMalloc1(pipefgmres->max_k, &pipefgmres->nrs));
460   }
461 
462   PetscCall(KSPPIPEFGMRESBuildSoln(pipefgmres->nrs, ksp->vec_sol, ptr, ksp, pipefgmres->it));
463   if (result) *result = ptr;
464   PetscFunctionReturn(PETSC_SUCCESS);
465 }
466 
KSPSetFromOptions_PIPEFGMRES(KSP ksp,PetscOptionItems PetscOptionsObject)467 static PetscErrorCode KSPSetFromOptions_PIPEFGMRES(KSP ksp, PetscOptionItems PetscOptionsObject)
468 {
469   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
470   PetscBool       flg;
471   PetscScalar     shift;
472 
473   PetscFunctionBegin;
474   PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject));
475   PetscOptionsHeadBegin(PetscOptionsObject, "KSP pipelined FGMRES Options");
476   PetscCall(PetscOptionsScalar("-ksp_pipefgmres_shift", "shift parameter", "KSPPIPEFGMRESSetShift", pipefgmres->shift, &shift, &flg));
477   if (flg) PetscCall(KSPPIPEFGMRESSetShift(ksp, shift));
478   PetscOptionsHeadEnd();
479   PetscFunctionReturn(PETSC_SUCCESS);
480 }
481 
KSPView_PIPEFGMRES(KSP ksp,PetscViewer viewer)482 static PetscErrorCode KSPView_PIPEFGMRES(KSP ksp, PetscViewer viewer)
483 {
484   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
485   PetscBool       isascii, isstring;
486 
487   PetscFunctionBegin;
488   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
489   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
490 
491   if (isascii) {
492     PetscCall(PetscViewerASCIIPrintf(viewer, "  restart=%" PetscInt_FMT "\n", pipefgmres->max_k));
493     PetscCall(PetscViewerASCIIPrintf(viewer, "  happy breakdown tolerance=%g\n", (double)pipefgmres->haptol));
494 #if defined(PETSC_USE_COMPLEX)
495     PetscCall(PetscViewerASCIIPrintf(viewer, "  shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift)));
496 #else
497     PetscCall(PetscViewerASCIIPrintf(viewer, "  shift=%g\n", (double)pipefgmres->shift));
498 #endif
499   } else if (isstring) {
500     PetscCall(PetscViewerStringSPrintf(viewer, "restart %" PetscInt_FMT, pipefgmres->max_k));
501 #if defined(PETSC_USE_COMPLEX)
502     PetscCall(PetscViewerStringSPrintf(viewer, "   shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift)));
503 #else
504     PetscCall(PetscViewerStringSPrintf(viewer, "   shift=%g\n", (double)pipefgmres->shift));
505 #endif
506   }
507   PetscFunctionReturn(PETSC_SUCCESS);
508 }
509 
KSPReset_PIPEFGMRES(KSP ksp)510 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp)
511 {
512   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
513   PetscInt        i;
514 
515   PetscFunctionBegin;
516   PetscCall(PetscFree(pipefgmres->prevecs));
517   PetscCall(PetscFree(pipefgmres->zvecs));
518   for (i = 0; i < pipefgmres->nwork_alloc; i++) {
519     PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->prevecs_user_work[i]));
520     PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->zvecs_user_work[i]));
521   }
522   PetscCall(PetscFree(pipefgmres->prevecs_user_work));
523   PetscCall(PetscFree(pipefgmres->zvecs_user_work));
524   PetscCall(PetscFree(pipefgmres->redux));
525   PetscCall(KSPReset_GMRES(ksp));
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
/*MC
   KSPPIPEFGMRES - Implements the Pipelined (1-stage) Flexible Generalized Minimal Residual method {cite}`sananschneppmay2016`. [](sec_pipelineksp). [](sec_flexibleksp)

   Options Database Keys:
+   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
.   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
.   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of vectors are allocated as needed)
.   -ksp_pipefgmres_shift - the shift to use (defaults to 1). See `KSPPIPEFGMRESSetShift()`
-   -ksp_gmres_krylov_monitor - plot the Krylov space generated

   Level: intermediate

   Notes:
   Compare to `KSPPGMRES` and `KSPFGMRES`

   This variant is not "explicitly normalized" like `KSPPGMRES`, and requires a shift parameter.

   A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator.

   Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with `KSPFGMRES`).

   MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
   See [](doc_faq_pipelined)

   Developer Note:
   This class is subclassed off of `KSPGMRES`, see the source code in src/ksp/ksp/impls/gmres for comments on the structure of the code

   Contributed by:
   P. Sanan and S.M. Schnepp

.seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), [](sec_flexibleksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPLGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPGMRES`, `KSPFGMRES`
          `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESMonitorKrylov()`, `KSPPIPEFGMRESSetShift()`
M*/
562 
KSPCreate_PIPEFGMRES(KSP ksp)563 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp)
564 {
565   KSP_PIPEFGMRES *pipefgmres;
566 
567   PetscFunctionBegin;
568   PetscCall(PetscNew(&pipefgmres));
569 
570   ksp->data                              = (void *)pipefgmres;
571   ksp->ops->buildsolution                = KSPBuildSolution_PIPEFGMRES;
572   ksp->ops->setup                        = KSPSetUp_PIPEFGMRES;
573   ksp->ops->solve                        = KSPSolve_PIPEFGMRES;
574   ksp->ops->reset                        = KSPReset_PIPEFGMRES;
575   ksp->ops->destroy                      = KSPDestroy_PIPEFGMRES;
576   ksp->ops->view                         = KSPView_PIPEFGMRES;
577   ksp->ops->setfromoptions               = KSPSetFromOptions_PIPEFGMRES;
578   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
579   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;
580 
581   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 3));
582   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
583 
584   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES));
585   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES));
586   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetRestart_C", KSPGMRESGetRestart_GMRES));
587   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetHapTol_C", KSPGMRESSetHapTol_GMRES));
588 
589   pipefgmres->nextra_vecs    = 1;
590   pipefgmres->haptol         = 1.0e-30;
591   pipefgmres->q_preallocate  = PETSC_FALSE;
592   pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS;
593   pipefgmres->orthog         = NULL;
594   pipefgmres->nrs            = NULL;
595   pipefgmres->sol_temp       = NULL;
596   pipefgmres->max_k          = PIPEFGMRES_DEFAULT_MAXK;
597   pipefgmres->Rsvd           = NULL;
598   pipefgmres->orthogwork     = NULL;
599   pipefgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
600   pipefgmres->shift          = 1.0;
601   PetscFunctionReturn(PETSC_SUCCESS);
602 }
603 
KSPPIPEFGMRESGetNewVectors(KSP ksp,PetscInt it)604 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp, PetscInt it)
605 {
606   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
607   PetscInt        nwork      = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */
608   PetscInt        nalloc;                               /* number to allocate */
609   PetscInt        k;
610 
611   PetscFunctionBegin;
612   nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate
613                                       in a single chunk */
614 
615   /* Adjust the number to allocate to make sure that we don't exceed the
616      number of available slots (pipefgmres->vecs_allocated)*/
617   if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET;
618   if (!nalloc) PetscFunctionReturn(PETSC_SUCCESS);
619 
620   pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */
621 
622   /* work vectors */
623   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->user_work[nwork], 0, NULL));
624   for (k = 0; k < nalloc; k++) pipefgmres->vecs[it + VEC_OFFSET + k] = pipefgmres->user_work[nwork][k];
625   /* specify size of chunk allocated */
626   pipefgmres->mwork_alloc[nwork] = nalloc;
627 
628   /* preconditioned vectors (note we don't use VEC_OFFSET) */
629   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->prevecs_user_work[nwork], 0, NULL));
630   for (k = 0; k < nalloc; k++) pipefgmres->prevecs[it + k] = pipefgmres->prevecs_user_work[nwork][k];
631 
632   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->zvecs_user_work[nwork], 0, NULL));
633   for (k = 0; k < nalloc; k++) pipefgmres->zvecs[it + k] = pipefgmres->zvecs_user_work[nwork][k];
634 
635   /* increment the number of work vector chunks */
636   pipefgmres->nwork_alloc++;
637   PetscFunctionReturn(PETSC_SUCCESS);
638 }
639 
/*@
  KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined `KSPPIPEFGMRES` solver.

  Logically Collective

  Input Parameters:
+ ksp   - the Krylov space context
- shift - the shift

  Options Database Key:
. -ksp_pipefgmres_shift <shift> - set the shift parameter

  Level: intermediate

  Note:
  A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator.
  This can be achieved with PETSc itself by using a few iterations of a Krylov method.
  See `KSPComputeEigenvalues()` (and note the caveats there).

.seealso: [](ch_ksp), `KSPPIPEFGMRES`, `KSPComputeEigenvalues()`
@*/
KSPPIPEFGMRESSetShift(KSP ksp,PetscScalar shift)661 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp, PetscScalar shift)
662 {
663   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
664 
665   PetscFunctionBegin;
666   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
667   PetscValidLogicalCollectiveScalar(ksp, shift, 2);
668   pipefgmres->shift = shift;
669   PetscFunctionReturn(PETSC_SUCCESS);
670 }
671