xref: /petsc/src/ksp/ksp/impls/bcgsl/bcgsl.c (revision 6d8694c4fbab79f9439f1ad13c0386ba7ee1ca4b) !
1 /*
2    Implementation of BiCGstab(L) the paper by D.R. Fokkema,
3    "Enhanced implementation of BiCGStab(L) for solving linear systems
4    of equations". This uses tricky delayed updating ideas to prevent
5    round-off buildup.
6 */
7 #include <petsc/private/kspimpl.h> /*I   "petscksp.h" I*/
8 #include <../src/ksp/ksp/impls/bcgsl/bcgslimpl.h>
9 #include <petscblaslapack.h>
10 
KSPSolve_BCGSL(KSP ksp)11 static PetscErrorCode KSPSolve_BCGSL(KSP ksp)
12 {
13   KSP_BCGSL   *bcgsl = (KSP_BCGSL *)ksp->data;
14   PetscScalar  alpha, beta, omega, sigma;
15   PetscScalar  rho0, rho1;
16   PetscReal    kappa0, kappaA, kappa1;
17   PetscReal    ghat;
18   PetscReal    zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
19   PetscBool    bUpdateX;
20   PetscInt     maxit;
21   PetscInt     h, i, j, k, vi, ell;
22   PetscBLASInt ldMZ, bierr;
23   PetscScalar  utb;
24   PetscReal    max_s, pinv_tol;
25 
26   PetscFunctionBegin;
27   /* set up temporary vectors */
28   vi        = 0;
29   ell       = bcgsl->ell;
30   bcgsl->vB = ksp->work[vi];
31   vi++;
32   bcgsl->vRt = ksp->work[vi];
33   vi++;
34   bcgsl->vTm = ksp->work[vi];
35   vi++;
36   bcgsl->vvR = ksp->work + vi;
37   vi += ell + 1;
38   bcgsl->vvU = ksp->work + vi;
39   vi += ell + 1;
40   bcgsl->vXr = ksp->work[vi];
41   vi++;
42   PetscCall(PetscBLASIntCast(ell + 1, &ldMZ));
43 
44   /* Prime the iterative solver */
45   PetscCall(KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs));
46   PetscCall(VecNorm(VVR[0], NORM_2, &zeta0));
47   KSPCheckNorm(ksp, zeta0);
48   rnmax_computed = zeta0;
49   rnmax_true     = zeta0;
50 
51   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
52   ksp->its = 0;
53   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = zeta0;
54   else ksp->rnorm = 0.0;
55   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
56   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
57   PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
58   PetscCall((*ksp->converged)(ksp, 0, ksp->rnorm, &ksp->reason, ksp->cnvP));
59   if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
60 
61   PetscCall(VecSet(VVU[0], 0.0));
62   alpha = 0.;
63   rho0 = omega = 1;
64 
65   if (bcgsl->delta > 0.0) {
66     PetscCall(VecCopy(VX, VXR));
67     PetscCall(VecSet(VX, 0.0));
68     PetscCall(VecCopy(VVR[0], VB));
69   } else {
70     PetscCall(VecCopy(ksp->vec_rhs, VB));
71   }
72 
73   /* Life goes on */
74   PetscCall(VecCopy(VVR[0], VRT));
75   zeta = zeta0;
76 
77   PetscCall(KSPGetTolerances(ksp, NULL, NULL, NULL, &maxit));
78 
79   for (k = 0; k < maxit; k += bcgsl->ell) {
80     ksp->its = k;
81     if (k > 0) {
82       if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = zeta;
83       else ksp->rnorm = 0.0;
84 
85       PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
86       PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
87 
88       PetscCall((*ksp->converged)(ksp, k, ksp->rnorm, &ksp->reason, ksp->cnvP));
89       if (ksp->reason < 0) PetscFunctionReturn(PETSC_SUCCESS);
90       if (ksp->reason) {
91         if (bcgsl->delta > 0.0) PetscCall(VecAXPY(VX, 1.0, VXR));
92         PetscFunctionReturn(PETSC_SUCCESS);
93       }
94     }
95 
96     /* BiCG part */
97     rho0 = -omega * rho0;
98     nrm0 = zeta;
99     for (j = 0; j < bcgsl->ell; j++) {
100       /* rho1 <- r_j' * r_tilde */
101       PetscCall(VecDot(VVR[j], VRT, &rho1));
102       KSPCheckDot(ksp, rho1);
103       if (rho1 == 0.0) {
104         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
105         PetscFunctionReturn(PETSC_SUCCESS);
106       }
107       beta = alpha * (rho1 / rho0);
108       rho0 = rho1;
109       for (i = 0; i <= j; i++) {
110         /* u_i <- r_i - beta*u_i */
111         PetscCall(VecAYPX(VVU[i], -beta, VVR[i]));
112       }
113       /* u_{j+1} <- inv(K)*A*u_j */
114       PetscCall(KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j + 1], VTM));
115 
116       PetscCall(VecDot(VVU[j + 1], VRT, &sigma));
117       KSPCheckDot(ksp, sigma);
118       if (sigma == 0.0) {
119         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
120         PetscFunctionReturn(PETSC_SUCCESS);
121       }
122       alpha = rho1 / sigma;
123 
124       /* x <- x + alpha*u_0 */
125       PetscCall(VecAXPY(VX, alpha, VVU[0]));
126 
127       for (i = 0; i <= j; i++) {
128         /* r_i <- r_i - alpha*u_{i+1} */
129         PetscCall(VecAXPY(VVR[i], -alpha, VVU[i + 1]));
130       }
131 
132       /* r_{j+1} <- inv(K)*A*r_j */
133       PetscCall(KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j + 1], VTM));
134 
135       PetscCall(VecNorm(VVR[0], NORM_2, &nrm0));
136       KSPCheckNorm(ksp, nrm0);
137       if (bcgsl->delta > 0.0) {
138         if (rnmax_computed < nrm0) rnmax_computed = nrm0;
139         if (rnmax_true < nrm0) rnmax_true = nrm0;
140       }
141 
142       /* NEW: check for early exit */
143       PetscCall((*ksp->converged)(ksp, k + j, nrm0, &ksp->reason, ksp->cnvP));
144       if (ksp->reason) {
145         PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
146         ksp->its   = k + j;
147         ksp->rnorm = nrm0;
148 
149         PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
150         if (ksp->reason < 0) PetscFunctionReturn(PETSC_SUCCESS);
151       }
152     }
153 
154     /* Polynomial part */
155     for (i = 0; i <= bcgsl->ell; ++i) PetscCall(VecMDot(VVR[i], i + 1, VVR, &MZa[i * ldMZ]));
156     /* Symmetrize MZa */
157     for (i = 0; i <= bcgsl->ell; ++i) {
158       for (j = i + 1; j <= bcgsl->ell; ++j) MZa[i * ldMZ + j] = MZa[j * ldMZ + i] = PetscConj(MZa[j * ldMZ + i]);
159     }
160     /* Copy MZa to MZb */
161     PetscCall(PetscArraycpy(MZb, MZa, ldMZ * ldMZ));
162 
163     if (!bcgsl->bConvex || bcgsl->ell == 1) {
164       PetscBLASInt ione = 1, bell;
165       PetscCall(PetscBLASIntCast(bcgsl->ell, &bell));
166 
167       AY0c[0] = -1;
168       if (bcgsl->pinv) {
169 #if defined(PETSC_USE_COMPLEX)
170         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "A", &bell, &bell, &MZa[1 + ldMZ], &ldMZ, bcgsl->s, bcgsl->u, &bell, bcgsl->v, &bell, bcgsl->work, &bcgsl->lwork, bcgsl->realwork, &bierr));
171 #else
172         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "A", &bell, &bell, &MZa[1 + ldMZ], &ldMZ, bcgsl->s, bcgsl->u, &bell, bcgsl->v, &bell, bcgsl->work, &bcgsl->lwork, &bierr));
173 #endif
174         if (bierr != 0) {
175           ksp->reason = KSP_DIVERGED_BREAKDOWN;
176           PetscFunctionReturn(PETSC_SUCCESS);
177         }
178         /* Apply pseudo-inverse */
179         max_s = bcgsl->s[0];
180         for (i = 1; i < bell; i++) {
181           if (bcgsl->s[i] > max_s) max_s = bcgsl->s[i];
182         }
183         /* tolerance is hardwired to bell*max(s)*PETSC_MACHINE_EPSILON */
184         pinv_tol = bell * max_s * PETSC_MACHINE_EPSILON;
185         PetscCall(PetscArrayzero(&AY0c[1], bell));
186         for (i = 0; i < bell; i++) {
187           if (bcgsl->s[i] >= pinv_tol) {
188             utb = 0.;
189             for (j = 0; j < bell; j++) utb += MZb[1 + j] * bcgsl->u[i * bell + j];
190 
191             for (j = 0; j < bell; j++) AY0c[1 + j] += utb / bcgsl->s[i] * bcgsl->v[j * bell + i];
192           }
193         }
194       } else {
195         PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("Lower", &bell, &MZa[1 + ldMZ], &ldMZ, &bierr));
196         if (bierr != 0) {
197           ksp->reason = KSP_DIVERGED_BREAKDOWN;
198           PetscFunctionReturn(PETSC_SUCCESS);
199         }
200         PetscCall(PetscArraycpy(&AY0c[1], &MZb[1], bcgsl->ell));
201         PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("Lower", &bell, &ione, &MZa[1 + ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
202       }
203     } else {
204       PetscBLASInt ione = 1;
205       PetscScalar  aone = 1.0, azero = 0.0;
206       PetscBLASInt neqs;
207       PetscCall(PetscBLASIntCast(bcgsl->ell - 1, &neqs));
208 
209       PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("Lower", &neqs, &MZa[1 + ldMZ], &ldMZ, &bierr));
210       if (bierr != 0) {
211         ksp->reason = KSP_DIVERGED_BREAKDOWN;
212         PetscFunctionReturn(PETSC_SUCCESS);
213       }
214       PetscCall(PetscArraycpy(&AY0c[1], &MZb[1], bcgsl->ell - 1));
215       PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1 + ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
216       AY0c[0]          = -1;
217       AY0c[bcgsl->ell] = 0.;
218 
219       PetscCall(PetscArraycpy(&AYlc[1], &MZb[1 + ldMZ * (bcgsl->ell)], bcgsl->ell - 1));
220       PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1 + ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr));
221 
222       AYlc[0]          = 0.;
223       AYlc[bcgsl->ell] = -1;
224 
225       PetscCallBLAS("BLASgemv", BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione));
226 
227       kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione));
228 
229       /* round-off can cause negative kappa's */
230       if (kappa0 < 0) kappa0 = -kappa0;
231       kappa0 = PetscSqrtReal(kappa0);
232 
233       kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));
234 
235       PetscCallBLAS("BLASgemv", BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione));
236 
237       kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));
238 
239       if (kappa1 < 0) kappa1 = -kappa1;
240       kappa1 = PetscSqrtReal(kappa1);
241 
242       if (kappa0 != 0.0 && kappa1 != 0.0) {
243         if (kappaA < 0.7 * kappa0 * kappa1) {
244           ghat = (kappaA < 0.0) ? -0.7 * kappa0 / kappa1 : 0.7 * kappa0 / kappa1;
245         } else {
246           ghat = kappaA / (kappa1 * kappa1);
247         }
248         for (i = 0; i <= bcgsl->ell; i++) AY0c[i] = AY0c[i] - ghat * AYlc[i];
249       }
250     }
251 
252     omega = AY0c[bcgsl->ell];
253     for (h = bcgsl->ell; h > 0 && omega == 0.0; h--) omega = AY0c[h];
254     if (omega == 0.0) {
255       ksp->reason = KSP_DIVERGED_BREAKDOWN;
256       PetscFunctionReturn(PETSC_SUCCESS);
257     }
258 
259     PetscCall(VecMAXPY(VX, bcgsl->ell, AY0c + 1, VVR));
260     for (i = 1; i <= bcgsl->ell; i++) AY0c[i] *= -1.0;
261     PetscCall(VecMAXPY(VVU[0], bcgsl->ell, AY0c + 1, VVU + 1));
262     PetscCall(VecMAXPY(VVR[0], bcgsl->ell, AY0c + 1, VVR + 1));
263     for (i = 1; i <= bcgsl->ell; i++) AY0c[i] *= -1.0;
264     PetscCall(VecNorm(VVR[0], NORM_2, &zeta));
265     KSPCheckNorm(ksp, zeta);
266 
267     /* Accurate Update */
268     if (bcgsl->delta > 0.0) {
269       if (rnmax_computed < zeta) rnmax_computed = zeta;
270       if (rnmax_true < zeta) rnmax_true = zeta;
271 
272       bUpdateX = (PetscBool)(zeta < bcgsl->delta * zeta0 && zeta0 <= rnmax_computed);
273       if ((zeta < bcgsl->delta * rnmax_true && zeta0 <= rnmax_true) || bUpdateX) {
274         /* r0 <- b-inv(K)*A*X */
275         PetscCall(KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM));
276         PetscCall(VecAYPX(VVR[0], -1.0, VB));
277         rnmax_true = zeta;
278 
279         if (bUpdateX) {
280           PetscCall(VecAXPY(VXR, 1.0, VX));
281           PetscCall(VecSet(VX, 0.0));
282           PetscCall(VecCopy(VVR[0], VB));
283           rnmax_computed = zeta;
284         }
285       }
286     }
287   }
288   if (bcgsl->delta > 0.0) PetscCall(VecAXPY(VX, 1.0, VXR));
289 
290   ksp->its = k;
291   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = zeta;
292   else ksp->rnorm = 0.0;
293   PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
294   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
295   PetscCall((*ksp->converged)(ksp, k, ksp->rnorm, &ksp->reason, ksp->cnvP));
296   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
297   PetscFunctionReturn(PETSC_SUCCESS);
298 }
299 
300 /*@
301   KSPBCGSLSetXRes - Sets the parameter governing when
302   exact residuals will be used instead of computed residuals for `KSPCBGSL`.
303 
304   Logically Collective
305 
306   Input Parameters:
307 + ksp   - iterative context of type `KSPBCGSL`
308 - delta - computed residuals are used alone when delta is not positive
309 
310   Options Database Key:
311 . -ksp_bcgsl_xres delta - Threshold used to decide when to refresh computed residuals
312 
313   Level: intermediate
314 
315 .seealso: [](ch_ksp), `KSPBCGSLSetEll()`, `KSPBCGSLSetPol()`, `KSP`, `KSPCBGSL`, `KSPBCGSLSetUsePseudoinverse()`
316 @*/
KSPBCGSLSetXRes(KSP ksp,PetscReal delta)317 PetscErrorCode KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
318 {
319   KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
320 
321   PetscFunctionBegin;
322   PetscValidLogicalCollectiveReal(ksp, delta, 2);
323   if (ksp->setupstage) {
324     if ((delta <= 0 && bcgsl->delta > 0) || (delta > 0 && bcgsl->delta <= 0)) {
325       PetscCall(VecDestroyVecs(ksp->nwork, &ksp->work));
326       PetscCall(PetscFree5(AY0c, AYlc, AYtc, MZa, MZb));
327       PetscCall(PetscFree4(bcgsl->work, bcgsl->s, bcgsl->u, bcgsl->v));
328       ksp->setupstage = KSP_SETUP_NEW;
329     }
330   }
331   bcgsl->delta = delta;
332   PetscFunctionReturn(PETSC_SUCCESS);
333 }
334 
335 /*@
336   KSPBCGSLSetUsePseudoinverse - Use pseudoinverse (via SVD) to solve polynomial part of the update in `KSPCBGSL` solver
337 
338   Logically Collective
339 
340   Input Parameters:
341 + ksp      - iterative context of type `KSPCBGSL`
342 - use_pinv - set to `PETSC_TRUE` when using pseudoinverse
343 
344   Options Database Key:
345 . -ksp_bcgsl_pinv <true,false> - use pseudoinverse
346 
347   Level: intermediate
348 
349 .seealso: [](ch_ksp), `KSPBCGSLSetEll()`, `KSP`, `KSPCBGSL`, `KSPBCGSLSetPol()`, `KSPBCGSLSetXRes()`
350 @*/
KSPBCGSLSetUsePseudoinverse(KSP ksp,PetscBool use_pinv)351 PetscErrorCode KSPBCGSLSetUsePseudoinverse(KSP ksp, PetscBool use_pinv)
352 {
353   KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
354 
355   PetscFunctionBegin;
356   bcgsl->pinv = use_pinv;
357   PetscFunctionReturn(PETSC_SUCCESS);
358 }
359 
360 /*@
361   KSPBCGSLSetPol - Sets the type of polynomial part that will
362   be used in the `KSPCBGSL` `KSPSolve()`
363 
364   Logically Collective
365 
366   Input Parameters:
367 + ksp   - iterative context of type `KSPCBGSL`
368 - uMROR - set to `PETSC_TRUE` when the polynomial is a convex combination of an MR and an OR step.
369 
370   Options Database Keys:
371 + -ksp_bcgsl_cxpoly - use enhanced polynomial
372 - -ksp_bcgsl_mrpoly - use standard polynomial
373 
374   Level: intermediate
375 
376 .seealso: [](ch_ksp), `KSP`, `KSPBCGSL`, `KSPCreate()`, `KSPSetType()`, `KSPCBGSL`, `KSPBCGSLSetUsePseudoinverse()`, `KSPBCGSLSetEll()`, `KSPBCGSLSetXRes()`
377 @*/
KSPBCGSLSetPol(KSP ksp,PetscBool uMROR)378 PetscErrorCode KSPBCGSLSetPol(KSP ksp, PetscBool uMROR)
379 {
380   KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
381 
382   PetscFunctionBegin;
383   PetscValidLogicalCollectiveBool(ksp, uMROR, 2);
384 
385   if (!ksp->setupstage) bcgsl->bConvex = uMROR;
386   else if (bcgsl->bConvex != uMROR) {
387     /* free the data structures,
388        then create them again
389      */
390     PetscCall(VecDestroyVecs(ksp->nwork, &ksp->work));
391     PetscCall(PetscFree5(AY0c, AYlc, AYtc, MZa, MZb));
392     PetscCall(PetscFree4(bcgsl->work, bcgsl->s, bcgsl->u, bcgsl->v));
393 
394     bcgsl->bConvex  = uMROR;
395     ksp->setupstage = KSP_SETUP_NEW;
396   }
397   PetscFunctionReturn(PETSC_SUCCESS);
398 }
399 
400 /*@
401   KSPBCGSLSetEll - Sets the number of search directions to use in the `KSPBCGSL` Krylov solver
402 
403   Logically Collective
404 
405   Input Parameters:
406 + ksp - iterative context, `KSP`, of type `KSPBCGSL`
407 - ell - number of search directions to use
408 
409   Options Database Key:
410 . -ksp_bcgsl_ell ell - Number of Krylov search directions
411 
412   Level: intermediate
413 
414   Notes:
415   For large `ell` it is common for the polynomial update problem to become singular (due to happy breakdown for smallish
416   test problems, but also for larger problems). Consequently, by default, the system is solved by using the pseudoinverse, which
417   allows the iteration to complete successfully. See `KSPBCGSLSetUsePseudoinverse()` to switch to a conventional solve.
418 
419 .seealso: [](ch_ksp), `KSPBCGSLSetUsePseudoinverse()`, `KSP`, `KSPBCGSL`, `KSPBCGSLSetPol()`, `KSPBCGSLSetXRes()`
420 @*/
KSPBCGSLSetEll(KSP ksp,PetscInt ell)421 PetscErrorCode KSPBCGSLSetEll(KSP ksp, PetscInt ell)
422 {
423   KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
424 
425   PetscFunctionBegin;
426   PetscCheck(ell >= 1, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");
427   PetscValidLogicalCollectiveInt(ksp, ell, 2);
428 
429   if (!ksp->setupstage) bcgsl->ell = ell;
430   else if (bcgsl->ell != ell) {
431     /* free the data structures, then create them again */
432     PetscCall(VecDestroyVecs(ksp->nwork, &ksp->work));
433     PetscCall(PetscFree5(AY0c, AYlc, AYtc, MZa, MZb));
434     PetscCall(PetscFree4(bcgsl->work, bcgsl->s, bcgsl->u, bcgsl->v));
435 
436     bcgsl->ell      = ell;
437     ksp->setupstage = KSP_SETUP_NEW;
438   }
439   PetscFunctionReturn(PETSC_SUCCESS);
440 }
441 
KSPView_BCGSL(KSP ksp,PetscViewer viewer)442 static PetscErrorCode KSPView_BCGSL(KSP ksp, PetscViewer viewer)
443 {
444   KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
445   PetscBool  isascii;
446 
447   PetscFunctionBegin;
448   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
449 
450   if (isascii) {
451     PetscCall(PetscViewerASCIIPrintf(viewer, "  Ell = %" PetscInt_FMT "\n", bcgsl->ell));
452     PetscCall(PetscViewerASCIIPrintf(viewer, "  Delta = %g\n", (double)bcgsl->delta));
453   }
454   PetscFunctionReturn(PETSC_SUCCESS);
455 }
456 
KSPSetFromOptions_BCGSL(KSP ksp,PetscOptionItems PetscOptionsObject)457 static PetscErrorCode KSPSetFromOptions_BCGSL(KSP ksp, PetscOptionItems PetscOptionsObject)
458 {
459   KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
460   PetscInt   this_ell;
461   PetscReal  delta;
462   PetscBool  flga = PETSC_FALSE, flg;
463 
464   PetscFunctionBegin;
465   PetscOptionsHeadBegin(PetscOptionsObject, "KSPBCGSL Options");
466 
467   /* Set number of search directions */
468   PetscCall(PetscOptionsInt("-ksp_bcgsl_ell", "Number of Krylov search directions", "KSPBCGSLSetEll", bcgsl->ell, &this_ell, &flg));
469   if (flg) PetscCall(KSPBCGSLSetEll(ksp, this_ell));
470 
471   /* Set polynomial type */
472   PetscCall(PetscOptionsBool("-ksp_bcgsl_cxpoly", "Polynomial part of BiCGStabL is MinRes + OR", "KSPBCGSLSetPol", flga, &flga, NULL));
473   if (flga) {
474     PetscCall(KSPBCGSLSetPol(ksp, PETSC_TRUE));
475   } else {
476     flg = PETSC_FALSE;
477     PetscCall(PetscOptionsBool("-ksp_bcgsl_mrpoly", "Polynomial part of BiCGStabL is MinRes", "KSPBCGSLSetPol", flg, &flg, NULL));
478     PetscCall(KSPBCGSLSetPol(ksp, PETSC_FALSE));
479   }
480 
481   /* Will computed residual be refreshed? */
482   PetscCall(PetscOptionsReal("-ksp_bcgsl_xres", "Threshold used to decide when to refresh computed residuals", "KSPBCGSLSetXRes", bcgsl->delta, &delta, &flg));
483   if (flg) PetscCall(KSPBCGSLSetXRes(ksp, delta));
484 
485   /* Use pseudoinverse? */
486   flg = bcgsl->pinv;
487   PetscCall(PetscOptionsBool("-ksp_bcgsl_pinv", "Polynomial correction via pseudoinverse", "KSPBCGSLSetUsePseudoinverse", flg, &flg, NULL));
488   PetscCall(KSPBCGSLSetUsePseudoinverse(ksp, flg));
489   PetscOptionsHeadEnd();
490   PetscFunctionReturn(PETSC_SUCCESS);
491 }
492 
KSPSetUp_BCGSL(KSP ksp)493 static PetscErrorCode KSPSetUp_BCGSL(KSP ksp)
494 {
495   KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
496   PetscInt   ell = bcgsl->ell, ldMZ = ell + 1;
497 
498   PetscFunctionBegin;
499   PetscCall(KSPSetWorkVecs(ksp, 6 + 2 * ell));
500   PetscCall(PetscMalloc5(ldMZ, &AY0c, ldMZ, &AYlc, ldMZ, &AYtc, ldMZ * ldMZ, &MZa, ldMZ * ldMZ, &MZb));
501   PetscCall(PetscBLASIntCast(5 * ell, &bcgsl->lwork));
502   PetscCall(PetscMalloc5(bcgsl->lwork, &bcgsl->work, ell, &bcgsl->s, ell * ell, &bcgsl->u, ell * ell, &bcgsl->v, 5 * ell, &bcgsl->realwork));
503   PetscFunctionReturn(PETSC_SUCCESS);
504 }
505 
KSPReset_BCGSL(KSP ksp)506 static PetscErrorCode KSPReset_BCGSL(KSP ksp)
507 {
508   KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
509 
510   PetscFunctionBegin;
511   PetscCall(VecDestroyVecs(ksp->nwork, &ksp->work));
512   PetscCall(PetscFree5(AY0c, AYlc, AYtc, MZa, MZb));
513   PetscCall(PetscFree5(bcgsl->work, bcgsl->s, bcgsl->u, bcgsl->v, bcgsl->realwork));
514   PetscFunctionReturn(PETSC_SUCCESS);
515 }
516 
KSPDestroy_BCGSL(KSP ksp)517 static PetscErrorCode KSPDestroy_BCGSL(KSP ksp)
518 {
519   PetscFunctionBegin;
520   PetscCall(KSPReset_BCGSL(ksp));
521   PetscCall(KSPDestroyDefault(ksp));
522   PetscFunctionReturn(PETSC_SUCCESS);
523 }
524 
525 /*MC
526     KSPBCGSL - Implements a slight variant of the Enhanced BiCGStab(L) algorithm in {cite}`fokkema1996enhanced`
527                and {cite}`sleijpen1994bicgstab`, see also {cite}`sleijpen1995overview`. The variation
528                concerns cases when either kappa0**2 or kappa1**2 is
529                negative due to round-off. Kappa0 has also been pulled
530                out of the denominator in the formula for ghat.
531 
532    Options Database Keys:
533 +  -ksp_bcgsl_ell <ell>         - Number of Krylov search directions to use, defaults to 2, cf. `KSPBCGSLSetEll()`
534 .  -ksp_bcgsl_cxpol             - Use a convex function of the MinRes and OR polynomials after the BiCG step instead of default MinRes, cf. `KSPBCGSLSetPol()`
535 .  -ksp_bcgsl_mrpoly            - Use the default MinRes polynomial after the BiCG step, cf. `KSPBCGSLSetPol()`
536 .  -ksp_bcgsl_xres <res>        - Threshold used to decide when to refresh computed residuals, cf. `KSPBCGSLSetXRes()`
537 -  -ksp_bcgsl_pinv <true/false> - (de)activate use of pseudoinverse, cf. `KSPBCGSLSetUsePseudoinverse()`
538 
539    Level: intermediate
540 
541    Note:
542    The "sub-iterations" of this solver are not reported by `-ksp_monitor` or recorded in `KSPSetResidualHistory()` since the solution is not directly computed for
543    these sub-iterations.
544 
545    Contributed by:
546    Joel M. Malard, email jm.malard@pnl.gov
547 
548    Developer Notes:
549    This has not been completely cleaned up into PETSc style.
550 
551    All the BLAS and LAPACK calls in the source should be removed and replaced with loops and the macros for block solvers converted from LINPACK.
552 
553 .seealso: [](ch_ksp), `KSPFBCGS`, `KSPFBCGSR`, `KSPBCGS`, `KSPPIPEBCGS`, `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPFGMRES`, `KSPBCGS`, `KSPSetPCSide()`,
554           `KSPBCGSLSetEll()`, `KSPBCGSLSetXRes()`, `KSPBCGSLSetUsePseudoinverse()`, `KSPBCGSLSetPol()`
555 M*/
KSPCreate_BCGSL(KSP ksp)556 PETSC_EXTERN PetscErrorCode KSPCreate_BCGSL(KSP ksp)
557 {
558   KSP_BCGSL *bcgsl;
559 
560   PetscFunctionBegin;
561   /* allocate BiCGStab(L) context */
562   PetscCall(PetscNew(&bcgsl));
563   ksp->data = (void *)bcgsl;
564 
565   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3));
566   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
567   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
568 
569   ksp->ops->setup          = KSPSetUp_BCGSL;
570   ksp->ops->solve          = KSPSolve_BCGSL;
571   ksp->ops->reset          = KSPReset_BCGSL;
572   ksp->ops->destroy        = KSPDestroy_BCGSL;
573   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
574   ksp->ops->buildresidual  = KSPBuildResidualDefault;
575   ksp->ops->setfromoptions = KSPSetFromOptions_BCGSL;
576   ksp->ops->view           = KSPView_BCGSL;
577 
578   /* Let the user redefine the number of directions vectors */
579   bcgsl->ell = 2;
580 
581   /*Choose between a single MR step or an averaged MR/OR */
582   bcgsl->bConvex = PETSC_FALSE;
583 
584   bcgsl->pinv = PETSC_TRUE; /* Use the reliable method by default */
585 
586   /* Set the threshold for when exact residuals will be used */
587   bcgsl->delta = 0.0;
588   PetscFunctionReturn(PETSC_SUCCESS);
589 }
590