xref: /petsc/src/ksp/ksp/impls/ibcgs/ibcgs.c (revision ae1ee55146a7ad071171b861759b85940c7e5c67)
1 #include <petsc/private/kspimpl.h>
2 #include <petsc/private/vecimpl.h>
3 
KSPSetUp_IBCGS(KSP ksp)4 static PetscErrorCode KSPSetUp_IBCGS(KSP ksp)
5 {
6   PetscBool diagonalscale;
7 
8   PetscFunctionBegin;
9   PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
10   PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);
11   PetscCall(KSPSetWorkVecs(ksp, 9));
12   PetscFunctionReturn(PETSC_SUCCESS);
13 }
14 
15 /*
16     The code below "cheats" from PETSc style
17        1) VecRestoreArray() is called immediately after VecGetArray() and the array values are still accessed; the reason for the immediate
18           restore is that Vec operations are done on some of the vectors during the solve and if we did not restore immediately it would
19           generate two VecGetArray() (the second one inside the Vec operation) calls without a restore between them.
20        2) The vector operations on done directly on the arrays instead of with VecXXXX() calls
21 
22        For clarity in the code we name single VECTORS with two names, for example, Rn_1 and R, but they actually always
23      the exact same memory. We do this with macro defines so that compiler won't think they are
24      two different variables.
25 
26 */
27 #define Xn_1 Xn
28 #define xn_1 xn
29 #define Rn_1 Rn
30 #define rn_1 rn
31 #define Un_1 Un
32 #define un_1 un
33 #define Vn_1 Vn
34 #define vn_1 vn
35 #define Qn_1 Qn
36 #define qn_1 qn
37 #define Zn_1 Zn
38 #define zn_1 zn
KSPSolve_IBCGS(KSP ksp)39 static PetscErrorCode KSPSolve_IBCGS(KSP ksp)
40 {
41   PetscInt  i, N;
42   PetscReal rnorm = 0.0, rnormin = 0.0;
43 #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
44   /* Because of possible instabilities in the algorithm (as indicated by different residual histories for the same problem
45      on the same number of processes  with different runs) we support computing the inner products using Intel's 80 bit arithmetic
46      rather than just 64-bit. Thus we copy our double precision values into long doubles (hoping this keeps the 16 extra bits)
47      and tell MPI to do its ALlreduces with MPI_LONG_DOUBLE.
48 
49      Note for developers that does not effect the code. Intel's long double is implemented by storing the 80 bits of extended double
50      precision into a 16 byte space (the rest of the space is ignored)  */
51   long double insums[7], outsums[7];
52 #else
53   PetscScalar insums[7], outsums[7];
54 #endif
55   PetscScalar                       sigman_2, sigman_1, sigman, pin_1, pin, phin_1, phin, tmp1, tmp2;
56   PetscScalar                       taun_1, taun, rhon, alphan_1, alphan, omegan_1, omegan;
57   const PetscScalar *PETSC_RESTRICT r0, *PETSC_RESTRICT f0, *PETSC_RESTRICT qn, *PETSC_RESTRICT b, *PETSC_RESTRICT un;
58   PetscScalar *PETSC_RESTRICT rn, *PETSC_RESTRICT xn, *PETSC_RESTRICT vn, *PETSC_RESTRICT zn;
59   /* the rest do not have to keep n_1 values */
60   PetscScalar                       kappan, thetan, etan, gamman, betan, deltan;
61   const PetscScalar *PETSC_RESTRICT tn;
62   PetscScalar *PETSC_RESTRICT       sn;
63   Vec                               R0, Rn, Xn, F0, Vn, Zn, Qn, Tn, Sn, B, Un;
64   Mat                               A;
65 
66   PetscFunctionBegin;
67   PetscCheck(ksp->vec_rhs->petscnative, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Only coded for PETSc vectors");
68 
69 #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
70   /* since 80 bit long doubls do not fill the upper bits, we fill them initially so that
71      valgrind won't detect MPI_Allreduce() with uninitialized data */
72   PetscCall(PetscMemzero(insums, sizeof(insums)));
73   PetscCall(PetscMemzero(insums, sizeof(insums)));
74 #endif
75 
76   PetscCall(PCGetOperators(ksp->pc, &A, NULL));
77   PetscCall(VecGetLocalSize(ksp->vec_sol, &N));
78   Xn = ksp->vec_sol;
79   PetscCall(VecGetArray(Xn_1, (PetscScalar **)&xn_1));
80   PetscCall(VecRestoreArray(Xn_1, NULL));
81   B = ksp->vec_rhs;
82   PetscCall(VecGetArrayRead(B, (const PetscScalar **)&b));
83   PetscCall(VecRestoreArrayRead(B, NULL));
84   R0 = ksp->work[0];
85   PetscCall(VecGetArrayRead(R0, (const PetscScalar **)&r0));
86   PetscCall(VecRestoreArrayRead(R0, NULL));
87   Rn = ksp->work[1];
88   PetscCall(VecGetArray(Rn_1, (PetscScalar **)&rn_1));
89   PetscCall(VecRestoreArray(Rn_1, NULL));
90   Un = ksp->work[2];
91   PetscCall(VecGetArrayRead(Un_1, (const PetscScalar **)&un_1));
92   PetscCall(VecRestoreArrayRead(Un_1, NULL));
93   F0 = ksp->work[3];
94   PetscCall(VecGetArrayRead(F0, (const PetscScalar **)&f0));
95   PetscCall(VecRestoreArrayRead(F0, NULL));
96   Vn = ksp->work[4];
97   PetscCall(VecGetArray(Vn_1, (PetscScalar **)&vn_1));
98   PetscCall(VecRestoreArray(Vn_1, NULL));
99   Zn = ksp->work[5];
100   PetscCall(VecGetArray(Zn_1, (PetscScalar **)&zn_1));
101   PetscCall(VecRestoreArray(Zn_1, NULL));
102   Qn = ksp->work[6];
103   PetscCall(VecGetArrayRead(Qn_1, (const PetscScalar **)&qn_1));
104   PetscCall(VecRestoreArrayRead(Qn_1, NULL));
105   Tn = ksp->work[7];
106   PetscCall(VecGetArrayRead(Tn, (const PetscScalar **)&tn));
107   PetscCall(VecRestoreArrayRead(Tn, NULL));
108   Sn = ksp->work[8];
109   PetscCall(VecGetArrayRead(Sn, (const PetscScalar **)&sn));
110   PetscCall(VecRestoreArrayRead(Sn, NULL));
111 
112   /* r0 = rn_1 = b - A*xn_1; */
113   /* PetscCall(KSP_PCApplyBAorAB(ksp,Xn_1,Rn_1,Tn));
114      PetscCall(VecAYPX(Rn_1,-1.0,B)); */
115   PetscCall(KSPInitialResidual(ksp, Xn_1, Tn, Sn, Rn_1, B));
116   if (ksp->normtype != KSP_NORM_NONE) {
117     PetscCall(VecNorm(Rn_1, NORM_2, &rnorm));
118     KSPCheckNorm(ksp, rnorm);
119   }
120   PetscCall(KSPMonitor(ksp, 0, rnorm));
121   PetscCall((*ksp->converged)(ksp, 0, rnorm, &ksp->reason, ksp->cnvP));
122   if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
123 
124   PetscCall(VecCopy(Rn_1, R0));
125 
126   /* un_1 = A*rn_1; */
127   PetscCall(KSP_PCApplyBAorAB(ksp, Rn_1, Un_1, Tn));
128 
129   /* f0   = A'*rn_1; */
130   if (ksp->pc_side == PC_RIGHT) { /* B' A' */
131     PetscCall(KSP_MatMultTranspose(ksp, A, R0, Tn));
132     PetscCall(KSP_PCApplyTranspose(ksp, Tn, F0));
133   } else if (ksp->pc_side == PC_LEFT) { /* A' B' */
134     PetscCall(KSP_PCApplyTranspose(ksp, R0, Tn));
135     PetscCall(KSP_MatMultTranspose(ksp, A, Tn, F0));
136   }
137 
138   /*qn_1 = vn_1 = zn_1 = 0.0; */
139   PetscCall(VecSet(Qn_1, 0.0));
140   PetscCall(VecSet(Vn_1, 0.0));
141   PetscCall(VecSet(Zn_1, 0.0));
142 
143   sigman_2 = pin_1 = taun_1 = 0.0;
144 
145   /* the paper says phin_1 should be initialized to zero, it is actually R0'R0 */
146   PetscCall(VecDot(R0, R0, &phin_1));
147   KSPCheckDot(ksp, phin_1);
148 
149   /* sigman_1 = rn_1'un_1  */
150   PetscCall(VecDot(R0, Un_1, &sigman_1));
151 
152   alphan_1 = omegan_1 = 1.0;
153 
154   for (ksp->its = 1; ksp->its < ksp->max_it + 1; ksp->its++) {
155     rhon = phin_1 - omegan_1 * sigman_2 + omegan_1 * alphan_1 * pin_1;
156     if (ksp->its == 1) deltan = rhon;
157     else deltan = rhon / taun_1;
158     betan = deltan / omegan_1;
159     taun  = sigman_1 + betan * taun_1 - deltan * pin_1;
160     if (taun == 0.0) {
161       PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has not converged due to taun is zero, iteration %" PetscInt_FMT, ksp->its);
162       ksp->reason = KSP_DIVERGED_NANORINF;
163       PetscFunctionReturn(PETSC_SUCCESS);
164     }
165     alphan = rhon / taun;
166     PetscCall(PetscLogFlops(15.0));
167 
168     /*
169         zn = alphan*rn_1 + (alphan/alphan_1)betan*zn_1 - alphan*deltan*vn_1
170         vn = un_1 + betan*vn_1 - deltan*qn_1
171         sn = rn_1 - alphan*vn
172 
173        The algorithm in the paper is missing the alphan/alphan_1 term in the zn update
174     */
175     PetscCall(PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0));
176     tmp1 = (alphan / alphan_1) * betan;
177     tmp2 = alphan * deltan;
178     for (i = 0; i < N; i++) {
179       zn[i] = alphan * rn_1[i] + tmp1 * zn_1[i] - tmp2 * vn_1[i];
180       vn[i] = un_1[i] + betan * vn_1[i] - deltan * qn_1[i];
181       sn[i] = rn_1[i] - alphan * vn[i];
182     }
183     PetscCall(PetscLogFlops(3.0 + 11.0 * N));
184     PetscCall(PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0));
185 
186     /*
187         qn = A*vn
188     */
189     PetscCall(KSP_PCApplyBAorAB(ksp, Vn, Qn, Tn));
190 
191     /*
192         tn = un_1 - alphan*qn
193     */
194     PetscCall(VecWAXPY(Tn, -alphan, Qn, Un_1));
195 
196     /*
197         phin = r0'sn
198         pin  = r0'qn
199         gamman = f0'sn
200         etan   = f0'tn
201         thetan = sn'tn
202         kappan = tn'tn
203     */
204     PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
205     phin = pin = gamman = etan = thetan = kappan = 0.0;
206     for (i = 0; i < N; i++) {
207       phin += r0[i] * sn[i];
208       pin += r0[i] * qn[i];
209       gamman += f0[i] * sn[i];
210       etan += f0[i] * tn[i];
211       thetan += sn[i] * tn[i];
212       kappan += tn[i] * tn[i];
213     }
214     PetscCall(PetscLogFlops(12.0 * N));
215     PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
216 
217     insums[0] = phin;
218     insums[1] = pin;
219     insums[2] = gamman;
220     insums[3] = etan;
221     insums[4] = thetan;
222     insums[5] = kappan;
223     insums[6] = rnormin;
224 
225     PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
226 #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
227     if (ksp->lagnorm && ksp->its > 1) {
228       PetscCallMPI(MPIU_Allreduce(insums, outsums, 7, MPI_LONG_DOUBLE, MPI_SUM, PetscObjectComm((PetscObject)ksp)));
229     } else {
230       PetscCallMPI(MPIU_Allreduce(insums, outsums, 6, MPI_LONG_DOUBLE, MPI_SUM, PetscObjectComm((PetscObject)ksp)));
231     }
232 #else
233     if (ksp->lagnorm && ksp->its > 1 && ksp->normtype != KSP_NORM_NONE) {
234       PetscCallMPI(MPIU_Allreduce(insums, outsums, 7, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
235     } else {
236       PetscCallMPI(MPIU_Allreduce(insums, outsums, 6, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
237     }
238 #endif
239     PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
240     phin   = outsums[0];
241     pin    = outsums[1];
242     gamman = outsums[2];
243     etan   = outsums[3];
244     thetan = outsums[4];
245     kappan = outsums[5];
246     if (ksp->lagnorm && ksp->its > 1 && ksp->normtype != KSP_NORM_NONE) rnorm = PetscSqrtReal(PetscRealPart(outsums[6]));
247 
248     if (kappan == 0.0) {
249       PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has not converged due to kappan is zero, iteration %" PetscInt_FMT, ksp->its);
250       ksp->reason = KSP_DIVERGED_NANORINF;
251       PetscFunctionReturn(PETSC_SUCCESS);
252     }
253     if (thetan == 0.0) {
254       PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has not converged due to thetan is zero, iteration %" PetscInt_FMT, ksp->its);
255       ksp->reason = KSP_DIVERGED_NANORINF;
256       PetscFunctionReturn(PETSC_SUCCESS);
257     }
258     omegan = thetan / kappan;
259     sigman = gamman - omegan * etan;
260 
261     /*
262         rn = sn - omegan*tn
263         xn = xn_1 + zn + omegan*sn
264     */
265     PetscCall(PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0));
266     rnormin = 0.0;
267     for (i = 0; i < N; i++) {
268       rn[i] = sn[i] - omegan * tn[i];
269       rnormin += PetscRealPart(PetscConj(rn[i]) * rn[i]);
270       xn[i] += zn[i] + omegan * sn[i];
271     }
272     PetscCall(PetscObjectStateIncrease((PetscObject)Xn));
273     PetscCall(PetscLogFlops(7.0 * N));
274     PetscCall(PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0));
275 
276     if (!ksp->lagnorm && ksp->chknorm < ksp->its && ksp->normtype != KSP_NORM_NONE) {
277       PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
278       PetscCallMPI(MPIU_Allreduce(&rnormin, &rnorm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
279       PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
280       rnorm = PetscSqrtReal(rnorm);
281     }
282 
283     /* Test for convergence */
284     PetscCall(KSPMonitor(ksp, ksp->its, rnorm));
285     PetscCall((*ksp->converged)(ksp, ksp->its, rnorm, &ksp->reason, ksp->cnvP));
286     if (ksp->reason) {
287       PetscCall(KSPUnwindPreconditioner(ksp, Xn, Tn));
288       PetscFunctionReturn(PETSC_SUCCESS);
289     }
290 
291     /* un = A*rn */
292     PetscCall(KSP_PCApplyBAorAB(ksp, Rn, Un, Tn));
293 
294     /* Update n-1 locations with n locations */
295     sigman_2 = sigman_1;
296     sigman_1 = sigman;
297     pin_1    = pin;
298     phin_1   = phin;
299     alphan_1 = alphan;
300     taun_1   = taun;
301     omegan_1 = omegan;
302   }
303   if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
304   PetscCall(KSPUnwindPreconditioner(ksp, Xn, Tn));
305   PetscFunctionReturn(PETSC_SUCCESS);
306 }
307 
308 /*MC
309    KSPIBCGS - Implements the IBiCGStab (Improved Stabilized version of BiConjugate Gradient) method {cite}`yang:brent:2002`
310    in an alternative form to have only a single global reduction operation instead of the usual 3 (or 4)
311 
312    Level: beginner
313 
314    Notes:
315    Supports left and right preconditioning
316 
317    See `KSPBCGSL` for additional stabilization
318 
319    Unlike the Bi-CG-stab algorithm, this requires one multiplication be the transpose of the operator
320    before the iteration starts.
321 
322    The paper has two errors in the algorithm presented, they are fixed in the code in `KSPSolve_IBCGS()`
323 
324    For maximum reduction in the number of global reduction operations, this solver should be used with
325    `KSPSetLagNorm()`.
326 
327    This is not supported for complex numbers.
328 
329 .seealso: [](ch_ksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPBCGSL`, `KSPIBCGS`, `KSPSetLagNorm()`
330 M*/
331 
KSPCreate_IBCGS(KSP ksp)332 PETSC_EXTERN PetscErrorCode KSPCreate_IBCGS(KSP ksp)
333 {
334   PetscFunctionBegin;
335   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3));
336   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
337   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
338 
339   ksp->ops->setup          = KSPSetUp_IBCGS;
340   ksp->ops->solve          = KSPSolve_IBCGS;
341   ksp->ops->destroy        = KSPDestroyDefault;
342   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
343   ksp->ops->buildresidual  = KSPBuildResidualDefault;
344   ksp->ops->setfromoptions = NULL;
345   ksp->ops->view           = NULL;
346   PetscCheck(!PetscDefined(USE_COMPLEX), PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "This is not supported for complex numbers");
347   PetscFunctionReturn(PETSC_SUCCESS);
348 }
349