#include <petsc/private/kspimpl.h>
#include <petsc/private/vecimpl.h>

static PetscErrorCode KSPSetUp_IBCGS(KSP ksp)
{
  PetscBool diagonalscale;

  PetscFunctionBegin;
  PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
  PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);
  PetscCall(KSPSetWorkVecs(ksp, 9));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    The code below "cheats" from PETSc style
       1) VecRestoreArray() is called immediately after VecGetArray() and the array values are still accessed; the reason for the immediate
          restore is that Vec operations are done on some of the vectors during the solve and, if we did not restore immediately, it would
          generate two VecGetArray() calls (the second one inside the Vec operation) without a restore between them.
       2) The vector operations are done directly on the arrays instead of with VecXXXX() calls

    For clarity in the code we give single VECTORS two names, for example, Rn_1 and Rn, but they always refer to
    the exact same memory. We do this with macro defines so that the compiler won't think they are
    two different variables.
*/
#define Xn_1 Xn
#define xn_1 xn
#define Rn_1 Rn
#define rn_1 rn
#define Un_1 Un
#define un_1 un
#define Vn_1 Vn
#define vn_1 vn
#define Qn_1 Qn
#define qn_1 qn
#define Zn_1 Zn
#define zn_1 zn
static PetscErrorCode KSPSolve_IBCGS(KSP ksp)
{
  PetscInt  i, N;
  PetscReal rnorm = 0.0, rnormin = 0.0;
#if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
  /* Because of possible instabilities in the algorithm (as indicated by different residual histories for the same problem
     on the same number of processes with different runs) we support computing the inner products using Intel's 80-bit arithmetic
     rather than just 64-bit. Thus we copy our double precision values into long doubles (hoping this keeps the 16 extra bits)
     and tell MPI to do its Allreduce()s with MPI_LONG_DOUBLE.

     Note for developers (this does not affect the code): Intel's long double is implemented by storing the 80 bits of extended double
     precision into a 16-byte space (the rest of the space is ignored) */
  long double insums[7], outsums[7];
#else
  PetscScalar insums[7], outsums[7];
#endif
  PetscScalar        sigman_2, sigman_1, sigman, pin_1, pin, phin_1, phin, tmp1, tmp2;
  PetscScalar        taun_1, taun, rhon, alphan_1, alphan, omegan_1, omegan;
  const PetscScalar *PETSC_RESTRICT r0, *PETSC_RESTRICT f0, *PETSC_RESTRICT qn, *PETSC_RESTRICT b, *PETSC_RESTRICT un;
  PetscScalar *PETSC_RESTRICT rn, *PETSC_RESTRICT xn, *PETSC_RESTRICT vn, *PETSC_RESTRICT zn;
  /* the rest do not have to keep n_1 values */
  PetscScalar        kappan, thetan, etan, gamman, betan, deltan;
  const PetscScalar *PETSC_RESTRICT tn;
  PetscScalar *PETSC_RESTRICT sn;
  Vec R0, Rn, Xn, F0, Vn, Zn, Qn, Tn, Sn, B, Un;
  Mat A;

  PetscFunctionBegin;
  PetscCheck(ksp->vec_rhs->petscnative, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Only coded for PETSc vectors");

#if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
  /* since 80-bit long doubles do not fill the upper bits of their storage, we zero the buffers initially so that
     valgrind won't detect MPI_Allreduce() being called with uninitialized data */
  PetscCall(PetscMemzero(insums, sizeof(insums)));
  PetscCall(PetscMemzero(outsums, sizeof(outsums)));
#endif

  PetscCall(PCGetOperators(ksp->pc, &A, NULL));
  PetscCall(VecGetLocalSize(ksp->vec_sol, &N));
  Xn = ksp->vec_sol;
  PetscCall(VecGetArray(Xn_1, (PetscScalar **)&xn_1));
  PetscCall(VecRestoreArray(Xn_1, NULL));
  B = ksp->vec_rhs;
  PetscCall(VecGetArrayRead(B, (const PetscScalar **)&b));
  PetscCall(VecRestoreArrayRead(B, NULL));
  R0 = ksp->work[0];
  PetscCall(VecGetArrayRead(R0, (const PetscScalar **)&r0));
  PetscCall(VecRestoreArrayRead(R0, NULL));
  Rn = ksp->work[1];
  PetscCall(VecGetArray(Rn_1, (PetscScalar **)&rn_1));
  PetscCall(VecRestoreArray(Rn_1, NULL));
  Un = ksp->work[2];
  PetscCall(VecGetArrayRead(Un_1, (const PetscScalar **)&un_1));
  PetscCall(VecRestoreArrayRead(Un_1, NULL));
  F0 = ksp->work[3];
  PetscCall(VecGetArrayRead(F0, (const PetscScalar **)&f0));
  PetscCall(VecRestoreArrayRead(F0, NULL));
  Vn = ksp->work[4];
  PetscCall(VecGetArray(Vn_1, (PetscScalar **)&vn_1));
  PetscCall(VecRestoreArray(Vn_1, NULL));
  Zn = ksp->work[5];
  PetscCall(VecGetArray(Zn_1, (PetscScalar **)&zn_1));
  PetscCall(VecRestoreArray(Zn_1, NULL));
  Qn = ksp->work[6];
  PetscCall(VecGetArrayRead(Qn_1, (const PetscScalar **)&qn_1));
  PetscCall(VecRestoreArrayRead(Qn_1, NULL));
  Tn = ksp->work[7];
  PetscCall(VecGetArrayRead(Tn, (const PetscScalar **)&tn));
  PetscCall(VecRestoreArrayRead(Tn, NULL));
  Sn = ksp->work[8];
  PetscCall(VecGetArrayRead(Sn, (const PetscScalar **)&sn));
  PetscCall(VecRestoreArrayRead(Sn, NULL));
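
  /* From here on the solver reads and writes these raw arrays directly; see the style note above the
     macro definitions for why each array was restored immediately after it was obtained */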

  /* r0 = rn_1 = b - A*xn_1; */
  /* PetscCall(KSP_PCApplyBAorAB(ksp,Xn_1,Rn_1,Tn));
     PetscCall(VecAYPX(Rn_1,-1.0,B)); */
  PetscCall(KSPInitialResidual(ksp, Xn_1, Tn, Sn, Rn_1, B));
  if (ksp->normtype != KSP_NORM_NONE) {
    PetscCall(VecNorm(Rn_1, NORM_2, &rnorm));
    KSPCheckNorm(ksp, rnorm);
  }
  PetscCall(KSPMonitor(ksp, 0, rnorm));
  PetscCall((*ksp->converged)(ksp, 0, rnorm, &ksp->reason, ksp->cnvP));
  if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(VecCopy(Rn_1, R0));

  /* un_1 = A*rn_1; */
  PetscCall(KSP_PCApplyBAorAB(ksp, Rn_1, Un_1, Tn));
  /* f0 = A'*rn_1; */
  if (ksp->pc_side == PC_RIGHT) { /* B' A' */
    PetscCall(KSP_MatMultTranspose(ksp, A, R0, Tn));
    PetscCall(KSP_PCApplyTranspose(ksp, Tn, F0));
  } else if (ksp->pc_side == PC_LEFT) { /* A' B' */
    PetscCall(KSP_PCApplyTranspose(ksp, R0, Tn));
    PetscCall(KSP_MatMultTranspose(ksp, A, Tn, F0));
  }

  /* qn_1 = vn_1 = zn_1 = 0.0; */
  PetscCall(VecSet(Qn_1, 0.0));
  PetscCall(VecSet(Vn_1, 0.0));
  PetscCall(VecSet(Zn_1, 0.0));

  sigman_2 = pin_1 = taun_1 = 0.0;

  /* the paper says phin_1 should be initialized to zero, but it is actually R0'R0 */
  PetscCall(VecDot(R0, R0, &phin_1));
  KSPCheckDot(ksp, phin_1);

  /* sigman_1 = rn_1'un_1 */
  PetscCall(VecDot(R0, Un_1, &sigman_1));

  alphan_1 = omegan_1 = 1.0;
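
  /* Main iteration: each pass applies the (preconditioned) operator twice -- once to form qn and once to
     form un -- and needs only the single fused MPI_Allreduce() below for all of its inner products */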

  for (ksp->its = 1; ksp->its < ksp->max_it + 1; ksp->its++) {
    rhon = phin_1 - omegan_1 * sigman_2 + omegan_1 * alphan_1 * pin_1;
    if (ksp->its == 1) deltan = rhon;
    else deltan = rhon / taun_1;
    betan = deltan / omegan_1;
    taun  = sigman_1 + betan * taun_1 - deltan * pin_1;
    if (taun == 0.0) {
      PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has not converged because taun is zero, iteration %" PetscInt_FMT, ksp->its);
      ksp->reason = KSP_DIVERGED_NANORINF;
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    alphan = rhon / taun;
    PetscCall(PetscLogFlops(15.0));

    /*
        zn = alphan*rn_1 + (alphan/alphan_1)*betan*zn_1 - alphan*deltan*vn_1
        vn = un_1 + betan*vn_1 - deltan*qn_1
        sn = rn_1 - alphan*vn

        The algorithm in the paper is missing the alphan/alphan_1 term in the zn update
    */
    PetscCall(PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0));
    tmp1 = (alphan / alphan_1) * betan;
    tmp2 = alphan * deltan;
    for (i = 0; i < N; i++) {
      zn[i] = alphan * rn_1[i] + tmp1 * zn_1[i] - tmp2 * vn_1[i];
      vn[i] = un_1[i] + betan * vn_1[i] - deltan * qn_1[i];
      sn[i] = rn_1[i] - alphan * vn[i];
    }
    PetscCall(PetscLogFlops(3.0 + 11.0 * N));
    PetscCall(PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0));

    /*
        qn = A*vn
    */
    PetscCall(KSP_PCApplyBAorAB(ksp, Vn, Qn, Tn));

    /*
        tn = un_1 - alphan*qn
    */
    PetscCall(VecWAXPY(Tn, -alphan, Qn, Un_1));

    /*
        phin = r0'sn
        pin = r0'qn
        gamman = f0'sn
        etan = f0'tn
        thetan = sn'tn
        kappan = tn'tn
    */
    PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
    phin = pin = gamman = etan = thetan = kappan = 0.0;
    for (i = 0; i < N; i++) {
      phin += r0[i] * sn[i];
      pin += r0[i] * qn[i];
      gamman += f0[i] * sn[i];
      etan += f0[i] * tn[i];
      thetan += sn[i] * tn[i];
      kappan += tn[i] * tn[i];
    }
    PetscCall(PetscLogFlops(12.0 * N));
    PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));

    insums[0] = phin;
    insums[1] = pin;
    insums[2] = gamman;
    insums[3] = etan;
    insums[4] = thetan;
    insums[5] = kappan;
    insums[6] = rnormin;
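
    /* a single fused reduction: entries 0-5 are the six inner products above; entry 6 piggybacks the
       previous iteration's local residual norm when the norm computation is lagged */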
    PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
#if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
    if (ksp->lagnorm && ksp->its > 1) {
      PetscCallMPI(MPIU_Allreduce(insums, outsums, 7, MPI_LONG_DOUBLE, MPI_SUM, PetscObjectComm((PetscObject)ksp)));
    } else {
      PetscCallMPI(MPIU_Allreduce(insums, outsums, 6, MPI_LONG_DOUBLE, MPI_SUM, PetscObjectComm((PetscObject)ksp)));
    }
#else
    if (ksp->lagnorm && ksp->its > 1 && ksp->normtype != KSP_NORM_NONE) {
      PetscCallMPI(MPIU_Allreduce(insums, outsums, 7, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
    } else {
      PetscCallMPI(MPIU_Allreduce(insums, outsums, 6, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
    }
#endif
    PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
    phin   = outsums[0];
    pin    = outsums[1];
    gamman = outsums[2];
    etan   = outsums[3];
    thetan = outsums[4];
    kappan = outsums[5];
    if (ksp->lagnorm && ksp->its > 1 && ksp->normtype != KSP_NORM_NONE) rnorm = PetscSqrtReal(PetscRealPart(outsums[6]));

    if (kappan == 0.0) {
      PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has not converged because kappan is zero, iteration %" PetscInt_FMT, ksp->its);
      ksp->reason = KSP_DIVERGED_NANORINF;
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    if (thetan == 0.0) {
      PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has not converged because thetan is zero, iteration %" PetscInt_FMT, ksp->its);
      ksp->reason = KSP_DIVERGED_NANORINF;
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    omegan = thetan / kappan;
    sigman = gamman - omegan * etan;

    /*
        rn = sn - omegan*tn
        xn = xn_1 + zn + omegan*sn
    */
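    /* rnormin accumulates the local squared norm of rn; it is reduced immediately below when the norm is
       not lagged, otherwise it travels in entry 6 of the next iteration's fused Allreduce() */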
    PetscCall(PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0));
    rnormin = 0.0;
    for (i = 0; i < N; i++) {
      rn[i] = sn[i] - omegan * tn[i];
      rnormin += PetscRealPart(PetscConj(rn[i]) * rn[i]);
      xn[i] += zn[i] + omegan * sn[i];
    }
    PetscCall(PetscObjectStateIncrease((PetscObject)Xn));
    PetscCall(PetscLogFlops(7.0 * N));
    PetscCall(PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0));

    if (!ksp->lagnorm && ksp->chknorm < ksp->its && ksp->normtype != KSP_NORM_NONE) {
      PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
      PetscCallMPI(MPIU_Allreduce(&rnormin, &rnorm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
      PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
      rnorm = PetscSqrtReal(rnorm);
    }

    /* Test for convergence */
    PetscCall(KSPMonitor(ksp, ksp->its, rnorm));
    PetscCall((*ksp->converged)(ksp, ksp->its, rnorm, &ksp->reason, ksp->cnvP));
    if (ksp->reason) {
      PetscCall(KSPUnwindPreconditioner(ksp, Xn, Tn));
      PetscFunctionReturn(PETSC_SUCCESS);
    }

    /* un = A*rn */
    PetscCall(KSP_PCApplyBAorAB(ksp, Rn, Un, Tn));

    /* Update n-1 locations with n locations */
    sigman_2 = sigman_1;
    sigman_1 = sigman;
    pin_1    = pin;
    phin_1   = phin;
    alphan_1 = alphan;
    taun_1   = taun;
    omegan_1 = omegan;
  }
  if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
  PetscCall(KSPUnwindPreconditioner(ksp, Xn, Tn));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   KSPIBCGS - Implements the IBiCGStab (Improved Stabilized version of BiConjugate Gradient) method {cite}`yang:brent:2002`
   in an alternative form that needs only a single global reduction operation per iteration instead of the usual 3 (or 4)

   Level: beginner

   Notes:
   Supports left and right preconditioning

   See `KSPBCGSL` for additional stabilization

   Unlike the Bi-CG-stab algorithm, this requires one multiplication by the transpose of the operator
   before the iteration starts.

   The paper has two errors in the algorithm presented; they are fixed in the code in `KSPSolve_IBCGS()`

   For maximum reduction in the number of global reduction operations, this solver should be used with
   `KSPSetLagNorm()`.

   This is not supported for complex numbers.
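
   Example usage, a minimal sketch assuming `ksp` has already been created with `KSPCreate()`:
.vb
   KSPSetType(ksp, KSPIBCGS);
   KSPSetLagNorm(ksp, PETSC_TRUE); /* lag the residual norm so the solve keeps one reduction per iteration */
   KSPSolve(ksp, b, x);
.ve
   or select it at runtime with -ksp_type ibcgs -ksp_lag_norm.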

.seealso: [](ch_ksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPBCGSL`, `KSPIBCGS`, `KSPSetLagNorm()`
M*/

PETSC_EXTERN PetscErrorCode KSPCreate_IBCGS(KSP ksp)
{
  PetscFunctionBegin;
  PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3));
  PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
  PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));

  ksp->ops->setup          = KSPSetUp_IBCGS;
  ksp->ops->solve          = KSPSolve_IBCGS;
  ksp->ops->destroy        = KSPDestroyDefault;
  ksp->ops->buildsolution  = KSPBuildSolutionDefault;
  ksp->ops->buildresidual  = KSPBuildResidualDefault;
  ksp->ops->setfromoptions = NULL;
  ksp->ops->view           = NULL;
  PetscCheck(!PetscDefined(USE_COMPLEX), PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "This is not supported for complex numbers");
  PetscFunctionReturn(PETSC_SUCCESS);
}