1 /*
2 This file implements FBiCGStab-R.
3 FBiCGStab-R is a mathematically equivalent variant of FBiCGStab. Differences are:
4 (1) There are fewer MPI_Allreduce calls.
5 (2) The convergence occasionally is much faster than that of FBiCGStab.
6 */
7 #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h> /*I "petscksp.h" I*/
8 #include <petsc/private/vecimpl.h>
9
KSPSetUp_FBCGSR(KSP ksp)10 static PetscErrorCode KSPSetUp_FBCGSR(KSP ksp)
11 {
12 PetscFunctionBegin;
13 PetscCall(KSPSetWorkVecs(ksp, 8));
14 PetscFunctionReturn(PETSC_SUCCESS);
15 }
16
KSPSolve_FBCGSR(KSP ksp)17 static PetscErrorCode KSPSolve_FBCGSR(KSP ksp)
18 {
19 PetscInt i, j, N;
20 PetscScalar tau, sigma, alpha, omega, beta;
21 PetscReal rho;
22 PetscScalar xi1, xi2, xi3, xi4;
23 Vec X, B, P, P2, RP, R, V, S, T, S2;
24 PetscScalar *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p;
25 PetscScalar *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2;
26 PetscScalar insums[4], outsums[4];
27 KSP_BCGS *bcgs = (KSP_BCGS *)ksp->data;
28 PC pc;
29 Mat mat;
30
31 PetscFunctionBegin;
32 PetscCheck(ksp->vec_rhs->petscnative, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Only coded for PETSc vectors");
33 PetscCall(VecGetLocalSize(ksp->vec_sol, &N));
34
35 X = ksp->vec_sol;
36 B = ksp->vec_rhs;
37 P2 = ksp->work[0];
38
39 /* The following are involved in modified inner product calculations and vector updates */
40 RP = ksp->work[1];
41 PetscCall(VecGetArray(RP, (PetscScalar **)&rp));
42 PetscCall(VecRestoreArray(RP, NULL));
43 R = ksp->work[2];
44 PetscCall(VecGetArray(R, (PetscScalar **)&r));
45 PetscCall(VecRestoreArray(R, NULL));
46 P = ksp->work[3];
47 PetscCall(VecGetArray(P, (PetscScalar **)&p));
48 PetscCall(VecRestoreArray(P, NULL));
49 V = ksp->work[4];
50 PetscCall(VecGetArray(V, (PetscScalar **)&v));
51 PetscCall(VecRestoreArray(V, NULL));
52 S = ksp->work[5];
53 PetscCall(VecGetArray(S, (PetscScalar **)&s));
54 PetscCall(VecRestoreArray(S, NULL));
55 T = ksp->work[6];
56 PetscCall(VecGetArray(T, (PetscScalar **)&t));
57 PetscCall(VecRestoreArray(T, NULL));
58 S2 = ksp->work[7];
59 PetscCall(VecGetArray(S2, (PetscScalar **)&s2));
60 PetscCall(VecRestoreArray(S2, NULL));
61
62 /* Only supports right preconditioning */
63 PetscCheck(ksp->pc_side == PC_RIGHT, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "KSP fbcgsr does not support %s", PCSides[ksp->pc_side]);
64 if (!ksp->guess_zero) {
65 if (!bcgs->guess) PetscCall(VecDuplicate(X, &bcgs->guess));
66 PetscCall(VecCopy(X, bcgs->guess));
67 } else {
68 PetscCall(VecSet(X, 0.0));
69 }
70
71 /* Compute initial residual */
72 PetscCall(KSPGetPC(ksp, &pc));
73 PetscCall(PCGetOperators(pc, &mat, NULL));
74 if (!ksp->guess_zero) {
75 PetscCall(KSP_MatMult(ksp, mat, X, P2)); /* P2 is used as temporary storage */
76 PetscCall(VecCopy(B, R));
77 PetscCall(VecAXPY(R, -1.0, P2));
78 } else {
79 PetscCall(VecCopy(B, R));
80 }
81
82 /* Test for nothing to do */
83 PetscCall(VecNorm(R, NORM_2, &rho));
84 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
85 ksp->its = 0;
86 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho;
87 else ksp->rnorm = 0;
88 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
89 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
90 PetscCall(KSPMonitor(ksp, 0, ksp->rnorm));
91 PetscCall((*ksp->converged)(ksp, 0, ksp->rnorm, &ksp->reason, ksp->cnvP));
92 if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
93
94 /* Initialize iterates */
95 PetscCall(VecCopy(R, RP)); /* rp <- r */
96 PetscCall(VecCopy(R, P)); /* p <- r */
97
98 /* Big loop */
99 for (i = 0; i < ksp->max_it; i++) {
100 /* matmult and pc */
101 PetscCall(KSP_PCApply(ksp, P, P2)); /* p2 <- K p */
102 PetscCall(KSP_MatMult(ksp, mat, P2, V)); /* v <- A p2 */
103
104 /* inner products */
105 if (i == 0) {
106 tau = rho * rho;
107 PetscCall(VecDot(V, RP, &sigma)); /* sigma <- (v,rp) */
108 } else {
109 PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
110 tau = sigma = 0.0;
111 for (j = 0; j < N; j++) {
112 tau += r[j] * rp[j]; /* tau <- (r,rp) */
113 sigma += v[j] * rp[j]; /* sigma <- (v,rp) */
114 }
115 PetscCall(PetscLogFlops(4.0 * N));
116 PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
117 insums[0] = tau;
118 insums[1] = sigma;
119 PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
120 PetscCallMPI(MPIU_Allreduce(insums, outsums, 2, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
121 PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
122 tau = outsums[0];
123 sigma = outsums[1];
124 }
125
126 /* scalar update */
127 alpha = tau / sigma;
128
129 /* vector update */
130 PetscCall(VecWAXPY(S, -alpha, V, R)); /* s <- r - alpha v */
131
132 /* matmult and pc */
133 PetscCall(KSP_PCApply(ksp, S, S2)); /* s2 <- K s */
134 PetscCall(KSP_MatMult(ksp, mat, S2, T)); /* t <- A s2 */
135
136 /* inner products */
137 PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
138 xi1 = xi2 = xi3 = xi4 = 0.0;
139 for (j = 0; j < N; j++) {
140 xi1 += s[j] * s[j]; /* xi1 <- (s,s) */
141 xi2 += t[j] * s[j]; /* xi2 <- (t,s) */
142 xi3 += t[j] * t[j]; /* xi3 <- (t,t) */
143 xi4 += t[j] * rp[j]; /* xi4 <- (t,rp) */
144 }
145 PetscCall(PetscLogFlops(8.0 * N));
146 PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
147
148 insums[0] = xi1;
149 insums[1] = xi2;
150 insums[2] = xi3;
151 insums[3] = xi4;
152
153 PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
154 PetscCallMPI(MPIU_Allreduce(insums, outsums, 4, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
155 PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
156 xi1 = outsums[0];
157 xi2 = outsums[1];
158 xi3 = outsums[2];
159 xi4 = outsums[3];
160
161 /* test denominator */
162 if ((xi3 == 0.0) || (sigma == 0.0)) {
163 PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has failed due to zero inner product");
164 ksp->reason = KSP_DIVERGED_BREAKDOWN;
165 PetscCall(PetscInfo(ksp, "KSPSolve has failed due to zero inner product\n"));
166 break;
167 }
168
169 /* scalar updates */
170 omega = xi2 / xi3;
171 beta = -xi4 / sigma;
172 rho = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */
173
174 /* vector updates */
175 PetscCall(VecAXPBYPCZ(X, alpha, omega, 1.0, P2, S2)); /* x <- alpha * p2 + omega * s2 + x */
176
177 /* convergence test */
178 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
179 ksp->its++;
180 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho;
181 else ksp->rnorm = 0;
182 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
183 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
184 PetscCall(KSPMonitor(ksp, i + 1, ksp->rnorm));
185 PetscCall((*ksp->converged)(ksp, i + 1, ksp->rnorm, &ksp->reason, ksp->cnvP));
186 if (ksp->reason) break;
187
188 /* vector updates */
189 PetscCall(PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0));
190 for (j = 0; j < N; j++) {
191 r[j] = s[j] - omega * t[j]; /* r <- s - omega t */
192 p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */
193 }
194 PetscCall(PetscLogFlops(6.0 * N));
195 PetscCall(PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0));
196 }
197
198 if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
199 PetscFunctionReturn(PETSC_SUCCESS);
200 }
201
202 /*MC
203 KSPFBCGSR - Implements a mathematically equivalent variant of flexible bi-CG-stab, `KSPFBCGS`. [](sec_flexibleksp)
204
205 Level: beginner
206
207 Notes:
208 This implementation requires fewer `MPI_Allreduce()` calls than `KSPFBCGS` and may converge faster
209
210 See `KSPPIPEBCGS` for a pipelined version of the algorithm
211
212 Flexible BiCGStab, unlike most Krylov methods, allows the preconditioner to be nonlinear, that is the action of the preconditioner to a vector need not be linear
213 in the vector entries.
214
215 Only supports right preconditioning
216
217 .seealso: [](ch_ksp), [](sec_flexibleksp), `KSPFBCGSR`, `KSPPIPEBCGS`, `KSPBCGSL`, `KSPBCGS`, `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPFBCGS`, `KSPSetPCSide()`
218 M*/
KSPCreate_FBCGSR(KSP ksp)219 PETSC_EXTERN PetscErrorCode KSPCreate_FBCGSR(KSP ksp)
220 {
221 KSP_BCGS *bcgs;
222
223 PetscFunctionBegin;
224 PetscCall(PetscNew(&bcgs));
225
226 ksp->data = bcgs;
227 ksp->ops->setup = KSPSetUp_FBCGSR;
228 ksp->ops->solve = KSPSolve_FBCGSR;
229 ksp->ops->destroy = KSPDestroy_BCGS;
230 ksp->ops->reset = KSPReset_BCGS;
231 ksp->ops->buildsolution = KSPBuildSolution_BCGS;
232 ksp->ops->buildresidual = KSPBuildResidualDefault;
233 ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;
234 ksp->pc_side = PC_RIGHT; /* set default PC side */
235
236 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3));
237 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
238 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
239 PetscFunctionReturn(PETSC_SUCCESS);
240 }
241