1 #include <petsc/private/kspimpl.h>
2
3 /*
4 KSPSetUp_PIPECGRR - Sets up the workspace needed by the PIPECGRR method.
5
6 This is called once, usually automatically by KSPSolve() or KSPSetUp()
7 but can be called directly by KSPSetUp()
8 */
KSPSetUp_PIPECGRR(KSP ksp)9 static PetscErrorCode KSPSetUp_PIPECGRR(KSP ksp)
10 {
11 PetscFunctionBegin;
12 /* get work vectors needed by PIPECGRR */
13 PetscCall(KSPSetWorkVecs(ksp, 9));
14 PetscFunctionReturn(PETSC_SUCCESS);
15 }
16
17 /*
18 KSPSolve_PIPECGRR - This routine actually applies the pipelined conjugate gradient method with automated residual replacement
19 */
KSPSolve_PIPECGRR(KSP ksp)20 static PetscErrorCode KSPSolve_PIPECGRR(KSP ksp)
21 {
22 PetscInt i = 0, replace = 0, nsize;
23 PetscScalar alpha = 0.0, beta = 0.0, gamma = 0.0, gammaold = 0.0, delta = 0.0, alphap = 0.0, betap = 0.0;
24 PetscReal dp = 0.0, nsi = 0.0, sqn = 0.0, Anorm = 0.0, rnp = 0.0, pnp = 0.0, snp = 0.0, unp = 0.0, wnp = 0.0, xnp = 0.0, qnp = 0.0, znp = 0.0, mnz = 5.0, tol = PETSC_SQRT_MACHINE_EPSILON, eps = PETSC_MACHINE_EPSILON;
25 PetscReal ds = 0.0, dz = 0.0, dx = 0.0, dpp = 0.0, dq = 0.0, dm = 0.0, du = 0.0, dw = 0.0, db = 0.0, errr = 0.0, errrprev = 0.0, errs = 0.0, errw = 0.0, errz = 0.0, errncr = 0.0, errncs = 0.0, errncw = 0.0, errncz = 0.0;
26 Vec X, B, Z, P, W, Q, U, M, N, R, S;
27 Mat Amat, Pmat;
28 PetscBool diagonalscale;
29
30 PetscFunctionBegin;
31 PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
32 PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);
33
34 X = ksp->vec_sol;
35 B = ksp->vec_rhs;
36 M = ksp->work[0];
37 Z = ksp->work[1];
38 P = ksp->work[2];
39 N = ksp->work[3];
40 W = ksp->work[4];
41 Q = ksp->work[5];
42 U = ksp->work[6];
43 R = ksp->work[7];
44 S = ksp->work[8];
45
46 PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
47
48 ksp->its = 0;
49 if (!ksp->guess_zero) {
50 PetscCall(KSP_MatMult(ksp, Amat, X, R)); /* r <- b - Ax */
51 PetscCall(VecAYPX(R, -1.0, B));
52 } else {
53 PetscCall(VecCopy(B, R)); /* r <- b (x is 0) */
54 }
55
56 PetscCall(KSP_PCApply(ksp, R, U)); /* u <- Br */
57
58 switch (ksp->normtype) {
59 case KSP_NORM_PRECONDITIONED:
60 PetscCall(VecNormBegin(U, NORM_2, &dp)); /* dp <- u'*u = e'*A'*B'*B*A'*e' */
61 PetscCall(VecNormBegin(B, NORM_2, &db));
62 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U)));
63 PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
64 PetscCall(VecNormEnd(U, NORM_2, &dp));
65 PetscCall(VecNormEnd(B, NORM_2, &db));
66 break;
67 case KSP_NORM_UNPRECONDITIONED:
68 PetscCall(VecNormBegin(R, NORM_2, &dp)); /* dp <- r'*r = e'*A'*A*e */
69 PetscCall(VecNormBegin(B, NORM_2, &db));
70 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
71 PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
72 PetscCall(VecNormEnd(R, NORM_2, &dp));
73 PetscCall(VecNormEnd(B, NORM_2, &db));
74 break;
75 case KSP_NORM_NATURAL:
76 PetscCall(VecDotBegin(R, U, &gamma)); /* gamma <- u'*r */
77 PetscCall(VecNormBegin(B, NORM_2, &db));
78 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
79 PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
80 PetscCall(VecDotEnd(R, U, &gamma));
81 PetscCall(VecNormEnd(B, NORM_2, &db));
82 KSPCheckDot(ksp, gamma);
83 dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- r'*u = r'*B*r = e'*A'*B*A*e */
84 break;
85 case KSP_NORM_NONE:
86 PetscCall(KSP_MatMult(ksp, Amat, U, W));
87 dp = 0.0;
88 break;
89 default:
90 SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
91 }
92 PetscCall(KSPLogResidualHistory(ksp, dp));
93 PetscCall(KSPMonitor(ksp, 0, dp));
94 ksp->rnorm = dp;
95 PetscCall((*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP)); /* test for convergence */
96 if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
97
98 PetscCall(MatNorm(Amat, NORM_INFINITY, &Anorm));
99 PetscCall(VecGetSize(B, &nsize));
100 nsi = (PetscReal)nsize;
101 sqn = PetscSqrtReal(nsi);
102
103 do {
104 if (i > 1) {
105 pnp = dpp;
106 snp = ds;
107 qnp = dq;
108 znp = dz;
109 }
110 if (i > 0) {
111 rnp = dp;
112 unp = du;
113 wnp = dw;
114 xnp = dx;
115 alphap = alpha;
116 betap = beta;
117 }
118
119 if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
120 PetscCall(VecNormBegin(R, NORM_2, &dp));
121 } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
122 PetscCall(VecNormBegin(U, NORM_2, &dp));
123 }
124 if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotBegin(R, U, &gamma));
125 PetscCall(VecDotBegin(W, U, &delta));
126
127 if (i > 0) {
128 PetscCall(VecNormBegin(S, NORM_2, &ds));
129 PetscCall(VecNormBegin(Z, NORM_2, &dz));
130 PetscCall(VecNormBegin(P, NORM_2, &dpp));
131 PetscCall(VecNormBegin(Q, NORM_2, &dq));
132 PetscCall(VecNormBegin(M, NORM_2, &dm));
133 }
134 PetscCall(VecNormBegin(X, NORM_2, &dx));
135 PetscCall(VecNormBegin(U, NORM_2, &du));
136 PetscCall(VecNormBegin(W, NORM_2, &dw));
137
138 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
139 PetscCall(KSP_PCApply(ksp, W, M)); /* m <- Bw */
140 PetscCall(KSP_MatMult(ksp, Amat, M, N)); /* n <- Am */
141
142 if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
143 PetscCall(VecNormEnd(R, NORM_2, &dp));
144 } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
145 PetscCall(VecNormEnd(U, NORM_2, &dp));
146 }
147 if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotEnd(R, U, &gamma));
148 PetscCall(VecDotEnd(W, U, &delta));
149
150 if (i > 0) {
151 PetscCall(VecNormEnd(S, NORM_2, &ds));
152 PetscCall(VecNormEnd(Z, NORM_2, &dz));
153 PetscCall(VecNormEnd(P, NORM_2, &dpp));
154 PetscCall(VecNormEnd(Q, NORM_2, &dq));
155 PetscCall(VecNormEnd(M, NORM_2, &dm));
156 }
157 PetscCall(VecNormEnd(X, NORM_2, &dx));
158 PetscCall(VecNormEnd(U, NORM_2, &du));
159 PetscCall(VecNormEnd(W, NORM_2, &dw));
160
161 if (i > 0) {
162 if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
163 else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
164
165 ksp->rnorm = dp;
166 PetscCall(KSPLogResidualHistory(ksp, dp));
167 PetscCall(KSPMonitor(ksp, i, dp));
168 PetscCall((*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP));
169 if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
170 }
171
172 if (i == 0) {
173 alpha = gamma / delta;
174 PetscCall(VecCopy(N, Z)); /* z <- n */
175 PetscCall(VecCopy(M, Q)); /* q <- m */
176 PetscCall(VecCopy(U, P)); /* p <- u */
177 PetscCall(VecCopy(W, S)); /* s <- w */
178 } else {
179 beta = gamma / gammaold;
180 alpha = gamma / (delta - beta / alpha * gamma);
181 PetscCall(VecAYPX(Z, beta, N)); /* z <- n + beta * z */
182 PetscCall(VecAYPX(Q, beta, M)); /* q <- m + beta * q */
183 PetscCall(VecAYPX(P, beta, U)); /* p <- u + beta * p */
184 PetscCall(VecAYPX(S, beta, W)); /* s <- w + beta * s */
185 }
186 PetscCall(VecAXPY(X, alpha, P)); /* x <- x + alpha * p */
187 PetscCall(VecAXPY(U, -alpha, Q)); /* u <- u - alpha * q */
188 PetscCall(VecAXPY(W, -alpha, Z)); /* w <- w - alpha * z */
189 PetscCall(VecAXPY(R, -alpha, S)); /* r <- r - alpha * s */
190 gammaold = gamma;
191
192 if (i > 0) {
193 errncr = PetscSqrtReal(Anorm * xnp + 2.0 * Anorm * PetscAbsScalar(alphap) * dpp + rnp + 2.0 * PetscAbsScalar(alphap) * ds) * eps;
194 errncw = PetscSqrtReal(Anorm * unp + 2.0 * Anorm * PetscAbsScalar(alphap) * dq + wnp + 2.0 * PetscAbsScalar(alphap) * dz) * eps;
195 }
196 if (i > 1) {
197 errncs = PetscSqrtReal(Anorm * unp + 2.0 * Anorm * PetscAbsScalar(betap) * pnp + wnp + 2.0 * PetscAbsScalar(betap) * snp) * eps;
198 errncz = PetscSqrtReal((mnz * sqn + 2) * Anorm * dm + 2.0 * Anorm * PetscAbsScalar(betap) * qnp + 2.0 * PetscAbsScalar(betap) * znp) * eps;
199 }
200
201 if (i > 0) {
202 if (i == 1) {
203 errr = PetscSqrtReal((mnz * sqn + 1) * Anorm * xnp + db) * eps + PetscSqrtReal(PetscAbsScalar(alphap) * mnz * sqn * Anorm * dpp) * eps + errncr;
204 errs = PetscSqrtReal(mnz * sqn * Anorm * dpp) * eps;
205 errw = PetscSqrtReal(mnz * sqn * Anorm * unp) * eps + PetscSqrtReal(PetscAbsScalar(alphap) * mnz * sqn * Anorm * dq) * eps + errncw;
206 errz = PetscSqrtReal(mnz * sqn * Anorm * dq) * eps;
207 } else if (replace == 1) {
208 errrprev = errr;
209 errr = PetscSqrtReal((mnz * sqn + 1) * Anorm * dx + db) * eps;
210 errs = PetscSqrtReal(mnz * sqn * Anorm * dpp) * eps;
211 errw = PetscSqrtReal(mnz * sqn * Anorm * du) * eps;
212 errz = PetscSqrtReal(mnz * sqn * Anorm * dq) * eps;
213 replace = 0;
214 } else {
215 errrprev = errr;
216 errr = errr + PetscAbsScalar(alphap) * PetscAbsScalar(betap) * errs + PetscAbsScalar(alphap) * errw + errncr + PetscAbsScalar(alphap) * errncs;
217 errs = errw + PetscAbsScalar(betap) * errs + errncs;
218 errw = errw + PetscAbsScalar(alphap) * PetscAbsScalar(betap) * errz + errncw + PetscAbsScalar(alphap) * errncz;
219 errz = PetscAbsScalar(betap) * errz + errncz;
220 }
221 if (i > 1 && errrprev <= (tol * rnp) && errr > (tol * dp)) {
222 PetscCall(KSP_MatMult(ksp, Amat, X, R)); /* r <- Ax - b */
223 PetscCall(VecAYPX(R, -1.0, B));
224 PetscCall(KSP_PCApply(ksp, R, U)); /* u <- Br */
225 PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
226 PetscCall(KSP_MatMult(ksp, Amat, P, S)); /* s <- Ap */
227 PetscCall(KSP_PCApply(ksp, S, Q)); /* q <- Bs */
228 PetscCall(KSP_MatMult(ksp, Amat, Q, Z)); /* z <- Aq */
229 replace = 1;
230 }
231 }
232
233 i++;
234 ksp->its = i;
235
236 } while (i <= ksp->max_it);
237 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
238 PetscFunctionReturn(PETSC_SUCCESS);
239 }
240
241 /*MC
242 KSPPIPECGRR - Pipelined conjugate gradient method with automated residual replacements {cite}`cools2018analyzing`. [](sec_pipelineksp)
243
244 Level: intermediate
245
246 Notes:
247 This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard `KSPCG`. The
248 non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.
249
250 `KSPPIPECGRR` improves the robustness of `KSPPIPECG` by adding an automated residual replacement strategy.
251 True residual and other auxiliary variables are computed explicitly in a number of dynamically determined
252 iterations to counteract the accumulation of rounding errors and thus attain a higher maximal final accuracy.
253
254 See also `KSPPIPECG`, which is identical to `KSPPIPECGRR` without residual replacements.
255 See also `KSPPIPECR`, where the reduction is only overlapped with the matrix-vector product.
256
257 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
258 performance of pipelined methods. See [](doc_faq_pipelined)
259
260 Contributed by:
261 Siegfried Cools, Universiteit Antwerpen, Dept. Mathematics & Computer Science,
262 European FP7 Project on EXascale Algorithms and Advanced Computational Techniques (EXA2CT) / Research Foundation Flanders (FWO)
263
264 .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), `KSPCreate()`, `KSPSetType()`, `KSPPIPECR`, `KSPGROPPCG`, `KSPPIPECG`, `KSPPGMRES`, `KSPCG`, `KSPPIPEBCGS`, `KSPCGUseSingleReduction()`
265 M*/
KSPCreate_PIPECGRR(KSP ksp)266 PETSC_EXTERN PetscErrorCode KSPCreate_PIPECGRR(KSP ksp)
267 {
268 PetscFunctionBegin;
269 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 2));
270 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2));
271 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
272 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));
273
274 ksp->ops->setup = KSPSetUp_PIPECGRR;
275 ksp->ops->solve = KSPSolve_PIPECGRR;
276 ksp->ops->destroy = KSPDestroyDefault;
277 ksp->ops->view = NULL;
278 ksp->ops->setfromoptions = NULL;
279 ksp->ops->buildsolution = KSPBuildSolutionDefault;
280 ksp->ops->buildresidual = KSPBuildResidualDefault;
281 PetscFunctionReturn(PETSC_SUCCESS);
282 }
283