xref: /petsc/src/ksp/ksp/impls/cg/pipecgrr/pipecgrr.c (revision 613ce9fe8f5e2bcdf7c72d914e9769b5d960fb4c)
1 #include <petsc/private/kspimpl.h>
2 
3 /*
4      KSPSetUp_PIPECGRR - Sets up the workspace needed by the PIPECGRR method.
5 
6       This is called once, usually automatically by KSPSolve() or KSPSetUp()
7      but can be called directly by KSPSetUp()
8 */
KSPSetUp_PIPECGRR(KSP ksp)9 static PetscErrorCode KSPSetUp_PIPECGRR(KSP ksp)
10 {
11   PetscFunctionBegin;
12   /* get work vectors needed by PIPECGRR */
13   PetscCall(KSPSetWorkVecs(ksp, 9));
14   PetscFunctionReturn(PETSC_SUCCESS);
15 }
16 
17 /*
18  KSPSolve_PIPECGRR - This routine actually applies the pipelined conjugate gradient method with automated residual replacement
19 */
KSPSolve_PIPECGRR(KSP ksp)20 static PetscErrorCode KSPSolve_PIPECGRR(KSP ksp)
21 {
22   PetscInt    i = 0, replace = 0, nsize;
23   PetscScalar alpha = 0.0, beta = 0.0, gamma = 0.0, gammaold = 0.0, delta = 0.0, alphap = 0.0, betap = 0.0;
24   PetscReal   dp = 0.0, nsi = 0.0, sqn = 0.0, Anorm = 0.0, rnp = 0.0, pnp = 0.0, snp = 0.0, unp = 0.0, wnp = 0.0, xnp = 0.0, qnp = 0.0, znp = 0.0, mnz = 5.0, tol = PETSC_SQRT_MACHINE_EPSILON, eps = PETSC_MACHINE_EPSILON;
25   PetscReal   ds = 0.0, dz = 0.0, dx = 0.0, dpp = 0.0, dq = 0.0, dm = 0.0, du = 0.0, dw = 0.0, db = 0.0, errr = 0.0, errrprev = 0.0, errs = 0.0, errw = 0.0, errz = 0.0, errncr = 0.0, errncs = 0.0, errncw = 0.0, errncz = 0.0;
26   Vec         X, B, Z, P, W, Q, U, M, N, R, S;
27   Mat         Amat, Pmat;
28   PetscBool   diagonalscale;
29 
30   PetscFunctionBegin;
31   PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
32   PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);
33 
34   X = ksp->vec_sol;
35   B = ksp->vec_rhs;
36   M = ksp->work[0];
37   Z = ksp->work[1];
38   P = ksp->work[2];
39   N = ksp->work[3];
40   W = ksp->work[4];
41   Q = ksp->work[5];
42   U = ksp->work[6];
43   R = ksp->work[7];
44   S = ksp->work[8];
45 
46   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
47 
48   ksp->its = 0;
49   if (!ksp->guess_zero) {
50     PetscCall(KSP_MatMult(ksp, Amat, X, R)); /*  r <- b - Ax  */
51     PetscCall(VecAYPX(R, -1.0, B));
52   } else {
53     PetscCall(VecCopy(B, R)); /*  r <- b (x is 0)  */
54   }
55 
56   PetscCall(KSP_PCApply(ksp, R, U)); /*  u <- Br  */
57 
58   switch (ksp->normtype) {
59   case KSP_NORM_PRECONDITIONED:
60     PetscCall(VecNormBegin(U, NORM_2, &dp)); /*  dp <- u'*u = e'*A'*B'*B*A'*e'  */
61     PetscCall(VecNormBegin(B, NORM_2, &db));
62     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U)));
63     PetscCall(KSP_MatMult(ksp, Amat, U, W)); /*  w <- Au  */
64     PetscCall(VecNormEnd(U, NORM_2, &dp));
65     PetscCall(VecNormEnd(B, NORM_2, &db));
66     break;
67   case KSP_NORM_UNPRECONDITIONED:
68     PetscCall(VecNormBegin(R, NORM_2, &dp)); /*  dp <- r'*r = e'*A'*A*e  */
69     PetscCall(VecNormBegin(B, NORM_2, &db));
70     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
71     PetscCall(KSP_MatMult(ksp, Amat, U, W)); /*  w <- Au  */
72     PetscCall(VecNormEnd(R, NORM_2, &dp));
73     PetscCall(VecNormEnd(B, NORM_2, &db));
74     break;
75   case KSP_NORM_NATURAL:
76     PetscCall(VecDotBegin(R, U, &gamma)); /*  gamma <- u'*r  */
77     PetscCall(VecNormBegin(B, NORM_2, &db));
78     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
79     PetscCall(KSP_MatMult(ksp, Amat, U, W)); /*  w <- Au  */
80     PetscCall(VecDotEnd(R, U, &gamma));
81     PetscCall(VecNormEnd(B, NORM_2, &db));
82     KSPCheckDot(ksp, gamma);
83     dp = PetscSqrtReal(PetscAbsScalar(gamma)); /*  dp <- r'*u = r'*B*r = e'*A'*B*A*e  */
84     break;
85   case KSP_NORM_NONE:
86     PetscCall(KSP_MatMult(ksp, Amat, U, W));
87     dp = 0.0;
88     break;
89   default:
90     SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
91   }
92   PetscCall(KSPLogResidualHistory(ksp, dp));
93   PetscCall(KSPMonitor(ksp, 0, dp));
94   ksp->rnorm = dp;
95   PetscCall((*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP)); /*  test for convergence  */
96   if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
97 
98   PetscCall(MatNorm(Amat, NORM_INFINITY, &Anorm));
99   PetscCall(VecGetSize(B, &nsize));
100   nsi = (PetscReal)nsize;
101   sqn = PetscSqrtReal(nsi);
102 
103   do {
104     if (i > 1) {
105       pnp = dpp;
106       snp = ds;
107       qnp = dq;
108       znp = dz;
109     }
110     if (i > 0) {
111       rnp    = dp;
112       unp    = du;
113       wnp    = dw;
114       xnp    = dx;
115       alphap = alpha;
116       betap  = beta;
117     }
118 
119     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
120       PetscCall(VecNormBegin(R, NORM_2, &dp));
121     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
122       PetscCall(VecNormBegin(U, NORM_2, &dp));
123     }
124     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotBegin(R, U, &gamma));
125     PetscCall(VecDotBegin(W, U, &delta));
126 
127     if (i > 0) {
128       PetscCall(VecNormBegin(S, NORM_2, &ds));
129       PetscCall(VecNormBegin(Z, NORM_2, &dz));
130       PetscCall(VecNormBegin(P, NORM_2, &dpp));
131       PetscCall(VecNormBegin(Q, NORM_2, &dq));
132       PetscCall(VecNormBegin(M, NORM_2, &dm));
133     }
134     PetscCall(VecNormBegin(X, NORM_2, &dx));
135     PetscCall(VecNormBegin(U, NORM_2, &du));
136     PetscCall(VecNormBegin(W, NORM_2, &dw));
137 
138     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
139     PetscCall(KSP_PCApply(ksp, W, M));       /*   m <- Bw       */
140     PetscCall(KSP_MatMult(ksp, Amat, M, N)); /*   n <- Am       */
141 
142     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
143       PetscCall(VecNormEnd(R, NORM_2, &dp));
144     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
145       PetscCall(VecNormEnd(U, NORM_2, &dp));
146     }
147     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotEnd(R, U, &gamma));
148     PetscCall(VecDotEnd(W, U, &delta));
149 
150     if (i > 0) {
151       PetscCall(VecNormEnd(S, NORM_2, &ds));
152       PetscCall(VecNormEnd(Z, NORM_2, &dz));
153       PetscCall(VecNormEnd(P, NORM_2, &dpp));
154       PetscCall(VecNormEnd(Q, NORM_2, &dq));
155       PetscCall(VecNormEnd(M, NORM_2, &dm));
156     }
157     PetscCall(VecNormEnd(X, NORM_2, &dx));
158     PetscCall(VecNormEnd(U, NORM_2, &du));
159     PetscCall(VecNormEnd(W, NORM_2, &dw));
160 
161     if (i > 0) {
162       if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
163       else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
164 
165       ksp->rnorm = dp;
166       PetscCall(KSPLogResidualHistory(ksp, dp));
167       PetscCall(KSPMonitor(ksp, i, dp));
168       PetscCall((*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP));
169       if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
170     }
171 
172     if (i == 0) {
173       alpha = gamma / delta;
174       PetscCall(VecCopy(N, Z)); /*  z <- n  */
175       PetscCall(VecCopy(M, Q)); /*  q <- m  */
176       PetscCall(VecCopy(U, P)); /*  p <- u  */
177       PetscCall(VecCopy(W, S)); /*  s <- w  */
178     } else {
179       beta  = gamma / gammaold;
180       alpha = gamma / (delta - beta / alpha * gamma);
181       PetscCall(VecAYPX(Z, beta, N)); /*  z <- n + beta * z  */
182       PetscCall(VecAYPX(Q, beta, M)); /*  q <- m + beta * q  */
183       PetscCall(VecAYPX(P, beta, U)); /*  p <- u + beta * p  */
184       PetscCall(VecAYPX(S, beta, W)); /*  s <- w + beta * s  */
185     }
186     PetscCall(VecAXPY(X, alpha, P));  /*  x <- x + alpha * p  */
187     PetscCall(VecAXPY(U, -alpha, Q)); /*  u <- u - alpha * q  */
188     PetscCall(VecAXPY(W, -alpha, Z)); /*  w <- w - alpha * z  */
189     PetscCall(VecAXPY(R, -alpha, S)); /*  r <- r - alpha * s  */
190     gammaold = gamma;
191 
192     if (i > 0) {
193       errncr = PetscSqrtReal(Anorm * xnp + 2.0 * Anorm * PetscAbsScalar(alphap) * dpp + rnp + 2.0 * PetscAbsScalar(alphap) * ds) * eps;
194       errncw = PetscSqrtReal(Anorm * unp + 2.0 * Anorm * PetscAbsScalar(alphap) * dq + wnp + 2.0 * PetscAbsScalar(alphap) * dz) * eps;
195     }
196     if (i > 1) {
197       errncs = PetscSqrtReal(Anorm * unp + 2.0 * Anorm * PetscAbsScalar(betap) * pnp + wnp + 2.0 * PetscAbsScalar(betap) * snp) * eps;
198       errncz = PetscSqrtReal((mnz * sqn + 2) * Anorm * dm + 2.0 * Anorm * PetscAbsScalar(betap) * qnp + 2.0 * PetscAbsScalar(betap) * znp) * eps;
199     }
200 
201     if (i > 0) {
202       if (i == 1) {
203         errr = PetscSqrtReal((mnz * sqn + 1) * Anorm * xnp + db) * eps + PetscSqrtReal(PetscAbsScalar(alphap) * mnz * sqn * Anorm * dpp) * eps + errncr;
204         errs = PetscSqrtReal(mnz * sqn * Anorm * dpp) * eps;
205         errw = PetscSqrtReal(mnz * sqn * Anorm * unp) * eps + PetscSqrtReal(PetscAbsScalar(alphap) * mnz * sqn * Anorm * dq) * eps + errncw;
206         errz = PetscSqrtReal(mnz * sqn * Anorm * dq) * eps;
207       } else if (replace == 1) {
208         errrprev = errr;
209         errr     = PetscSqrtReal((mnz * sqn + 1) * Anorm * dx + db) * eps;
210         errs     = PetscSqrtReal(mnz * sqn * Anorm * dpp) * eps;
211         errw     = PetscSqrtReal(mnz * sqn * Anorm * du) * eps;
212         errz     = PetscSqrtReal(mnz * sqn * Anorm * dq) * eps;
213         replace  = 0;
214       } else {
215         errrprev = errr;
216         errr     = errr + PetscAbsScalar(alphap) * PetscAbsScalar(betap) * errs + PetscAbsScalar(alphap) * errw + errncr + PetscAbsScalar(alphap) * errncs;
217         errs     = errw + PetscAbsScalar(betap) * errs + errncs;
218         errw     = errw + PetscAbsScalar(alphap) * PetscAbsScalar(betap) * errz + errncw + PetscAbsScalar(alphap) * errncz;
219         errz     = PetscAbsScalar(betap) * errz + errncz;
220       }
221       if (i > 1 && errrprev <= (tol * rnp) && errr > (tol * dp)) {
222         PetscCall(KSP_MatMult(ksp, Amat, X, R)); /*  r <- Ax - b  */
223         PetscCall(VecAYPX(R, -1.0, B));
224         PetscCall(KSP_PCApply(ksp, R, U));       /*  u <- Br  */
225         PetscCall(KSP_MatMult(ksp, Amat, U, W)); /*  w <- Au  */
226         PetscCall(KSP_MatMult(ksp, Amat, P, S)); /*  s <- Ap  */
227         PetscCall(KSP_PCApply(ksp, S, Q));       /*  q <- Bs  */
228         PetscCall(KSP_MatMult(ksp, Amat, Q, Z)); /*  z <- Aq  */
229         replace = 1;
230       }
231     }
232 
233     i++;
234     ksp->its = i;
235 
236   } while (i <= ksp->max_it);
237   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
238   PetscFunctionReturn(PETSC_SUCCESS);
239 }
240 
241 /*MC
242    KSPPIPECGRR - Pipelined conjugate gradient method with automated residual replacements {cite}`cools2018analyzing`. [](sec_pipelineksp)
243 
244    Level: intermediate
245 
246    Notes:
247    This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard `KSPCG`.  The
248    non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.
249 
250    `KSPPIPECGRR` improves the robustness of `KSPPIPECG` by adding an automated residual replacement strategy.
251    True residual and other auxiliary variables are computed explicitly in a number of dynamically determined
252    iterations to counteract the accumulation of rounding errors and thus attain a higher maximal final accuracy.
253 
254    See also `KSPPIPECG`, which is identical to `KSPPIPECGRR` without residual replacements.
255    See also `KSPPIPECR`, where the reduction is only overlapped with the matrix-vector product.
256 
257    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
258    performance of pipelined methods. See [](doc_faq_pipelined)
259 
260    Contributed by:
261    Siegfried Cools, Universiteit Antwerpen, Dept. Mathematics & Computer Science,
262    European FP7 Project on EXascale Algorithms and Advanced Computational Techniques (EXA2CT) / Research Foundation Flanders (FWO)
263 
264 .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), `KSPCreate()`, `KSPSetType()`, `KSPPIPECR`, `KSPGROPPCG`, `KSPPIPECG`, `KSPPGMRES`, `KSPCG`, `KSPPIPEBCGS`, `KSPCGUseSingleReduction()`
265 M*/
KSPCreate_PIPECGRR(KSP ksp)266 PETSC_EXTERN PetscErrorCode KSPCreate_PIPECGRR(KSP ksp)
267 {
268   PetscFunctionBegin;
269   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 2));
270   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2));
271   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
272   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));
273 
274   ksp->ops->setup          = KSPSetUp_PIPECGRR;
275   ksp->ops->solve          = KSPSolve_PIPECGRR;
276   ksp->ops->destroy        = KSPDestroyDefault;
277   ksp->ops->view           = NULL;
278   ksp->ops->setfromoptions = NULL;
279   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
280   ksp->ops->buildresidual  = KSPBuildResidualDefault;
281   PetscFunctionReturn(PETSC_SUCCESS);
282 }
283