1 /*
2 This file implements pipelined BiCGStab (pipe-BiCGStab).
3 */
4 #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h> /*I "petscksp.h" I*/
5
KSPSetUp_PIPEBCGS(KSP ksp)6 static PetscErrorCode KSPSetUp_PIPEBCGS(KSP ksp)
7 {
8 PetscFunctionBegin;
9 PetscCall(KSPSetWorkVecs(ksp, 15));
10 PetscFunctionReturn(PETSC_SUCCESS);
11 }
12
13 /* Only need a few hacks from KSPSolve_BCGS */
14 #include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
KSPSolve_PIPEBCGS(KSP ksp)15 static PetscErrorCode KSPSolve_PIPEBCGS(KSP ksp)
16 {
17 PetscInt i;
18 PetscScalar rho, rhoold, alpha, beta, omega = 0.0, d1, d2, d3;
19 Vec X, B, S, R, RP, Y, Q, P2, Q2, R2, S2, W, Z, W2, Z2, T, V;
20 PetscReal dp = 0.0;
21 KSP_BCGS *bcgs = (KSP_BCGS *)ksp->data;
22 PC pc;
23
24 PetscFunctionBegin;
25 X = ksp->vec_sol;
26 B = ksp->vec_rhs;
27 R = ksp->work[0];
28 RP = ksp->work[1];
29 S = ksp->work[2];
30 Y = ksp->work[3];
31 Q = ksp->work[4];
32 Q2 = ksp->work[5];
33 P2 = ksp->work[6];
34 R2 = ksp->work[7];
35 S2 = ksp->work[8];
36 W = ksp->work[9];
37 Z = ksp->work[10];
38 W2 = ksp->work[11];
39 Z2 = ksp->work[12];
40 T = ksp->work[13];
41 V = ksp->work[14];
42
43 if (!ksp->guess_zero) {
44 if (!bcgs->guess) PetscCall(VecDuplicate(X, &bcgs->guess));
45 PetscCall(VecCopy(X, bcgs->guess));
46 } else {
47 PetscCall(VecSet(X, 0.0));
48 }
49
50 /* Compute initial residual */
51 PetscCall(KSPGetPC(ksp, &pc));
52 if (!ksp->guess_zero) {
53 PetscCall(KSP_MatMult(ksp, pc->mat, X, Q2));
54 PetscCall(VecCopy(B, R));
55 PetscCall(VecAXPY(R, -1.0, Q2));
56 } else {
57 PetscCall(VecCopy(B, R));
58 }
59
60 /* Test for nothing to do */
61 if (ksp->normtype != KSP_NORM_NONE) PetscCall(VecNorm(R, NORM_2, &dp));
62 else dp = 0.0;
63 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
64 ksp->its = 0;
65 ksp->rnorm = dp;
66 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
67 PetscCall(KSPLogResidualHistory(ksp, dp));
68 PetscCall(KSPMonitor(ksp, 0, dp));
69 PetscCall((*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP));
70 if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
71
72 /* Initialize */
73 PetscCall(VecCopy(R, RP)); /* rp <- r */
74
75 PetscCall(VecDotBegin(R, RP, &rho)); /* rho <- (r,rp) */
76 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
77 PetscCall(KSP_PCApply(ksp, R, R2)); /* r2 <- K r */
78 PetscCall(KSP_MatMult(ksp, pc->mat, R2, W)); /* w <- A r2 */
79 PetscCall(VecDotEnd(R, RP, &rho));
80
81 PetscCall(VecDotBegin(W, RP, &d2)); /* d2 <- (w,rp) */
82 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)W)));
83 PetscCall(KSP_PCApply(ksp, W, W2)); /* w2 <- K w */
84 PetscCall(KSP_MatMult(ksp, pc->mat, W2, T)); /* t <- A w2 */
85 PetscCall(VecDotEnd(W, RP, &d2));
86
87 alpha = rho / d2;
88 beta = 0.0;
89
90 /* Main loop */
91 i = 0;
92 do {
93 if (i == 0) {
94 PetscCall(VecCopy(R2, P2)); /* p2 <- r2 */
95 PetscCall(VecCopy(W, S)); /* s <- w */
96 PetscCall(VecCopy(W2, S2)); /* s2 <- w2 */
97 PetscCall(VecCopy(T, Z)); /* z <- t */
98 } else {
99 PetscCall(VecAXPBYPCZ(P2, 1.0, -beta * omega, beta, R2, S2)); /* p2 <- beta * p2 + r2 - beta * omega * s2 */
100 PetscCall(VecAXPBYPCZ(S, 1.0, -beta * omega, beta, W, Z)); /* s <- beta * s + w - beta * omega * z */
101 PetscCall(VecAXPBYPCZ(S2, 1.0, -beta * omega, beta, W2, Z2)); /* s2 <- beta * s2 + w2 - beta * omega * z2 */
102 PetscCall(VecAXPBYPCZ(Z, 1.0, -beta * omega, beta, T, V)); /* z <- beta * z + t - beta * omega * v */
103 }
104 PetscCall(VecWAXPY(Q, -alpha, S, R)); /* q <- r - alpha s */
105 PetscCall(VecWAXPY(Q2, -alpha, S2, R2)); /* q2 <- r2 - alpha s2 */
106 PetscCall(VecWAXPY(Y, -alpha, Z, W)); /* y <- w - alpha z */
107
108 PetscCall(VecDotBegin(Q, Y, &d1)); /* d1 <- (q,y) */
109 PetscCall(VecDotBegin(Y, Y, &d2)); /* d2 <- (y,y) */
110
111 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Q)));
112 PetscCall(KSP_PCApply(ksp, Z, Z2)); /* z2 <- K z */
113 PetscCall(KSP_MatMult(ksp, pc->mat, Z2, V)); /* v <- A z2 */
114
115 PetscCall(VecDotEnd(Q, Y, &d1));
116 PetscCall(VecDotEnd(Y, Y, &d2));
117
118 if (d2 == 0.0) {
119 /* y is 0. if q is 0, then alpha s == r, and hence alpha p may be our solution. Give it a try? */
120 PetscCall(VecDot(Q, Q, &d1));
121 if (d1 != 0.0) {
122 ksp->reason = KSP_DIVERGED_BREAKDOWN;
123 break;
124 }
125 PetscCall(VecAXPY(X, alpha, P2)); /* x <- x + alpha p2 */
126 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
127 ksp->its++;
128 ksp->rnorm = 0.0;
129 ksp->reason = KSP_CONVERGED_RTOL;
130 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
131 PetscCall(KSPLogResidualHistory(ksp, dp));
132 PetscCall(KSPMonitor(ksp, i + 1, 0.0));
133 break;
134 }
135 omega = d1 / d2; /* omega <- (y'q) / (y'y) */
136 PetscCall(VecAXPBYPCZ(X, alpha, omega, 1.0, P2, Q2)); /* x <- alpha * p2 + omega * q2 + x */
137 PetscCall(VecWAXPY(R, -omega, Y, Q)); /* r <- q - omega y */
138 PetscCall(VecWAXPY(R2, -alpha, Z2, W2)); /* r2 <- w2 - alpha z2 */
139 PetscCall(VecAYPX(R2, -omega, Q2)); /* r2 <- q2 - omega r2 */
140 PetscCall(VecWAXPY(W, -alpha, V, T)); /* w <- t - alpha v */
141 PetscCall(VecAYPX(W, -omega, Y)); /* w <- y - omega w */
142 rhoold = rho;
143
144 if (ksp->normtype != KSP_NORM_NONE && ksp->chknorm < i + 2) PetscCall(VecNormBegin(R, NORM_2, &dp)); /* dp <- norm(r) */
145 PetscCall(VecDotBegin(R, RP, &rho)); /* rho <- (r,rp) */
146 PetscCall(VecDotBegin(S, RP, &d1)); /* d1 <- (s,rp) */
147 PetscCall(VecDotBegin(W, RP, &d2)); /* d2 <- (w,rp) */
148 PetscCall(VecDotBegin(Z, RP, &d3)); /* d3 <- (z,rp) */
149
150 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
151 PetscCall(KSP_PCApply(ksp, W, W2)); /* w2 <- K w */
152 PetscCall(KSP_MatMult(ksp, pc->mat, W2, T)); /* t <- A w2 */
153
154 if (ksp->normtype != KSP_NORM_NONE && ksp->chknorm < i + 2) PetscCall(VecNormEnd(R, NORM_2, &dp));
155 PetscCall(VecDotEnd(R, RP, &rho));
156 PetscCall(VecDotEnd(S, RP, &d1));
157 PetscCall(VecDotEnd(W, RP, &d2));
158 PetscCall(VecDotEnd(Z, RP, &d3));
159
160 PetscCheck(d2 + beta * d1 - beta * omega * d3 != 0.0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_PLIB, "Divide by zero");
161
162 beta = (rho / rhoold) * (alpha / omega);
163 alpha = rho / (d2 + beta * d1 - beta * omega * d3); /* alpha <- rho / (d2 + beta * d1 - beta * omega * d3) */
164
165 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
166 ksp->its++;
167
168 /* Residual replacement step */
169 if (i > 0 && i % 100 == 0 && i < 1001) {
170 PetscCall(KSP_MatMult(ksp, pc->mat, X, R));
171 PetscCall(VecAYPX(R, -1.0, B)); /* r <- b - Ax */
172 PetscCall(KSP_PCApply(ksp, R, R2)); /* r2 <- K r */
173 PetscCall(KSP_MatMult(ksp, pc->mat, R2, W)); /* w <- A r2 */
174 PetscCall(KSP_PCApply(ksp, W, W2)); /* w2 <- K w */
175 PetscCall(KSP_MatMult(ksp, pc->mat, W2, T)); /* t <- A w2 */
176 PetscCall(KSP_MatMult(ksp, pc->mat, P2, S)); /* s <- A p2 */
177 PetscCall(KSP_PCApply(ksp, S, S2)); /* s2 <- K s */
178 PetscCall(KSP_MatMult(ksp, pc->mat, S2, Z)); /* z <- A s2 */
179 PetscCall(KSP_PCApply(ksp, Z, Z2)); /* z2 <- K z */
180 PetscCall(KSP_MatMult(ksp, pc->mat, Z2, V)); /* v <- A z2 */
181 }
182
183 ksp->rnorm = dp;
184 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
185 PetscCall(KSPLogResidualHistory(ksp, dp));
186 PetscCall(KSPMonitor(ksp, i + 1, dp));
187 PetscCall((*ksp->converged)(ksp, i + 1, dp, &ksp->reason, ksp->cnvP));
188 if (ksp->reason) break;
189 if (rho == 0.0) {
190 ksp->reason = KSP_DIVERGED_BREAKDOWN;
191 break;
192 }
193 i++;
194 } while (i < ksp->max_it);
195
196 if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
197 PetscFunctionReturn(PETSC_SUCCESS);
198 }
199
200 /*MC
201 KSPPIPEBCGS - Implements a pipelined BiCGStab method. [](sec_pipelineksp)
202
203 Level: intermediate
204
205 Notes:
206 This method has only two non-blocking reductions per iteration, compared to 3 blocking for standard `KSPFBCGS`. The
207 non-blocking reductions are overlapped by matrix-vector products and preconditioner applications.
208
209 Periodic residual replacement may be used to increase robustness and maximal attainable accuracy.
210
211 Like `KSPFBCGS`, the `KSPPIPEBCGS` implementation only allows for right preconditioning.
212
213 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
214 performance of pipelined methods. See [](doc_faq_pipelined)
215
216 Contributed by:
217 Siegfried Cools, Universiteit Antwerpen, {cite}`cools2017communication`
218 EXA2CT European Project on EXascale Algorithms and Advanced Computational Techniques, 2016.
219
220 .seealso: [](ch_ksp), `KSPFBCGS`, `KSPFBCGSR`, `KSPBCGS`, `KSPBCGSL`, `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPFBCGS`, `KSPSetPCSide()`,
221 [](sec_pipelineksp), [](doc_faq_pipelined)
222 M*/
KSPCreate_PIPEBCGS(KSP ksp)223 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEBCGS(KSP ksp)
224 {
225 KSP_BCGS *bcgs;
226
227 PetscFunctionBegin;
228 PetscCall(PetscNew(&bcgs));
229
230 ksp->data = bcgs;
231 ksp->ops->setup = KSPSetUp_PIPEBCGS;
232 ksp->ops->solve = KSPSolve_PIPEBCGS;
233 ksp->ops->destroy = KSPDestroy_BCGS;
234 ksp->ops->reset = KSPReset_BCGS;
235 ksp->ops->buildresidual = KSPBuildResidualDefault;
236 ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;
237
238 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
239 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
240 PetscFunctionReturn(PETSC_SUCCESS);
241 }
242