xref: /petsc/src/ksp/ksp/impls/bcgs/pipebcgs/pipebcgs.c (revision 834855d6effb0d027771461c8e947ee1ce5a1e17)
1 /*
2     This file implements pipelined BiCGStab (pipe-BiCGStab).
3 */
4 #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h> /*I  "petscksp.h"  I*/
5 
KSPSetUp_PIPEBCGS(KSP ksp)6 static PetscErrorCode KSPSetUp_PIPEBCGS(KSP ksp)
7 {
8   PetscFunctionBegin;
9   PetscCall(KSPSetWorkVecs(ksp, 15));
10   PetscFunctionReturn(PETSC_SUCCESS);
11 }
12 
13 /* Only need a few hacks from KSPSolve_BCGS */
14 #include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
KSPSolve_PIPEBCGS(KSP ksp)15 static PetscErrorCode KSPSolve_PIPEBCGS(KSP ksp)
16 {
17   PetscInt    i;
18   PetscScalar rho, rhoold, alpha, beta, omega = 0.0, d1, d2, d3;
19   Vec         X, B, S, R, RP, Y, Q, P2, Q2, R2, S2, W, Z, W2, Z2, T, V;
20   PetscReal   dp   = 0.0;
21   KSP_BCGS   *bcgs = (KSP_BCGS *)ksp->data;
22   PC          pc;
23 
24   PetscFunctionBegin;
25   X  = ksp->vec_sol;
26   B  = ksp->vec_rhs;
27   R  = ksp->work[0];
28   RP = ksp->work[1];
29   S  = ksp->work[2];
30   Y  = ksp->work[3];
31   Q  = ksp->work[4];
32   Q2 = ksp->work[5];
33   P2 = ksp->work[6];
34   R2 = ksp->work[7];
35   S2 = ksp->work[8];
36   W  = ksp->work[9];
37   Z  = ksp->work[10];
38   W2 = ksp->work[11];
39   Z2 = ksp->work[12];
40   T  = ksp->work[13];
41   V  = ksp->work[14];
42 
43   if (!ksp->guess_zero) {
44     if (!bcgs->guess) PetscCall(VecDuplicate(X, &bcgs->guess));
45     PetscCall(VecCopy(X, bcgs->guess));
46   } else {
47     PetscCall(VecSet(X, 0.0));
48   }
49 
50   /* Compute initial residual */
51   PetscCall(KSPGetPC(ksp, &pc));
52   if (!ksp->guess_zero) {
53     PetscCall(KSP_MatMult(ksp, pc->mat, X, Q2));
54     PetscCall(VecCopy(B, R));
55     PetscCall(VecAXPY(R, -1.0, Q2));
56   } else {
57     PetscCall(VecCopy(B, R));
58   }
59 
60   /* Test for nothing to do */
61   if (ksp->normtype != KSP_NORM_NONE) PetscCall(VecNorm(R, NORM_2, &dp));
62   else dp = 0.0;
63   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
64   ksp->its   = 0;
65   ksp->rnorm = dp;
66   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
67   PetscCall(KSPLogResidualHistory(ksp, dp));
68   PetscCall(KSPMonitor(ksp, 0, dp));
69   PetscCall((*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP));
70   if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
71 
72   /* Initialize */
73   PetscCall(VecCopy(R, RP)); /* rp <- r */
74 
75   PetscCall(VecDotBegin(R, RP, &rho)); /* rho <- (r,rp) */
76   PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
77   PetscCall(KSP_PCApply(ksp, R, R2));          /* r2 <- K r */
78   PetscCall(KSP_MatMult(ksp, pc->mat, R2, W)); /* w <- A r2 */
79   PetscCall(VecDotEnd(R, RP, &rho));
80 
81   PetscCall(VecDotBegin(W, RP, &d2)); /* d2 <- (w,rp) */
82   PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)W)));
83   PetscCall(KSP_PCApply(ksp, W, W2));          /* w2 <- K w */
84   PetscCall(KSP_MatMult(ksp, pc->mat, W2, T)); /* t <- A w2 */
85   PetscCall(VecDotEnd(W, RP, &d2));
86 
87   alpha = rho / d2;
88   beta  = 0.0;
89 
90   /* Main loop */
91   i = 0;
92   do {
93     if (i == 0) {
94       PetscCall(VecCopy(R2, P2)); /* p2 <- r2 */
95       PetscCall(VecCopy(W, S));   /* s  <- w  */
96       PetscCall(VecCopy(W2, S2)); /* s2 <- w2 */
97       PetscCall(VecCopy(T, Z));   /* z  <- t  */
98     } else {
99       PetscCall(VecAXPBYPCZ(P2, 1.0, -beta * omega, beta, R2, S2)); /* p2 <- beta * p2 + r2 - beta * omega * s2 */
100       PetscCall(VecAXPBYPCZ(S, 1.0, -beta * omega, beta, W, Z));    /* s  <- beta * s  + w  - beta * omega * z  */
101       PetscCall(VecAXPBYPCZ(S2, 1.0, -beta * omega, beta, W2, Z2)); /* s2 <- beta * s2 + w2 - beta * omega * z2 */
102       PetscCall(VecAXPBYPCZ(Z, 1.0, -beta * omega, beta, T, V));    /* z  <- beta * z  + t  - beta * omega * v  */
103     }
104     PetscCall(VecWAXPY(Q, -alpha, S, R));    /* q  <- r  - alpha s  */
105     PetscCall(VecWAXPY(Q2, -alpha, S2, R2)); /* q2 <- r2 - alpha s2 */
106     PetscCall(VecWAXPY(Y, -alpha, Z, W));    /* y  <- w  - alpha z  */
107 
108     PetscCall(VecDotBegin(Q, Y, &d1)); /* d1 <- (q,y) */
109     PetscCall(VecDotBegin(Y, Y, &d2)); /* d2 <- (y,y) */
110 
111     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Q)));
112     PetscCall(KSP_PCApply(ksp, Z, Z2));          /* z2 <- K z */
113     PetscCall(KSP_MatMult(ksp, pc->mat, Z2, V)); /* v <- A z2 */
114 
115     PetscCall(VecDotEnd(Q, Y, &d1));
116     PetscCall(VecDotEnd(Y, Y, &d2));
117 
118     if (d2 == 0.0) {
119       /* y is 0. if q is 0, then alpha s == r, and hence alpha p may be our solution. Give it a try? */
120       PetscCall(VecDot(Q, Q, &d1));
121       if (d1 != 0.0) {
122         ksp->reason = KSP_DIVERGED_BREAKDOWN;
123         break;
124       }
125       PetscCall(VecAXPY(X, alpha, P2)); /* x <- x + alpha p2 */
126       PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
127       ksp->its++;
128       ksp->rnorm  = 0.0;
129       ksp->reason = KSP_CONVERGED_RTOL;
130       PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
131       PetscCall(KSPLogResidualHistory(ksp, dp));
132       PetscCall(KSPMonitor(ksp, i + 1, 0.0));
133       break;
134     }
135     omega = d1 / d2;                                      /* omega <- (y'q) / (y'y) */
136     PetscCall(VecAXPBYPCZ(X, alpha, omega, 1.0, P2, Q2)); /* x <- alpha * p2 + omega * q2 + x */
137     PetscCall(VecWAXPY(R, -omega, Y, Q));                 /* r <- q - omega y */
138     PetscCall(VecWAXPY(R2, -alpha, Z2, W2));              /* r2 <- w2 - alpha z2 */
139     PetscCall(VecAYPX(R2, -omega, Q2));                   /* r2 <- q2 - omega r2 */
140     PetscCall(VecWAXPY(W, -alpha, V, T));                 /* w <- t - alpha v */
141     PetscCall(VecAYPX(W, -omega, Y));                     /* w <- y - omega w */
142     rhoold = rho;
143 
144     if (ksp->normtype != KSP_NORM_NONE && ksp->chknorm < i + 2) PetscCall(VecNormBegin(R, NORM_2, &dp)); /* dp <- norm(r) */
145     PetscCall(VecDotBegin(R, RP, &rho));                                                                 /* rho <- (r,rp) */
146     PetscCall(VecDotBegin(S, RP, &d1));                                                                  /* d1 <- (s,rp) */
147     PetscCall(VecDotBegin(W, RP, &d2));                                                                  /* d2 <- (w,rp) */
148     PetscCall(VecDotBegin(Z, RP, &d3));                                                                  /* d3 <- (z,rp) */
149 
150     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
151     PetscCall(KSP_PCApply(ksp, W, W2));          /* w2 <- K w */
152     PetscCall(KSP_MatMult(ksp, pc->mat, W2, T)); /* t <- A w2 */
153 
154     if (ksp->normtype != KSP_NORM_NONE && ksp->chknorm < i + 2) PetscCall(VecNormEnd(R, NORM_2, &dp));
155     PetscCall(VecDotEnd(R, RP, &rho));
156     PetscCall(VecDotEnd(S, RP, &d1));
157     PetscCall(VecDotEnd(W, RP, &d2));
158     PetscCall(VecDotEnd(Z, RP, &d3));
159 
160     PetscCheck(d2 + beta * d1 - beta * omega * d3 != 0.0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_PLIB, "Divide by zero");
161 
162     beta  = (rho / rhoold) * (alpha / omega);
163     alpha = rho / (d2 + beta * d1 - beta * omega * d3); /* alpha <- rho / (d2 + beta * d1 - beta * omega * d3) */
164 
165     PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
166     ksp->its++;
167 
168     /* Residual replacement step  */
169     if (i > 0 && i % 100 == 0 && i < 1001) {
170       PetscCall(KSP_MatMult(ksp, pc->mat, X, R));
171       PetscCall(VecAYPX(R, -1.0, B));              /* r  <- b - Ax */
172       PetscCall(KSP_PCApply(ksp, R, R2));          /* r2 <- K r */
173       PetscCall(KSP_MatMult(ksp, pc->mat, R2, W)); /* w  <- A r2 */
174       PetscCall(KSP_PCApply(ksp, W, W2));          /* w2 <- K w */
175       PetscCall(KSP_MatMult(ksp, pc->mat, W2, T)); /* t  <- A w2 */
176       PetscCall(KSP_MatMult(ksp, pc->mat, P2, S)); /* s  <- A p2 */
177       PetscCall(KSP_PCApply(ksp, S, S2));          /* s2 <- K s */
178       PetscCall(KSP_MatMult(ksp, pc->mat, S2, Z)); /* z  <- A s2 */
179       PetscCall(KSP_PCApply(ksp, Z, Z2));          /* z2 <- K z */
180       PetscCall(KSP_MatMult(ksp, pc->mat, Z2, V)); /* v  <- A z2 */
181     }
182 
183     ksp->rnorm = dp;
184     PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
185     PetscCall(KSPLogResidualHistory(ksp, dp));
186     PetscCall(KSPMonitor(ksp, i + 1, dp));
187     PetscCall((*ksp->converged)(ksp, i + 1, dp, &ksp->reason, ksp->cnvP));
188     if (ksp->reason) break;
189     if (rho == 0.0) {
190       ksp->reason = KSP_DIVERGED_BREAKDOWN;
191       break;
192     }
193     i++;
194   } while (i < ksp->max_it);
195 
196   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
197   PetscFunctionReturn(PETSC_SUCCESS);
198 }
199 
200 /*MC
201     KSPPIPEBCGS - Implements a pipelined BiCGStab method. [](sec_pipelineksp)
202 
203     Level: intermediate
204 
205     Notes:
206     This method has only two non-blocking reductions per iteration, compared to 3 blocking for standard `KSPFBCGS`.  The
207     non-blocking reductions are overlapped by matrix-vector products and preconditioner applications.
208 
209     Periodic residual replacement may be used to increase robustness and maximal attainable accuracy.
210 
211     Like `KSPFBCGS`, the `KSPPIPEBCGS` implementation only allows for right preconditioning.
212 
213     MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
214     performance of pipelined methods. See [](doc_faq_pipelined)
215 
216     Contributed by:
217     Siegfried Cools, Universiteit Antwerpen, {cite}`cools2017communication`
218     EXA2CT European Project on EXascale Algorithms and Advanced Computational Techniques, 2016.
219 
220 .seealso: [](ch_ksp), `KSPFBCGS`, `KSPFBCGSR`, `KSPBCGS`, `KSPBCGSL`, `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPFBCGS`, `KSPSetPCSide()`,
221            [](sec_pipelineksp), [](doc_faq_pipelined)
222 M*/
KSPCreate_PIPEBCGS(KSP ksp)223 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEBCGS(KSP ksp)
224 {
225   KSP_BCGS *bcgs;
226 
227   PetscFunctionBegin;
228   PetscCall(PetscNew(&bcgs));
229 
230   ksp->data                = bcgs;
231   ksp->ops->setup          = KSPSetUp_PIPEBCGS;
232   ksp->ops->solve          = KSPSolve_PIPEBCGS;
233   ksp->ops->destroy        = KSPDestroy_BCGS;
234   ksp->ops->reset          = KSPReset_BCGS;
235   ksp->ops->buildresidual  = KSPBuildResidualDefault;
236   ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;
237 
238   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
239   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
240   PetscFunctionReturn(PETSC_SUCCESS);
241 }
242