1 #include <petsc/private/kspimpl.h>
2 #include <petsc/private/vecimpl.h>
3
4 #define offset(j) PetscMax(((j) - (2 * l)), 0)
5 #define shift(i, j) ((i) - offset(j))
6 #define G(i, j) (plcg->G[((j) * (2 * l + 1)) + (shift((i), (j)))])
7 #define G_noshift(i, j) (plcg->G[((j) * (2 * l + 1)) + (i)])
8 #define alpha(i) (plcg->alpha[i])
9 #define gamma(i) (plcg->gamma[i])
10 #define delta(i) (plcg->delta[i])
11 #define sigma(i) (plcg->sigma[i])
12 #define req(i) (plcg->req[i])
13
14 typedef struct KSP_CG_PIPE_L_s KSP_CG_PIPE_L;
15 struct KSP_CG_PIPE_L_s {
16 PetscInt l; /* pipeline depth */
17 Vec *Z; /* Z vectors (shifted base) */
18 Vec *U; /* U vectors (unpreconditioned shifted base) */
19 Vec *V; /* V vectors (original base) */
20 Vec *Q; /* Q vectors (auxiliary bases) */
21 Vec p; /* work vector */
22 PetscScalar *G; /* such that Z = VG (band matrix)*/
23 PetscScalar *gamma, *delta, *alpha;
24 PetscReal lmin, lmax; /* min and max eigen values estimates to compute base shifts */
25 PetscReal *sigma; /* base shifts */
26 MPI_Request *req; /* request array for asynchronous global collective */
27 PetscBool show_rstrt; /* flag to show restart information in output (default: not shown) */
28 };
29
30 /*
31 KSPSetUp_PIPELCG - Sets up the workspace needed by the PIPELCG method.
32
33 This is called once, usually automatically by KSPSolve() or KSPSetUp()
34 but can be called directly by KSPSetUp()
35 */
KSPSetUp_PIPELCG(KSP ksp)36 static PetscErrorCode KSPSetUp_PIPELCG(KSP ksp)
37 {
38 KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
39 PetscInt l = plcg->l, max_it = ksp->max_it;
40 MPI_Comm comm;
41
42 PetscFunctionBegin;
43 comm = PetscObjectComm((PetscObject)ksp);
44 PetscCheck(max_it >= 1, comm, PETSC_ERR_ARG_OUTOFRANGE, "%s: max_it argument must be positive.", ((PetscObject)ksp)->type_name);
45 PetscCheck(l >= 1, comm, PETSC_ERR_ARG_OUTOFRANGE, "%s: pipel argument must be positive.", ((PetscObject)ksp)->type_name);
46 PetscCheck(l <= max_it, comm, PETSC_ERR_ARG_OUTOFRANGE, "%s: pipel argument must be less than max_it.", ((PetscObject)ksp)->type_name);
47
48 PetscCall(KSPSetWorkVecs(ksp, 1)); /* get work vectors needed by PIPELCG */
49 plcg->p = ksp->work[0];
50
51 PetscCall(VecDuplicateVecs(plcg->p, PetscMax(3, l + 1), &plcg->Z));
52 PetscCall(VecDuplicateVecs(plcg->p, 3, &plcg->U));
53 PetscCall(VecDuplicateVecs(plcg->p, 3, &plcg->V));
54 PetscCall(VecDuplicateVecs(plcg->p, 3 * (l - 1) + 1, &plcg->Q));
55 PetscCall(PetscCalloc1(2, &plcg->alpha));
56 PetscCall(PetscCalloc1(l, &plcg->sigma));
57 PetscFunctionReturn(PETSC_SUCCESS);
58 }
59
KSPReset_PIPELCG(KSP ksp)60 static PetscErrorCode KSPReset_PIPELCG(KSP ksp)
61 {
62 KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
63 PetscInt l = plcg->l;
64
65 PetscFunctionBegin;
66 PetscCall(PetscFree(plcg->sigma));
67 PetscCall(PetscFree(plcg->alpha));
68 PetscCall(VecDestroyVecs(PetscMax(3, l + 1), &plcg->Z));
69 PetscCall(VecDestroyVecs(3, &plcg->U));
70 PetscCall(VecDestroyVecs(3, &plcg->V));
71 PetscCall(VecDestroyVecs(3 * (l - 1) + 1, &plcg->Q));
72 PetscFunctionReturn(PETSC_SUCCESS);
73 }
74
KSPDestroy_PIPELCG(KSP ksp)75 static PetscErrorCode KSPDestroy_PIPELCG(KSP ksp)
76 {
77 PetscFunctionBegin;
78 PetscCall(KSPReset_PIPELCG(ksp));
79 PetscCall(KSPDestroyDefault(ksp));
80 PetscFunctionReturn(PETSC_SUCCESS);
81 }
82
KSPSetFromOptions_PIPELCG(KSP ksp,PetscOptionItems PetscOptionsObject)83 static PetscErrorCode KSPSetFromOptions_PIPELCG(KSP ksp, PetscOptionItems PetscOptionsObject)
84 {
85 KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
86 PetscBool flag = PETSC_FALSE;
87
88 PetscFunctionBegin;
89 PetscOptionsHeadBegin(PetscOptionsObject, "KSP PIPELCG options");
90 PetscCall(PetscOptionsInt("-ksp_pipelcg_pipel", "Pipeline length", "", plcg->l, &plcg->l, &flag));
91 if (!flag) plcg->l = 1;
92 PetscCall(PetscOptionsReal("-ksp_pipelcg_lmin", "Estimate for smallest eigenvalue", "", plcg->lmin, &plcg->lmin, &flag));
93 if (!flag) plcg->lmin = 0.0;
94 PetscCall(PetscOptionsReal("-ksp_pipelcg_lmax", "Estimate for largest eigenvalue", "", plcg->lmax, &plcg->lmax, &flag));
95 if (!flag) plcg->lmax = 0.0;
96 PetscCall(PetscOptionsBool("-ksp_pipelcg_monitor", "Output information on restarts when they occur? (default: 0)", "", plcg->show_rstrt, &plcg->show_rstrt, &flag));
97 if (!flag) plcg->show_rstrt = PETSC_FALSE;
98 PetscOptionsHeadEnd();
99 PetscFunctionReturn(PETSC_SUCCESS);
100 }
101
MPIU_Iallreduce(void * sendbuf,void * recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request * request)102 static PetscMPIInt MPIU_Iallreduce(void *sendbuf, void *recvbuf, PetscMPIInt count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)
103 {
104 PetscMPIInt err;
105 #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
106 err = MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request);
107 #else
108 err = MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm);
109 *request = MPI_REQUEST_NULL;
110 #endif
111 return err;
112 }
113
KSPView_PIPELCG(KSP ksp,PetscViewer viewer)114 static PetscErrorCode KSPView_PIPELCG(KSP ksp, PetscViewer viewer)
115 {
116 KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
117 PetscBool isascii = PETSC_FALSE, isstring = PETSC_FALSE;
118
119 PetscFunctionBegin;
120 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
121 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
122 if (isascii) {
123 PetscCall(PetscViewerASCIIPrintf(viewer, " Pipeline depth: %" PetscInt_FMT "\n", plcg->l));
124 PetscCall(PetscViewerASCIIPrintf(viewer, " Minimal eigenvalue estimate %g\n", (double)plcg->lmin));
125 PetscCall(PetscViewerASCIIPrintf(viewer, " Maximal eigenvalue estimate %g\n", (double)plcg->lmax));
126 } else if (isstring) {
127 PetscCall(PetscViewerStringSPrintf(viewer, " Pipeline depth: %" PetscInt_FMT "\n", plcg->l));
128 PetscCall(PetscViewerStringSPrintf(viewer, " Minimal eigenvalue estimate %g\n", (double)plcg->lmin));
129 PetscCall(PetscViewerStringSPrintf(viewer, " Maximal eigenvalue estimate %g\n", (double)plcg->lmax));
130 }
131 PetscFunctionReturn(PETSC_SUCCESS);
132 }
133
KSPSolve_InnerLoop_PIPELCG(KSP ksp)134 static PetscErrorCode KSPSolve_InnerLoop_PIPELCG(KSP ksp)
135 {
136 KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
137 Mat A = NULL, Pmat = NULL;
138 PetscInt it = 0, max_it = ksp->max_it, l = plcg->l, i = 0, j = 0, k = 0;
139 PetscInt start = 0, middle = 0, end = 0;
140 Vec *Z = plcg->Z, *U = plcg->U, *V = plcg->V, *Q = plcg->Q;
141 Vec x = NULL, p = NULL, temp = NULL;
142 PetscScalar sum_dummy = 0.0, eta = 0.0, zeta = 0.0, lambda = 0.0;
143 PetscReal dp = 0.0, tmp = 0.0, beta = 0.0, invbeta2 = 0.0;
144 MPI_Comm comm;
145 PetscMPIInt mpin;
146
147 PetscFunctionBegin;
148 x = ksp->vec_sol;
149 p = plcg->p;
150
151 comm = PetscObjectComm((PetscObject)ksp);
152 PetscCall(PCGetOperators(ksp->pc, &A, &Pmat));
153
154 for (it = 0; it < max_it + l; ++it) {
155 /* Multiplication z_{it+1} = Az_{it} */
156 /* Shift the U vector pointers */
157 temp = U[2];
158 for (i = 2; i > 0; i--) U[i] = U[i - 1];
159 U[0] = temp;
160 if (it < l) {
161 /* SpMV and Sigma-shift and Prec */
162 PetscCall(MatMult(A, Z[l - it], U[0]));
163 PetscCall(VecAXPY(U[0], -sigma(it), U[1]));
164 PetscCall(KSP_PCApply(ksp, U[0], Z[l - it - 1]));
165 if (it < l - 1) PetscCall(VecCopy(Z[l - it - 1], Q[3 * it]));
166 } else {
167 /* Shift the Z vector pointers */
168 temp = Z[PetscMax(l, 2)];
169 for (i = PetscMax(l, 2); i > 0; --i) Z[i] = Z[i - 1];
170 Z[0] = temp;
171 /* SpMV and Prec */
172 PetscCall(MatMult(A, Z[1], U[0]));
173 PetscCall(KSP_PCApply(ksp, U[0], Z[0]));
174 }
175
176 /* Adjust the G matrix */
177 if (it >= l) {
178 if (it == l) {
179 /* MPI_Wait for G(0,0),scale V0 and Z and U and Q vectors with 1/beta */
180 PetscCallMPI(MPI_Wait(&req(0), MPI_STATUS_IGNORE));
181 beta = PetscSqrtReal(PetscRealPart(G(0, 0)));
182 G(0, 0) = 1.0;
183 PetscCall(VecAXPY(V[0], 1.0 / beta, p)); /* this assumes V[0] to be zero initially */
184 for (j = 0; j <= PetscMax(l, 2); ++j) PetscCall(VecScale(Z[j], 1.0 / beta));
185 for (j = 0; j <= 2; ++j) PetscCall(VecScale(U[j], 1.0 / beta));
186 for (j = 0; j < l - 1; ++j) PetscCall(VecScale(Q[3 * j], 1.0 / beta));
187 }
188
189 /* MPI_Wait until the dot products,started l iterations ago,are completed */
190 PetscCallMPI(MPI_Wait(&req(it - l + 1), MPI_STATUS_IGNORE));
191 if (it >= 2 * l) {
192 for (j = PetscMax(0, it - 3 * l + 1); j <= it - 2 * l; j++) G(j, it - l + 1) = G(it - 2 * l + 1, j + l); /* exploit symmetry in G matrix */
193 }
194
195 if (it <= 2 * l - 1) {
196 invbeta2 = 1.0 / (beta * beta);
197 /* Scale columns 1 up to l of G with 1/beta^2 */
198 for (j = PetscMax(it - 3 * l + 1, 0); j <= it - l + 1; ++j) G(j, it - l + 1) *= invbeta2;
199 }
200
201 for (j = PetscMax(it - 2 * l + 2, 0); j <= it - l; ++j) {
202 sum_dummy = 0.0;
203 for (k = PetscMax(it - 3 * l + 1, 0); k <= j - 1; ++k) sum_dummy = sum_dummy + G(k, j) * G(k, it - l + 1);
204 G(j, it - l + 1) = (G(j, it - l + 1) - sum_dummy) / G(j, j);
205 }
206
207 sum_dummy = 0.0;
208 for (k = PetscMax(it - 3 * l + 1, 0); k <= it - l; ++k) sum_dummy = sum_dummy + G(k, it - l + 1) * G(k, it - l + 1);
209
210 tmp = PetscRealPart(G(it - l + 1, it - l + 1) - sum_dummy);
211 /* Breakdown check */
212 if (tmp < 0) {
213 if (plcg->show_rstrt) PetscCall(PetscPrintf(comm, "Sqrt breakdown in iteration %" PetscInt_FMT ": sqrt argument is %e. Iteration was restarted.\n", ksp->its + 1, (double)tmp));
214 /* End hanging dot-products in the pipeline before exiting for-loop */
215 start = it - l + 2;
216 end = PetscMin(it + 1, max_it + 1); /* !warning! 'it' can actually be greater than 'max_it' */
217 for (i = start; i < end; ++i) PetscCallMPI(MPI_Wait(&req(i), MPI_STATUS_IGNORE));
218 break;
219 }
220 G(it - l + 1, it - l + 1) = PetscSqrtReal(tmp);
221
222 if (it < 2 * l) {
223 if (it == l) {
224 gamma(it - l) = (G(it - l, it - l + 1) + sigma(it - l) * G(it - l, it - l)) / G(it - l, it - l);
225 } else {
226 gamma(it - l) = (G(it - l, it - l + 1) + sigma(it - l) * G(it - l, it - l) - delta(it - l - 1) * G(it - l - 1, it - l)) / G(it - l, it - l);
227 }
228 delta(it - l) = G(it - l + 1, it - l + 1) / G(it - l, it - l);
229 } else {
230 gamma(it - l) = (G(it - l, it - l) * gamma(it - 2 * l) + G(it - l, it - l + 1) * delta(it - 2 * l) - G(it - l - 1, it - l) * delta(it - l - 1)) / G(it - l, it - l);
231 delta(it - l) = (G(it - l + 1, it - l + 1) * delta(it - 2 * l)) / G(it - l, it - l);
232 }
233
234 /* Recursively compute the next V, Q, Z and U vectors */
235 /* Shift the V vector pointers */
236 temp = V[2];
237 for (i = 2; i > 0; i--) V[i] = V[i - 1];
238 V[0] = temp;
239
240 /* Recurrence V vectors */
241 if (l == 1) {
242 PetscCall(VecCopy(Z[1], V[0]));
243 } else {
244 PetscCall(VecCopy(Q[0], V[0]));
245 }
246 if (it == l) {
247 PetscCall(VecAXPY(V[0], sigma(0) - gamma(it - l), V[1]));
248 } else {
249 alpha(0) = sigma(0) - gamma(it - l);
250 alpha(1) = -delta(it - l - 1);
251 PetscCall(VecMAXPY(V[0], 2, &alpha(0), &V[1]));
252 }
253 PetscCall(VecScale(V[0], 1.0 / delta(it - l)));
254
255 /* Recurrence Q vectors */
256 for (j = 0; j < l - 1; ++j) {
257 /* Shift the Q vector pointers */
258 temp = Q[3 * j + 2];
259 for (i = 2; i > 0; i--) Q[3 * j + i] = Q[3 * j + i - 1];
260 Q[3 * j] = temp;
261
262 if (j < l - 2) {
263 PetscCall(VecCopy(Q[3 * (j + 1)], Q[3 * j]));
264 } else {
265 PetscCall(VecCopy(Z[1], Q[3 * j]));
266 }
267 if (it == l) {
268 PetscCall(VecAXPY(Q[3 * j], sigma(j + 1) - gamma(it - l), Q[3 * j + 1]));
269 } else {
270 alpha(0) = sigma(j + 1) - gamma(it - l);
271 alpha(1) = -delta(it - l - 1);
272 PetscCall(VecMAXPY(Q[3 * j], 2, &alpha(0), &Q[3 * j + 1]));
273 }
274 PetscCall(VecScale(Q[3 * j], 1.0 / delta(it - l)));
275 }
276
277 /* Recurrence Z and U vectors */
278 if (it == l) {
279 PetscCall(VecAXPY(Z[0], -gamma(it - l), Z[1]));
280 PetscCall(VecAXPY(U[0], -gamma(it - l), U[1]));
281 } else {
282 alpha(0) = -gamma(it - l);
283 alpha(1) = -delta(it - l - 1);
284 PetscCall(VecMAXPY(Z[0], 2, &alpha(0), &Z[1]));
285 PetscCall(VecMAXPY(U[0], 2, &alpha(0), &U[1]));
286 }
287 PetscCall(VecScale(Z[0], 1.0 / delta(it - l)));
288 PetscCall(VecScale(U[0], 1.0 / delta(it - l)));
289 }
290
291 /* Compute and communicate the dot products */
292 if (it < l) {
293 for (j = 0; j < it + 2; ++j) PetscCall((*U[0]->ops->dot_local)(U[0], Z[l - j], &G(j, it + 1))); /* dot-products (U[0],Z[j]) */
294 PetscCall(PetscMPIIntCast(it + 2, &mpin));
295 PetscCallMPI(MPIU_Iallreduce(MPI_IN_PLACE, &G(0, it + 1), mpin, MPIU_SCALAR, MPIU_SUM, comm, &req(it + 1)));
296 } else if ((it >= l) && (it < max_it)) {
297 middle = it - l + 2;
298 end = it + 2;
299 PetscCall((*U[0]->ops->dot_local)(U[0], V[0], &G(it - l + 1, it + 1))); /* dot-product (U[0],V[0]) */
300 for (j = middle; j < end; ++j) PetscCall((*U[0]->ops->dot_local)(U[0], plcg->Z[it + 1 - j], &G(j, it + 1))); /* dot-products (U[0],Z[j]) */
301 PetscCall(PetscMPIIntCast(l + 1, &mpin));
302 PetscCallMPI(MPIU_Iallreduce(MPI_IN_PLACE, &G(it - l + 1, it + 1), mpin, MPIU_SCALAR, MPIU_SUM, comm, &req(it + 1)));
303 }
304
305 /* Compute solution vector and residual norm */
306 if (it >= l) {
307 if (it == l) {
308 if (ksp->its != 0) ++ksp->its;
309 eta = gamma(0);
310 zeta = beta;
311 PetscCall(VecCopy(V[1], p));
312 PetscCall(VecScale(p, 1.0 / eta));
313 PetscCall(VecAXPY(x, zeta, p));
314 dp = beta;
315 } else if (it > l) {
316 k = it - l;
317 ++ksp->its;
318 lambda = delta(k - 1) / eta;
319 eta = gamma(k) - lambda * delta(k - 1);
320 zeta = -lambda * zeta;
321 PetscCall(VecScale(p, -delta(k - 1) / eta));
322 PetscCall(VecAXPY(p, 1.0 / eta, V[1]));
323 PetscCall(VecAXPY(x, zeta, p));
324 dp = PetscAbsScalar(zeta);
325 }
326 ksp->rnorm = dp;
327 PetscCall(KSPLogResidualHistory(ksp, dp));
328 PetscCall(KSPMonitor(ksp, ksp->its, dp));
329 PetscCall((*ksp->converged)(ksp, ksp->its, dp, &ksp->reason, ksp->cnvP));
330
331 if (ksp->its >= max_it && !ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
332 if (ksp->reason) {
333 /* End hanging dot-products in the pipeline before exiting for-loop */
334 start = it - l + 2;
335 end = PetscMin(it + 2, max_it + 1); /* !warning! 'it' can actually be greater than 'max_it' */
336 for (i = start; i < end; ++i) PetscCallMPI(MPI_Wait(&req(i), MPI_STATUS_IGNORE));
337 break;
338 }
339 }
340 } /* End inner for loop */
341 PetscFunctionReturn(PETSC_SUCCESS);
342 }
343
KSPSolve_ReInitData_PIPELCG(KSP ksp)344 static PetscErrorCode KSPSolve_ReInitData_PIPELCG(KSP ksp)
345 {
346 KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
347 PetscInt i = 0, j = 0, l = plcg->l, max_it = ksp->max_it;
348
349 PetscFunctionBegin;
350 for (i = 0; i < PetscMax(3, l + 1); ++i) PetscCall(VecSet(plcg->Z[i], 0.0));
351 for (i = 1; i < 3; ++i) PetscCall(VecSet(plcg->U[i], 0.0));
352 for (i = 0; i < 3; ++i) PetscCall(VecSet(plcg->V[i], 0.0));
353 for (i = 0; i < 3 * (l - 1) + 1; ++i) PetscCall(VecSet(plcg->Q[i], 0.0));
354 for (j = 0; j < (max_it + 1); ++j) {
355 gamma(j) = 0.0;
356 delta(j) = 0.0;
357 for (i = 0; i < (2 * l + 1); ++i) G_noshift(i, j) = 0.0;
358 }
359 PetscFunctionReturn(PETSC_SUCCESS);
360 }
361
362 /*
363 KSPSolve_PIPELCG - This routine actually applies the pipelined(l) conjugate gradient method
364 */
KSPSolve_PIPELCG(KSP ksp)365 static PetscErrorCode KSPSolve_PIPELCG(KSP ksp)
366 {
367 KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
368 Mat A = NULL, Pmat = NULL;
369 Vec b = NULL, x = NULL, p = NULL;
370 PetscInt max_it = ksp->max_it, l = plcg->l;
371 PetscInt i = 0, outer_it = 0, curr_guess_zero = 0;
372 PetscReal lmin = plcg->lmin, lmax = plcg->lmax;
373 PetscBool diagonalscale = PETSC_FALSE;
374 MPI_Comm comm;
375
376 PetscFunctionBegin;
377 comm = PetscObjectComm((PetscObject)ksp);
378 PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
379 PetscCheck(!diagonalscale, comm, PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);
380
381 x = ksp->vec_sol;
382 b = ksp->vec_rhs;
383 p = plcg->p;
384
385 PetscCall(PetscCalloc1((max_it + 1) * (2 * l + 1), &plcg->G));
386 PetscCall(PetscCalloc1(max_it + 1, &plcg->gamma));
387 PetscCall(PetscCalloc1(max_it + 1, &plcg->delta));
388 PetscCall(PetscCalloc1(max_it + 1, &plcg->req));
389
390 PetscCall(PCGetOperators(ksp->pc, &A, &Pmat));
391
392 for (i = 0; i < l; ++i) sigma(i) = (0.5 * (lmin + lmax) + (0.5 * (lmax - lmin) * PetscCosReal(PETSC_PI * (2.0 * i + 1.0) / (2.0 * l))));
393
394 ksp->its = 0;
395 outer_it = 0;
396 curr_guess_zero = !!ksp->guess_zero;
397
398 while (ksp->its < max_it) { /* OUTER LOOP (gmres-like restart to handle breakdowns) */
399 /* RESTART LOOP */
400 if (!curr_guess_zero) {
401 PetscCall(KSP_MatMult(ksp, A, x, plcg->U[0])); /* u <- b - Ax */
402 PetscCall(VecAYPX(plcg->U[0], -1.0, b));
403 } else {
404 PetscCall(VecCopy(b, plcg->U[0])); /* u <- b (x is 0) */
405 }
406 PetscCall(KSP_PCApply(ksp, plcg->U[0], p)); /* p <- Bu */
407
408 if (outer_it > 0) {
409 /* Re-initialize Z,U,V,Q,gamma,delta,G after restart occurred */
410 PetscCall(KSPSolve_ReInitData_PIPELCG(ksp));
411 }
412
413 PetscCall((*plcg->U[0]->ops->dot_local)(plcg->U[0], p, &G(0, 0)));
414 PetscCallMPI(MPIU_Iallreduce(MPI_IN_PLACE, &G(0, 0), 1, MPIU_SCALAR, MPIU_SUM, comm, &req(0)));
415 PetscCall(VecCopy(p, plcg->Z[l]));
416
417 PetscCall(KSPSolve_InnerLoop_PIPELCG(ksp));
418
419 if (ksp->reason) break; /* convergence or divergence */
420 ++outer_it;
421 curr_guess_zero = 0;
422 }
423
424 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
425 PetscCall(PetscFree(plcg->G));
426 PetscCall(PetscFree(plcg->gamma));
427 PetscCall(PetscFree(plcg->delta));
428 PetscCall(PetscFree(plcg->req));
429 PetscFunctionReturn(PETSC_SUCCESS);
430 }
431
432 /*MC
433 KSPPIPELCG - Deep pipelined (length l) Conjugate Gradient method {cite}`cornelis2018communication` and {cite}`cools2019numerically`.
434 This method has only a single non-blocking global
435 reduction per iteration, compared to 2 blocking reductions for standard `KSPCG`. The reduction is overlapped by the
436 matrix-vector product and preconditioner application of the next l iterations. The pipeline length l is a parameter
437 of the method. [](sec_pipelineksp)
438
439 Options Database Keys:
440 + -ksp_pipelcg_pipel - pipelined length
441 . -ksp_pipelcg_lmin - approximation to the smallest eigenvalue of the preconditioned operator (default: 0.0)
442 . -ksp_pipelcg_lmax - approximation to the largest eigenvalue of the preconditioned operator (default: 0.0)
443 - -ksp_pipelcg_monitor - output where/why the method restarts when a sqrt breakdown occurs
444
445 Example usage:
446 .vb
447 KSP tutorials ex2, no preconditioner, pipel = 2, lmin = 0.0, lmax = 8.0 :
448 $mpiexec -n 14 ./ex2 -m 1000 -n 1000 -ksp_type pipelcg -pc_type none -ksp_norm_type natural
449 -ksp_rtol 1e-10 -ksp_max_it 1000 -ksp_pipelcg_pipel 2 -ksp_pipelcg_lmin 0.0 -ksp_pipelcg_lmax 8.0 -log_view
450 SNES tutorials ex48, bjacobi preconditioner, pipel = 3, lmin = 0.0, lmax = 2.0, show restart information :
451 $mpiexec -n 14 ./ex48 -M 150 -P 100 -ksp_type pipelcg -pc_type bjacobi -ksp_rtol 1e-10 -ksp_pipelcg_pipel 3
452 -ksp_pipelcg_lmin 0.0 -ksp_pipelcg_lmax 2.0 -ksp_pipelcg_monitor -log_view
453 .ve
454
455 Level: advanced
456
457 Notes:
458 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
459 performance of pipelined methods. See [](doc_faq_pipelined)
460
461 Contributed by:
462 Siegfried Cools, University of Antwerp, Dept. Mathematics and Computer Science,
463 funded by Flemish Research Foundation (FWO) grant number 12H4617N.
464
465 .seealso: [](ch_ksp), [](sec_pipelineksp), [](doc_faq_pipelined), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSPCG`, `KSPPIPECG`, `KSPPIPECGRR`, `KSPPGMRES`,
466 `KSPPIPEBCGS`, `KSPSetPCSide()`, `KSPGROPPCG`
467 M*/
KSPCreate_PIPELCG(KSP ksp)468 PETSC_EXTERN PetscErrorCode KSPCreate_PIPELCG(KSP ksp)
469 {
470 KSP_CG_PIPE_L *plcg = NULL;
471
472 PetscFunctionBegin;
473 PetscCall(PetscNew(&plcg));
474 ksp->data = (void *)plcg;
475
476 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));
477 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
478
479 ksp->ops->setup = KSPSetUp_PIPELCG;
480 ksp->ops->solve = KSPSolve_PIPELCG;
481 ksp->ops->reset = KSPReset_PIPELCG;
482 ksp->ops->destroy = KSPDestroy_PIPELCG;
483 ksp->ops->view = KSPView_PIPELCG;
484 ksp->ops->setfromoptions = KSPSetFromOptions_PIPELCG;
485 ksp->ops->buildsolution = KSPBuildSolutionDefault;
486 ksp->ops->buildresidual = KSPBuildResidualDefault;
487 PetscFunctionReturn(PETSC_SUCCESS);
488 }
489