1 #include <petsc/private/kspimpl.h>
2
3 /*
4 KSPSetUp_PIPECG - Sets up the workspace needed by the PIPECG method.
5
6 This is called once, usually automatically by KSPSolve() or KSPSetUp()
7 but can be called directly by KSPSetUp()
8 */
KSPSetUp_PIPECG(KSP ksp)9 static PetscErrorCode KSPSetUp_PIPECG(KSP ksp)
10 {
11 PetscFunctionBegin;
12 /* get work vectors needed by PIPECG */
13 PetscCall(KSPSetWorkVecs(ksp, 9));
14 PetscFunctionReturn(PETSC_SUCCESS);
15 }
16
17 /*
18 KSPSolve_PIPECG - This routine actually applies the pipelined conjugate gradient method
19 */
KSPSolve_PIPECG(KSP ksp)20 static PetscErrorCode KSPSolve_PIPECG(KSP ksp)
21 {
22 PetscInt i;
23 PetscScalar alpha = 0.0, beta = 0.0, gamma = 0.0, gammaold = 0.0, delta = 0.0;
24 PetscReal dp = 0.0;
25 Vec X, B, Z, P, W, Q, U, M, N, R, S;
26 Mat Amat, Pmat;
27 PetscBool diagonalscale;
28
29 PetscFunctionBegin;
30 PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
31 PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);
32
33 X = ksp->vec_sol;
34 B = ksp->vec_rhs;
35 R = ksp->work[0];
36 Z = ksp->work[1];
37 P = ksp->work[2];
38 N = ksp->work[3];
39 W = ksp->work[4];
40 Q = ksp->work[5];
41 U = ksp->work[6];
42 M = ksp->work[7];
43 S = ksp->work[8];
44
45 PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
46
47 ksp->its = 0;
48 if (!ksp->guess_zero) {
49 PetscCall(KSP_MatMult(ksp, Amat, X, R)); /* r <- b - Ax */
50 PetscCall(VecAYPX(R, -1.0, B));
51 } else {
52 PetscCall(VecCopy(B, R)); /* r <- b (x is 0) */
53 }
54
55 PetscCall(KSP_PCApply(ksp, R, U)); /* u <- Br */
56
57 switch (ksp->normtype) {
58 case KSP_NORM_PRECONDITIONED:
59 PetscCall(VecNormBegin(U, NORM_2, &dp)); /* dp <- u'*u = e'*A'*B'*B*A'*e' */
60 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U)));
61 PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
62 PetscCall(VecNormEnd(U, NORM_2, &dp));
63 break;
64 case KSP_NORM_UNPRECONDITIONED:
65 PetscCall(VecNormBegin(R, NORM_2, &dp)); /* dp <- r'*r = e'*A'*A*e */
66 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
67 PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
68 PetscCall(VecNormEnd(R, NORM_2, &dp));
69 break;
70 case KSP_NORM_NATURAL:
71 PetscCall(VecDotBegin(R, U, &gamma)); /* gamma <- u'*r */
72 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
73 PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
74 PetscCall(VecDotEnd(R, U, &gamma));
75 KSPCheckDot(ksp, gamma);
76 dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- r'*u = r'*B*r = e'*A'*B*A*e */
77 break;
78 case KSP_NORM_NONE:
79 PetscCall(KSP_MatMult(ksp, Amat, U, W));
80 dp = 0.0;
81 break;
82 default:
83 SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
84 }
85 PetscCall(KSPLogResidualHistory(ksp, dp));
86 PetscCall(KSPMonitor(ksp, 0, dp));
87 ksp->rnorm = dp;
88 PetscCall((*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP)); /* test for convergence */
89 if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
90
91 i = 0;
92 do {
93 if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
94 PetscCall(VecNormBegin(R, NORM_2, &dp));
95 } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
96 PetscCall(VecNormBegin(U, NORM_2, &dp));
97 }
98 if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotBegin(R, U, &gamma));
99 PetscCall(VecDotBegin(W, U, &delta));
100 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
101
102 PetscCall(KSP_PCApply(ksp, W, M)); /* m <- Bw */
103 PetscCall(KSP_MatMult(ksp, Amat, M, N)); /* n <- Am */
104
105 if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
106 PetscCall(VecNormEnd(R, NORM_2, &dp));
107 } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
108 PetscCall(VecNormEnd(U, NORM_2, &dp));
109 }
110 if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotEnd(R, U, &gamma));
111 PetscCall(VecDotEnd(W, U, &delta));
112
113 if (i > 0) {
114 if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
115 else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
116
117 ksp->rnorm = dp;
118 PetscCall(KSPLogResidualHistory(ksp, dp));
119 PetscCall(KSPMonitor(ksp, i, dp));
120 PetscCall((*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP));
121 if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
122 }
123
124 if (i == 0) {
125 alpha = gamma / delta;
126 PetscCall(VecCopy(N, Z)); /* z <- n */
127 PetscCall(VecCopy(M, Q)); /* q <- m */
128 PetscCall(VecCopy(U, P)); /* p <- u */
129 PetscCall(VecCopy(W, S)); /* s <- w */
130 } else {
131 beta = gamma / gammaold;
132 alpha = gamma / (delta - beta / alpha * gamma);
133 PetscCall(VecAYPX(Z, beta, N)); /* z <- n + beta * z */
134 PetscCall(VecAYPX(Q, beta, M)); /* q <- m + beta * q */
135 PetscCall(VecAYPX(P, beta, U)); /* p <- u + beta * p */
136 PetscCall(VecAYPX(S, beta, W)); /* s <- w + beta * s */
137 }
138 PetscCall(VecAXPY(X, alpha, P)); /* x <- x + alpha * p */
139 PetscCall(VecAXPY(U, -alpha, Q)); /* u <- u - alpha * q */
140 PetscCall(VecAXPY(W, -alpha, Z)); /* w <- w - alpha * z */
141 PetscCall(VecAXPY(R, -alpha, S)); /* r <- r - alpha * s */
142 gammaold = gamma;
143 i++;
144 ksp->its = i;
145
146 /* if (i%50 == 0) { */
147 /* PetscCall(KSP_MatMult(ksp,Amat,X,R)); /\* w <- b - Ax *\/ */
148 /* PetscCall(VecAYPX(R,-1.0,B)); */
149 /* PetscCall(KSP_PCApply(ksp,R,U)); */
150 /* PetscCall(KSP_MatMult(ksp,Amat,U,W)); */
151 /* } */
152
153 } while (i <= ksp->max_it);
154 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
155 PetscFunctionReturn(PETSC_SUCCESS);
156 }
157
158 PETSC_INTERN PetscErrorCode KSPBuildResidual_CG(KSP, Vec, Vec, Vec *);
159
160 /*MC
161 KSPPIPECG - Pipelined conjugate gradient method {cite}`ghyselsvanroose2014`. [](sec_pipelineksp)
162
163 Level: intermediate
164
165 Notes:
166 This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard `KSPPCG`. The
167 non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.
168
169 See also `KSPPIPECR`, where the reduction is only overlapped with the matrix-vector product and `KSPGROPPCG`
170
171 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
172 See [](doc_faq_pipelined)
173
174 Contributed by:
175 Pieter Ghysels, Universiteit Antwerpen, Intel Exascience lab Flanders
176
177 .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), `KSPCreate()`, `KSPSetType()`, `KSPPIPECG2`, `KSPPIPECR`, `KSPGROPPCG`, `KSPPGMRES`, `KSPCG`, `KSPCGUseSingleReduction()`
178 M*/
KSPCreate_PIPECG(KSP ksp)179 PETSC_EXTERN PetscErrorCode KSPCreate_PIPECG(KSP ksp)
180 {
181 PetscFunctionBegin;
182 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 2));
183 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2));
184 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
185 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));
186
187 ksp->ops->setup = KSPSetUp_PIPECG;
188 ksp->ops->solve = KSPSolve_PIPECG;
189 ksp->ops->destroy = KSPDestroyDefault;
190 ksp->ops->view = NULL;
191 ksp->ops->setfromoptions = NULL;
192 ksp->ops->buildsolution = KSPBuildSolutionDefault;
193 ksp->ops->buildresidual = KSPBuildResidual_CG;
194 PetscFunctionReturn(PETSC_SUCCESS);
195 }
196