1 static char help[] = "Applies the 2023 preconditioner of Benzi and Faccio\n\n";
2
3 #include <petscmat.h>
4 #include <petscviewer.h>
5 #include <petscvec.h>
6 #include <petscis.h>
7 #include <petscksp.h>
8
9 /*
10 * This example reproduces the preconditioner outlined in Benzi's paper
11 * https://doi.org/10.1137/22M1505529. The problem considered is:
12 *
13 * (A + gamma UU^T)x = b
14 *
15 * whose structure arises from, for example, grad-div stabilization in the
16 * Navier-Stokes momentum equation. In the code we will also refer to
17 * gamma UU^T as J. The preconditioner developed by Benzi is:
18 *
19 * P_alpha = (A + alpha I)(alpha I + gamma UU^T)
20 *
21 * Another variant which may yield better convergence depending on the specific
22 * problem is
23 *
24 * P_alpha = (A + alpha D) D^-1 (alpha D + gamma UU^T)
25 *
26 * where D = diag(A + gamma UU^T). This is the variant implemented
27 * here. Application of the preconditioner involves (approximate) solution of
28 * two systems, one with (A + alpha D), and another with (alpha D + gamma
29 * UU^T). For small alpha (which generally yields the best overall
30 * preconditioner), (alpha D + gamma UU^T) is ill-conditioned. To combat this we
31 * solve (alpha D + gamma UU^T) using the Sherman-Morrison-Woodbury (SMW) matrix
32 * identity, which effectively converts the grad-div structure to a much nicer
33 * div-grad (laplacian) structure.
34 *
35 * The matrices used as input can be generated by running the matlab/octave
36 * program IFISS. The particular matrices checked into the datafiles repository
37 * and used in testing of this example correspond to a leaky lid-driven cavity
38 * with a stretched grid and Q2-Q1 finite elements. The matrices are taken from
39 * the last iteration of a Picard solve with tolerance 1e-8 with a viscosity of
40 * 0.1 and a 32x32 grid. We summarize below iteration counts from running this
41 * preconditioner for different grids and viscosity with a KSP tolerance of 1e-6.
42 *
43 * 32x32 64x64 128x128
44 * 0.1 28 36 43
45 * 0.01 59 75 73
46 * 0.002 136 161 167
47 *
48 * A reader of Benzi's paper will note that the performance shown above with
49 * respect to decreasing viscosity is significantly worse than in the
50 * paper. This is actually because of the choice of RHS. In Benzi's work, the
51 * RHS was generated by multiplying the operator with a vector of 1s whereas
52 * here we generate the RHS using a random vector. The iteration counts from the
53 * Benzi paper can be reproduced by changing the RHS generation in this example,
54 * but we choose to use the more difficult RHS as the resulting performance may
55 * more closely match what users experience in "physical" contexts.
56 */
57
CreateAndLoadMat(const char * mat_name,Mat * mat)58 PetscErrorCode CreateAndLoadMat(const char *mat_name, Mat *mat)
59 {
60 PetscViewer viewer;
61 char file[PETSC_MAX_PATH_LEN];
62 char flag_name[10] = "-f";
63 PetscBool flg;
64
65 PetscFunctionBeginUser;
66 PetscCall(PetscOptionsGetString(NULL, NULL, strcat(flag_name, mat_name), file, sizeof(file), &flg));
67 PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER, "Must indicate file with the -f<mat_name> option");
68 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer));
69 PetscCall(MatCreate(PETSC_COMM_WORLD, mat));
70 PetscCall(MatSetType(*mat, MATAIJ));
71 PetscCall(PetscObjectSetName((PetscObject)*mat, mat_name));
72 PetscCall(MatSetFromOptions(*mat));
73 PetscCall(MatLoad(*mat, viewer));
74 PetscCall(PetscViewerDestroy(&viewer));
75 PetscFunctionReturn(PETSC_SUCCESS);
76 }
77
78 typedef struct {
79 Mat U, UT, D, aD, aDinv, I_plus_gammaUTaDinvU;
80 PC smw_cholesky;
81 PetscReal gamma, alpha;
82 PetscBool setup_called;
83 } SmwPCCtx;
84
SmwSetup(PC pc)85 PetscErrorCode SmwSetup(PC pc)
86 {
87 SmwPCCtx *ctx;
88 Vec aDVec;
89
90 PetscFunctionBeginUser;
91 PetscCall(PCShellGetContext(pc, &ctx));
92
93 if (ctx->setup_called) PetscFunctionReturn(PETSC_SUCCESS);
94
95 // Create aD
96 PetscCall(MatDuplicate(ctx->D, MAT_COPY_VALUES, &ctx->aD));
97 PetscCall(MatScale(ctx->aD, ctx->alpha));
98
99 // Create aDinv
100 PetscCall(MatDuplicate(ctx->aD, MAT_DO_NOT_COPY_VALUES, &ctx->aDinv));
101 PetscCall(MatCreateVecs(ctx->aD, &aDVec, NULL));
102 PetscCall(MatGetDiagonal(ctx->aD, aDVec));
103 PetscCall(VecReciprocal(aDVec));
104 PetscCall(MatDiagonalSet(ctx->aDinv, aDVec, INSERT_VALUES));
105
106 // Create UT
107 PetscCall(MatTranspose(ctx->U, MAT_INITIAL_MATRIX, &ctx->UT));
108
109 // Create sum Mat
110 PetscCall(MatMatMatMult(ctx->UT, ctx->aDinv, ctx->U, MAT_INITIAL_MATRIX, PETSC_CURRENT, &ctx->I_plus_gammaUTaDinvU));
111 PetscCall(MatScale(ctx->I_plus_gammaUTaDinvU, ctx->gamma));
112 PetscCall(MatShift(ctx->I_plus_gammaUTaDinvU, 1.));
113
114 PetscCall(PCCreate(PETSC_COMM_WORLD, &ctx->smw_cholesky));
115 PetscCall(PCSetType(ctx->smw_cholesky, PCCHOLESKY));
116 PetscCall(PCSetOperators(ctx->smw_cholesky, ctx->I_plus_gammaUTaDinvU, ctx->I_plus_gammaUTaDinvU));
117 PetscCall(PCSetOptionsPrefix(ctx->smw_cholesky, "smw_"));
118 PetscCall(PCSetFromOptions(ctx->smw_cholesky));
119 PetscCall(PCSetUp(ctx->smw_cholesky));
120
121 PetscCall(VecDestroy(&aDVec));
122
123 ctx->setup_called = PETSC_TRUE;
124 PetscFunctionReturn(PETSC_SUCCESS);
125 }
126
SmwApply(PC pc,Vec x,Vec y)127 PetscErrorCode SmwApply(PC pc, Vec x, Vec y)
128 {
129 SmwPCCtx *ctx;
130 Vec vel0, pressure0, pressure1;
131
132 PetscFunctionBeginUser;
133 PetscCall(PCShellGetContext(pc, &ctx));
134
135 PetscCall(MatCreateVecs(ctx->UT, &vel0, &pressure0));
136 PetscCall(VecDuplicate(pressure0, &pressure1));
137
138 // First term
139 PetscCall(MatMult(ctx->aDinv, x, vel0));
140 PetscCall(MatMult(ctx->UT, vel0, pressure0));
141 PetscCall(PCApply(ctx->smw_cholesky, pressure0, pressure1));
142 PetscCall(MatMult(ctx->U, pressure1, vel0));
143 PetscCall(MatMult(ctx->aDinv, vel0, y));
144 PetscCall(VecScale(y, -ctx->gamma));
145
146 // Second term
147 PetscCall(MatMult(ctx->aDinv, x, vel0));
148
149 PetscCall(VecAXPY(y, 1, vel0));
150
151 PetscCall(VecDestroy(&vel0));
152 PetscCall(VecDestroy(&pressure0));
153 PetscCall(VecDestroy(&pressure1));
154 PetscFunctionReturn(PETSC_SUCCESS);
155 }
156
main(int argc,char ** args)157 int main(int argc, char **args)
158 {
159 Mat A, B, Q, Acondensed, Bcondensed, BT, J, AplusJ, QInv, D, AplusD, JplusD, U;
160 Mat AplusJarray[2];
161 Vec bound, x, b, Qdiag, DVec;
162 PetscBool flg;
163 PetscViewer viewer;
164 char file[PETSC_MAX_PATH_LEN];
165 PetscInt *boundary_indices;
166 PetscInt boundary_indices_size, am, an, bm, bn, condensed_am, astart, aend, Dstart, Dend, num_local_bnd_dofs = 0;
167 const PetscScalar zero = 0;
168 IS boundary_is, bulk_is;
169 KSP ksp;
170 PC pc, pcA, pcJ;
171 PetscRandom rctx;
172 PetscReal *boundary_indices_values;
173 PetscReal gamma = 100, alpha = .01;
174 PetscMPIInt rank;
175 SmwPCCtx ctx;
176
177 PetscFunctionBeginUser;
178 PetscCall(PetscInitialize(&argc, &args, NULL, help));
179 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
180
181 PetscCall(CreateAndLoadMat("A", &A));
182 PetscCall(CreateAndLoadMat("B", &B));
183 PetscCall(CreateAndLoadMat("Q", &Q));
184
185 PetscCall(PetscOptionsGetString(NULL, NULL, "-fbound", file, sizeof(file), &flg));
186 PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER, "Must indicate file with the -fbound option");
187
188 if (rank == 0) {
189 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, file, FILE_MODE_READ, &viewer));
190 PetscCall(VecCreate(PETSC_COMM_SELF, &bound));
191 PetscCall(PetscObjectSetName((PetscObject)bound, "bound"));
192 PetscCall(VecSetType(bound, VECSEQ));
193 PetscCall(VecLoad(bound, viewer));
194 PetscCall(PetscViewerDestroy(&viewer));
195 PetscCall(VecGetLocalSize(bound, &boundary_indices_size));
196 PetscCall(VecGetArray(bound, &boundary_indices_values));
197 }
198 PetscCallMPI(MPI_Bcast(&boundary_indices_size, 1, MPIU_INT, 0, PETSC_COMM_WORLD));
199 if (rank != 0) PetscCall(PetscMalloc1(boundary_indices_size, &boundary_indices_values));
200 PetscCallMPI(MPI_Bcast(boundary_indices_values, (PetscMPIInt)boundary_indices_size, MPIU_SCALAR, 0, PETSC_COMM_WORLD));
201
202 PetscCall(MatGetSize(A, &am, NULL));
203 // The total number of dofs for a given velocity component
204 const PetscInt nc = am / 2;
205 PetscCall(MatGetOwnershipRange(A, &astart, &aend));
206
207 PetscCall(PetscMalloc1(2 * boundary_indices_size, &boundary_indices));
208
209 //
210 // The dof index ordering appears to be all vx dofs and then all vy dofs.
211 //
212
213 // First do vx
214 for (PetscInt i = 0; i < boundary_indices_size; ++i) {
215 // MATLAB uses 1-based indexing
216 const PetscInt bnd_dof = (PetscInt)boundary_indices_values[i] - 1;
217 if ((bnd_dof >= astart) && (bnd_dof < aend)) boundary_indices[num_local_bnd_dofs++] = bnd_dof;
218 }
219
220 // Now vy
221 for (PetscInt i = 0; i < boundary_indices_size; ++i) {
222 // MATLAB uses 1-based indexing
223 const PetscInt bnd_dof = ((PetscInt)boundary_indices_values[i] - 1) + nc;
224 if ((bnd_dof >= astart) && (bnd_dof < aend)) boundary_indices[num_local_bnd_dofs++] = bnd_dof;
225 }
226 if (rank == 0) PetscCall(VecRestoreArray(bound, &boundary_indices_values));
227 else PetscCall(PetscFree(boundary_indices_values));
228
229 PetscCall(ISCreateGeneral(PETSC_COMM_WORLD, num_local_bnd_dofs, boundary_indices, PETSC_USE_POINTER, &boundary_is));
230 PetscCall(ISComplement(boundary_is, astart, aend, &bulk_is));
231
232 PetscCall(MatCreateSubMatrix(A, bulk_is, bulk_is, MAT_INITIAL_MATRIX, &Acondensed));
233 // Can't pass null for row index set :-(
234 PetscCall(MatTranspose(B, MAT_INPLACE_MATRIX, &B));
235 PetscCall(MatCreateSubMatrix(B, bulk_is, NULL, MAT_INITIAL_MATRIX, &Bcondensed));
236 PetscCall(MatGetLocalSize(Acondensed, &am, &an));
237 PetscCall(MatGetLocalSize(Bcondensed, &bm, &bn));
238
239 // Create QInv
240 PetscCall(MatCreateVecs(Q, &Qdiag, NULL));
241 PetscCall(MatGetDiagonal(Q, Qdiag));
242 PetscCall(VecReciprocal(Qdiag));
243 PetscCall(MatDuplicate(Q, MAT_DO_NOT_COPY_VALUES, &QInv));
244 PetscCall(MatDiagonalSet(QInv, Qdiag, INSERT_VALUES));
245 PetscCall(MatAssemblyBegin(QInv, MAT_FINAL_ASSEMBLY));
246 PetscCall(MatAssemblyEnd(QInv, MAT_FINAL_ASSEMBLY));
247
248 // Create J
249 PetscCall(MatTranspose(Bcondensed, MAT_INITIAL_MATRIX, &BT));
250 PetscCall(MatMatMatMult(Bcondensed, QInv, BT, MAT_INITIAL_MATRIX, PETSC_CURRENT, &J));
251 PetscCall(MatScale(J, gamma));
252
253 // Create sum of A + J
254 AplusJarray[0] = Acondensed;
255 AplusJarray[1] = J;
256 PetscCall(MatCreateComposite(PETSC_COMM_WORLD, 2, AplusJarray, &AplusJ));
257
258 // Create decomposition matrices
259 // We've already used Qdiag, which currently represents Q^-1, for its necessary purposes. Let's
260 // convert it to represent Q^(-1/2)
261 PetscCall(VecSqrtAbs(Qdiag));
262 // We can similarly reuse Qinv
263 PetscCall(MatDiagonalSet(QInv, Qdiag, INSERT_VALUES));
264 PetscCall(MatAssemblyBegin(QInv, MAT_FINAL_ASSEMBLY));
265 PetscCall(MatAssemblyEnd(QInv, MAT_FINAL_ASSEMBLY));
266 // Create U
267 PetscCall(MatMatMult(Bcondensed, QInv, MAT_INITIAL_MATRIX, PETSC_CURRENT, &U));
268
269 // Create x and b
270 PetscCall(MatCreateVecs(AplusJ, &x, &b));
271 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
272 PetscCall(VecSetRandom(x, rctx));
273 PetscCall(PetscRandomDestroy(&rctx));
274 PetscCall(MatMult(AplusJ, x, b));
275
276 // Compute preconditioner operators
277 PetscCall(MatGetLocalSize(Acondensed, &condensed_am, NULL));
278 PetscCall(MatCreate(PETSC_COMM_WORLD, &D));
279 PetscCall(MatSetType(D, MATAIJ));
280 PetscCall(MatSetSizes(D, condensed_am, condensed_am, PETSC_DETERMINE, PETSC_DETERMINE));
281 PetscCall(MatGetOwnershipRange(D, &Dstart, &Dend));
282 for (PetscInt i = Dstart; i < Dend; ++i) PetscCall(MatSetValues(D, 1, &i, 1, &i, &zero, INSERT_VALUES));
283 PetscCall(MatAssemblyBegin(D, MAT_FINAL_ASSEMBLY));
284 PetscCall(MatAssemblyEnd(D, MAT_FINAL_ASSEMBLY));
285 PetscCall(MatCreateVecs(D, &DVec, NULL));
286 PetscCall(MatGetDiagonal(AplusJ, DVec));
287 PetscCall(MatDiagonalSet(D, DVec, INSERT_VALUES));
288 PetscCall(MatDuplicate(Acondensed, MAT_COPY_VALUES, &AplusD));
289 PetscCall(MatAXPY(AplusD, alpha, D, SUBSET_NONZERO_PATTERN));
290 PetscCall(MatDuplicate(J, MAT_COPY_VALUES, &JplusD));
291 PetscCall(MatAXPY(JplusD, alpha, D, SUBSET_NONZERO_PATTERN));
292
293 // Initialize our SMW context
294 ctx.U = U;
295 ctx.D = D;
296 ctx.gamma = gamma;
297 ctx.alpha = alpha;
298 ctx.setup_called = PETSC_FALSE;
299
300 // Set preconditioner operators
301 PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
302 PetscCall(KSPSetType(ksp, KSPFGMRES));
303 PetscCall(KSPSetOperators(ksp, AplusJ, AplusJ));
304 PetscCall(KSPSetNormType(ksp, KSP_NORM_UNPRECONDITIONED));
305 PetscCall(KSPGMRESSetRestart(ksp, 300));
306 PetscCall(KSPGetPC(ksp, &pc));
307 PetscCall(PCSetType(pc, PCCOMPOSITE));
308 PetscCall(PCCompositeSetType(pc, PC_COMPOSITE_SPECIAL));
309 PetscCall(PCCompositeAddPCType(pc, PCILU));
310 PetscCall(PCCompositeAddPCType(pc, PCSHELL));
311 PetscCall(PCCompositeGetPC(pc, 0, &pcA));
312 PetscCall(PCCompositeGetPC(pc, 1, &pcJ));
313 PetscCall(PCSetOperators(pcA, AplusD, AplusD));
314 PetscCall(PCSetOperators(pcJ, JplusD, JplusD));
315 PetscCall(PCShellSetContext(pcJ, &ctx));
316 PetscCall(PCShellSetApply(pcJ, SmwApply));
317 PetscCall(PCShellSetSetUp(pcJ, SmwSetup));
318 PetscCall(PCCompositeSpecialSetAlphaMat(pc, D));
319
320 // Solve
321 PetscCall(KSPSetFromOptions(ksp));
322 PetscCall(KSPSolve(ksp, b, x));
323
324 PetscCall(MatDestroy(&A));
325 PetscCall(MatDestroy(&B));
326 PetscCall(MatDestroy(&Q));
327 PetscCall(MatDestroy(&Acondensed));
328 PetscCall(MatDestroy(&Bcondensed));
329 PetscCall(MatDestroy(&BT));
330 PetscCall(MatDestroy(&J));
331 PetscCall(MatDestroy(&AplusJ));
332 PetscCall(MatDestroy(&QInv));
333 PetscCall(MatDestroy(&D));
334 PetscCall(MatDestroy(&AplusD));
335 PetscCall(MatDestroy(&JplusD));
336 PetscCall(MatDestroy(&U));
337 if (rank == 0) PetscCall(VecDestroy(&bound));
338 PetscCall(VecDestroy(&x));
339 PetscCall(VecDestroy(&b));
340 PetscCall(VecDestroy(&Qdiag));
341 PetscCall(VecDestroy(&DVec));
342 PetscCall(ISDestroy(&boundary_is));
343 PetscCall(ISDestroy(&bulk_is));
344 PetscCall(KSPDestroy(&ksp));
345 PetscCall(PetscFree(boundary_indices));
346 PetscCall(MatDestroy(&ctx.UT));
347 PetscCall(MatDestroy(&ctx.I_plus_gammaUTaDinvU));
348 PetscCall(MatDestroy(&ctx.aD));
349 PetscCall(MatDestroy(&ctx.aDinv));
350 PetscCall(PCDestroy(&ctx.smw_cholesky));
351
352 PetscCall(PetscFinalize());
353 return 0;
354 }
355
356 /*TEST
357
358 build:
359 requires: !complex
360
361 test:
362 args: -fA ${DATAFILESPATH}/matrices/ifiss/A -fB ${DATAFILESPATH}/matrices/ifiss/B -fQ ${DATAFILESPATH}/matrices/ifiss/Q -fbound ${DATAFILESPATH}/is/ifiss/bound -ksp_monitor
363 requires: datafilespath defined(PETSC_USE_64BIT_INDICES) !complex double
364
365 test:
366 suffix: 2
367 nsize: 2
368 args: -fA ${DATAFILESPATH}/matrices/ifiss/A -fB ${DATAFILESPATH}/matrices/ifiss/B -fQ ${DATAFILESPATH}/matrices/ifiss/Q -fbound ${DATAFILESPATH}/is/ifiss/bound -ksp_monitor
369 requires: datafilespath defined(PETSC_USE_64BIT_INDICES) !complex double strumpack
370
371 TEST*/
372