xref: /petsc/src/ksp/pc/tutorials/ex4.c (revision 732aec7a18f2199fb53bb9a2f3aef439a834ce31)
1 static char help[] = "Applies the 2023 preconditioner of Benzi and Faccio\n\n";
2 
3 #include <petscmat.h>
4 #include <petscviewer.h>
5 #include <petscvec.h>
6 #include <petscis.h>
7 #include <petscksp.h>
8 
9 /*
10  * This example reproduces the preconditioner outlined in Benzi's paper
11  * https://doi.org/10.1137/22M1505529. The problem considered is:
12  *
13  * (A + gamma UU^T)x = b
14  *
15  * whose structure arises from, for example, grad-div stabilization in the
16  * Navier-Stokes momentum equation. In the code we will also refer to
17  * gamma UU^T as J. The preconditioner developed by Benzi is:
18  *
19  * P_alpha = (A + alpha I)(alpha I + gamma UU^T)
20  *
21  * Another variant which may yield better convergence depending on the specific
22  * problem is
23  *
24  * P_alpha = (A + alpha D) D^-1 (alpha D + gamma UU^T)
25  *
26  * where D = diag(A + gamma UU^T). This is the variant implemented
27  * here. Application of the preconditioner involves (approximate) solution of
28  * two systems, one with (A + alpha D), and another with (alpha D + gamma
29  * UU^T). For small alpha (which generally yields the best overall
30  * preconditioner), (alpha D + gamma UU^T) is ill-conditioned. To combat this we
31  * solve (alpha D + gamma UU^T) using the Sherman-Morrison-Woodbury (SMW) matrix
32  * identity, which effectively converts the grad-div structure to a much nicer
33  * div-grad (laplacian) structure.
34  *
35  * The matrices used as input can be generated by running the matlab/octave
36  * program IFISS. The particular matrices checked into the datafiles repository
37  * and used in testing of this example correspond to a leaky lid-driven cavity
38  * with a stretched grid and Q2-Q1 finite elements. The matrices are taken from
39  * the last iteration of a Picard solve with tolerance 1e-8 with a viscosity of
40  * 0.1 and a 32x32 grid. We summarize below iteration counts from running this
41  * preconditioner for different grids and viscosity with a KSP tolerance of 1e-6.
42  *
43  *       32x32 64x64 128x128
44  * 0.1   28    36    43
45  * 0.01  59    75    73
46  * 0.002 136   161   167
47  *
48  * A reader of Benzi's paper will note that the performance shown above with
49  * respect to decreasing viscosity is significantly worse than in the
50  * paper. This is actually because of the choice of RHS. In Benzi's work, the
51  * RHS was generated by multiplying the operator with a vector of 1s whereas
52  * here we generate the RHS using a random vector. The iteration counts from the
53  * Benzi paper can be reproduced by changing the RHS generation in this example,
54  * but we choose to use the more difficult RHS as the resulting performance may
55  * more closely match what users experience in "physical" contexts.
56  */
57 
CreateAndLoadMat(const char * mat_name,Mat * mat)58 PetscErrorCode CreateAndLoadMat(const char *mat_name, Mat *mat)
59 {
60   PetscViewer viewer;
61   char        file[PETSC_MAX_PATH_LEN];
62   char        flag_name[10] = "-f";
63   PetscBool   flg;
64 
65   PetscFunctionBeginUser;
66   PetscCall(PetscOptionsGetString(NULL, NULL, strcat(flag_name, mat_name), file, sizeof(file), &flg));
67   PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER, "Must indicate file with the -f<mat_name> option");
68   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer));
69   PetscCall(MatCreate(PETSC_COMM_WORLD, mat));
70   PetscCall(MatSetType(*mat, MATAIJ));
71   PetscCall(PetscObjectSetName((PetscObject)*mat, mat_name));
72   PetscCall(MatSetFromOptions(*mat));
73   PetscCall(MatLoad(*mat, viewer));
74   PetscCall(PetscViewerDestroy(&viewer));
75   PetscFunctionReturn(PETSC_SUCCESS);
76 }
77 
// Context for the shell PC that applies (alpha*D + gamma*U*U^T)^-1 via the
// Sherman-Morrison-Woodbury identity.
typedef struct {
  Mat       U, UT, D, aD, aDinv, I_plus_gammaUTaDinvU; // U = B*Q^(-1/2); UT = U^T; D = diag(A + gamma UU^T); aD = alpha*D; aDinv = (alpha*D)^-1; SMW capacitance matrix I + gamma*U^T*(alpha D)^-1*U
  PC        smw_cholesky;                              // Cholesky (or user-selected, prefix "smw_") solver for the capacitance matrix
  PetscReal gamma, alpha;                              // grad-div coefficient and Benzi relaxation parameter
  PetscBool setup_called;                              // guards against repeated setup in SmwSetup()
} SmwPCCtx;
84 
/*
 * Setup for the SMW shell preconditioner: builds alpha*D, its inverse, U^T,
 * the SMW capacitance matrix (I + gamma*U^T*(alpha D)^-1*U), and a direct
 * solver for the latter. Expects ctx->U, ctx->D, ctx->gamma, ctx->alpha to
 * have been filled in by the caller (see main). Idempotent: subsequent calls
 * return immediately once setup_called is set.
 */
PetscErrorCode SmwSetup(PC pc)
{
  SmwPCCtx *ctx;
  Vec       aDVec;

  PetscFunctionBeginUser;
  PetscCall(PCShellGetContext(pc, &ctx));

  // PCSetUp may be invoked more than once; only build the operators once
  if (ctx->setup_called) PetscFunctionReturn(PETSC_SUCCESS);

  // Create aD = alpha * D
  PetscCall(MatDuplicate(ctx->D, MAT_COPY_VALUES, &ctx->aD));
  PetscCall(MatScale(ctx->aD, ctx->alpha));

  // Create aDinv = (alpha*D)^-1 by inverting the diagonal entrywise
  // (aD is diagonal, so this is exact)
  PetscCall(MatDuplicate(ctx->aD, MAT_DO_NOT_COPY_VALUES, &ctx->aDinv));
  PetscCall(MatCreateVecs(ctx->aD, &aDVec, NULL));
  PetscCall(MatGetDiagonal(ctx->aD, aDVec));
  PetscCall(VecReciprocal(aDVec));
  PetscCall(MatDiagonalSet(ctx->aDinv, aDVec, INSERT_VALUES));

  // Create UT explicitly; it is applied repeatedly in SmwApply
  PetscCall(MatTranspose(ctx->U, MAT_INITIAL_MATRIX, &ctx->UT));

  // Create the SMW capacitance matrix I + gamma * U^T * (alpha D)^-1 * U.
  // This has div-grad (Laplacian-like) structure and is well conditioned,
  // unlike alpha*D + gamma*U*U^T itself for small alpha.
  PetscCall(MatMatMatMult(ctx->UT, ctx->aDinv, ctx->U, MAT_INITIAL_MATRIX, PETSC_CURRENT, &ctx->I_plus_gammaUTaDinvU));
  PetscCall(MatScale(ctx->I_plus_gammaUTaDinvU, ctx->gamma));
  PetscCall(MatShift(ctx->I_plus_gammaUTaDinvU, 1.));

  // Direct (Cholesky by default) solver for the capacitance matrix; runtime
  // configurable through the "smw_" options prefix
  PetscCall(PCCreate(PETSC_COMM_WORLD, &ctx->smw_cholesky));
  PetscCall(PCSetType(ctx->smw_cholesky, PCCHOLESKY));
  PetscCall(PCSetOperators(ctx->smw_cholesky, ctx->I_plus_gammaUTaDinvU, ctx->I_plus_gammaUTaDinvU));
  PetscCall(PCSetOptionsPrefix(ctx->smw_cholesky, "smw_"));
  PetscCall(PCSetFromOptions(ctx->smw_cholesky));
  PetscCall(PCSetUp(ctx->smw_cholesky));

  PetscCall(VecDestroy(&aDVec));

  ctx->setup_called = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
126 
SmwApply(PC pc,Vec x,Vec y)127 PetscErrorCode SmwApply(PC pc, Vec x, Vec y)
128 {
129   SmwPCCtx *ctx;
130   Vec       vel0, pressure0, pressure1;
131 
132   PetscFunctionBeginUser;
133   PetscCall(PCShellGetContext(pc, &ctx));
134 
135   PetscCall(MatCreateVecs(ctx->UT, &vel0, &pressure0));
136   PetscCall(VecDuplicate(pressure0, &pressure1));
137 
138   // First term
139   PetscCall(MatMult(ctx->aDinv, x, vel0));
140   PetscCall(MatMult(ctx->UT, vel0, pressure0));
141   PetscCall(PCApply(ctx->smw_cholesky, pressure0, pressure1));
142   PetscCall(MatMult(ctx->U, pressure1, vel0));
143   PetscCall(MatMult(ctx->aDinv, vel0, y));
144   PetscCall(VecScale(y, -ctx->gamma));
145 
146   // Second term
147   PetscCall(MatMult(ctx->aDinv, x, vel0));
148 
149   PetscCall(VecAXPY(y, 1, vel0));
150 
151   PetscCall(VecDestroy(&vel0));
152   PetscCall(VecDestroy(&pressure0));
153   PetscCall(VecDestroy(&pressure1));
154   PetscFunctionReturn(PETSC_SUCCESS);
155 }
156 
int main(int argc, char **args)
{
  Mat               A, B, Q, Acondensed, Bcondensed, BT, J, AplusJ, QInv, D, AplusD, JplusD, U;
  Mat               AplusJarray[2];
  Vec               bound, x, b, Qdiag, DVec;
  PetscBool         flg;
  PetscViewer       viewer;
  char              file[PETSC_MAX_PATH_LEN];
  PetscInt         *boundary_indices;
  PetscInt          boundary_indices_size, am, an, bm, bn, condensed_am, astart, aend, Dstart, Dend, num_local_bnd_dofs = 0;
  const PetscScalar zero = 0;
  IS                boundary_is, bulk_is;
  KSP               ksp;
  PC                pc, pcA, pcJ;
  PetscRandom       rctx;
  PetscReal        *boundary_indices_values;
  PetscReal         gamma = 100, alpha = .01;
  PetscMPIInt       rank;
  SmwPCCtx          ctx;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, NULL, help));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));

  // Load the IFISS-generated operators: velocity matrix A, divergence-related
  // matrix B, and pressure mass matrix Q (options -fA, -fB, -fQ)
  PetscCall(CreateAndLoadMat("A", &A));
  PetscCall(CreateAndLoadMat("B", &B));
  PetscCall(CreateAndLoadMat("Q", &Q));

  PetscCall(PetscOptionsGetString(NULL, NULL, "-fbound", file, sizeof(file), &flg));
  PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER, "Must indicate file with the -fbound option");

  // The boundary-dof index vector is read serially on rank 0 and then
  // broadcast, since every rank needs the full list to filter its own rows
  if (rank == 0) {
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, file, FILE_MODE_READ, &viewer));
    PetscCall(VecCreate(PETSC_COMM_SELF, &bound));
    PetscCall(PetscObjectSetName((PetscObject)bound, "bound"));
    PetscCall(VecSetType(bound, VECSEQ));
    PetscCall(VecLoad(bound, viewer));
    PetscCall(PetscViewerDestroy(&viewer));
    PetscCall(VecGetLocalSize(bound, &boundary_indices_size));
    PetscCall(VecGetArray(bound, &boundary_indices_values));
  }
  PetscCallMPI(MPI_Bcast(&boundary_indices_size, 1, MPIU_INT, 0, PETSC_COMM_WORLD));
  // Non-root ranks receive the values into their own scratch buffer
  if (rank != 0) PetscCall(PetscMalloc1(boundary_indices_size, &boundary_indices_values));
  PetscCallMPI(MPI_Bcast(boundary_indices_values, (PetscMPIInt)boundary_indices_size, MPIU_SCALAR, 0, PETSC_COMM_WORLD));

  PetscCall(MatGetSize(A, &am, NULL));
  // The total number of dofs for a given velocity component
  const PetscInt nc = am / 2;
  PetscCall(MatGetOwnershipRange(A, &astart, &aend));

  // Worst case: this rank owns both the vx and vy copy of every boundary dof
  PetscCall(PetscMalloc1(2 * boundary_indices_size, &boundary_indices));

  //
  // The dof index ordering appears to be all vx dofs and then all vy dofs.
  //

  // First do vx
  for (PetscInt i = 0; i < boundary_indices_size; ++i) {
    // MATLAB uses 1-based indexing
    const PetscInt bnd_dof = (PetscInt)boundary_indices_values[i] - 1;
    // keep only rows this rank owns
    if ((bnd_dof >= astart) && (bnd_dof < aend)) boundary_indices[num_local_bnd_dofs++] = bnd_dof;
  }

  // Now vy (same indices shifted by the per-component dof count)
  for (PetscInt i = 0; i < boundary_indices_size; ++i) {
    // MATLAB uses 1-based indexing
    const PetscInt bnd_dof = ((PetscInt)boundary_indices_values[i] - 1) + nc;
    if ((bnd_dof >= astart) && (bnd_dof < aend)) boundary_indices[num_local_bnd_dofs++] = bnd_dof;
  }
  if (rank == 0) PetscCall(VecRestoreArray(bound, &boundary_indices_values));
  else PetscCall(PetscFree(boundary_indices_values));

  // bulk_is = complement of the boundary dofs: the rows/cols we keep after
  // condensing out the Dirichlet boundary
  PetscCall(ISCreateGeneral(PETSC_COMM_WORLD, num_local_bnd_dofs, boundary_indices, PETSC_USE_POINTER, &boundary_is));
  PetscCall(ISComplement(boundary_is, astart, aend, &bulk_is));

  PetscCall(MatCreateSubMatrix(A, bulk_is, bulk_is, MAT_INITIAL_MATRIX, &Acondensed));
  // Can't pass null for row index set :-(
  PetscCall(MatTranspose(B, MAT_INPLACE_MATRIX, &B));
  PetscCall(MatCreateSubMatrix(B, bulk_is, NULL, MAT_INITIAL_MATRIX, &Bcondensed));
  PetscCall(MatGetLocalSize(Acondensed, &am, &an));
  PetscCall(MatGetLocalSize(Bcondensed, &bm, &bn));

  // Create QInv = diag(Q)^-1 (Q is lumped to its diagonal here)
  PetscCall(MatCreateVecs(Q, &Qdiag, NULL));
  PetscCall(MatGetDiagonal(Q, Qdiag));
  PetscCall(VecReciprocal(Qdiag));
  PetscCall(MatDuplicate(Q, MAT_DO_NOT_COPY_VALUES, &QInv));
  PetscCall(MatDiagonalSet(QInv, Qdiag, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(QInv, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(QInv, MAT_FINAL_ASSEMBLY));

  // Create J = gamma * B Q^-1 B^T (the grad-div term, gamma*U*U^T)
  PetscCall(MatTranspose(Bcondensed, MAT_INITIAL_MATRIX, &BT));
  PetscCall(MatMatMatMult(Bcondensed, QInv, BT, MAT_INITIAL_MATRIX, PETSC_CURRENT, &J));
  PetscCall(MatScale(J, gamma));

  // Create sum of A + J as a composite so the two terms are never added
  // explicitly (J is much denser than A)
  AplusJarray[0] = Acondensed;
  AplusJarray[1] = J;
  PetscCall(MatCreateComposite(PETSC_COMM_WORLD, 2, AplusJarray, &AplusJ));

  // Create decomposition matrices
  // We've already used Qdiag, which currently represents Q^-1,  for its necessary purposes. Let's
  // convert it to represent Q^(-1/2)
  PetscCall(VecSqrtAbs(Qdiag));
  // We can similarly reuse Qinv
  PetscCall(MatDiagonalSet(QInv, Qdiag, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(QInv, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(QInv, MAT_FINAL_ASSEMBLY));
  // Create U = B Q^(-1/2), so that J = gamma*U*U^T
  PetscCall(MatMatMult(Bcondensed, QInv, MAT_INITIAL_MATRIX, PETSC_CURRENT, &U));

  // Create x and b: random exact solution, RHS by forward multiplication
  // (see header comment for why a random RHS is used instead of ones)
  PetscCall(MatCreateVecs(AplusJ, &x, &b));
  PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
  PetscCall(VecSetRandom(x, rctx));
  PetscCall(PetscRandomDestroy(&rctx));
  PetscCall(MatMult(AplusJ, x, b));

  // Compute preconditioner operators: D = diag(A + J), then
  // AplusD = A + alpha*D and JplusD = J + alpha*D (the two Benzi factors)
  PetscCall(MatGetLocalSize(Acondensed, &condensed_am, NULL));
  PetscCall(MatCreate(PETSC_COMM_WORLD, &D));
  PetscCall(MatSetType(D, MATAIJ));
  PetscCall(MatSetSizes(D, condensed_am, condensed_am, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatGetOwnershipRange(D, &Dstart, &Dend));
  // Preallocate/insert an explicit zero diagonal so MatDiagonalSet below has
  // a nonzero structure to write into
  for (PetscInt i = Dstart; i < Dend; ++i) PetscCall(MatSetValues(D, 1, &i, 1, &i, &zero, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(D, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(D, MAT_FINAL_ASSEMBLY));
  PetscCall(MatCreateVecs(D, &DVec, NULL));
  PetscCall(MatGetDiagonal(AplusJ, DVec));
  PetscCall(MatDiagonalSet(D, DVec, INSERT_VALUES));
  PetscCall(MatDuplicate(Acondensed, MAT_COPY_VALUES, &AplusD));
  PetscCall(MatAXPY(AplusD, alpha, D, SUBSET_NONZERO_PATTERN));
  PetscCall(MatDuplicate(J, MAT_COPY_VALUES, &JplusD));
  PetscCall(MatAXPY(JplusD, alpha, D, SUBSET_NONZERO_PATTERN));

  // Initialize our SMW context (remaining fields are built lazily in SmwSetup)
  ctx.U            = U;
  ctx.D            = D;
  ctx.gamma        = gamma;
  ctx.alpha        = alpha;
  ctx.setup_called = PETSC_FALSE;

  // Set preconditioner operators: FGMRES outer solve with the multiplicative
  // "special" composite PC, P = (A + alpha D) D^-1 (alpha D + J)
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
  PetscCall(KSPSetType(ksp, KSPFGMRES));
  PetscCall(KSPSetOperators(ksp, AplusJ, AplusJ));
  PetscCall(KSPSetNormType(ksp, KSP_NORM_UNPRECONDITIONED));
  PetscCall(KSPGMRESSetRestart(ksp, 300));
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PCSetType(pc, PCCOMPOSITE));
  PetscCall(PCCompositeSetType(pc, PC_COMPOSITE_SPECIAL));
  PetscCall(PCCompositeAddPCType(pc, PCILU));
  PetscCall(PCCompositeAddPCType(pc, PCSHELL));
  PetscCall(PCCompositeGetPC(pc, 0, &pcA));
  PetscCall(PCCompositeGetPC(pc, 1, &pcJ));
  PetscCall(PCSetOperators(pcA, AplusD, AplusD));
  PetscCall(PCSetOperators(pcJ, JplusD, JplusD));
  PetscCall(PCShellSetContext(pcJ, &ctx));
  PetscCall(PCShellSetApply(pcJ, SmwApply));
  PetscCall(PCShellSetSetUp(pcJ, SmwSetup));
  // D is the middle matrix in the P_alpha = (A + aD) D^-1 (aD + J) product
  PetscCall(PCCompositeSpecialSetAlphaMat(pc, D));

  // Solve
  PetscCall(KSPSetFromOptions(ksp));
  PetscCall(KSPSolve(ksp, b, x));

  // Cleanup (bound only exists on rank 0; ISDestroy precedes freeing
  // boundary_indices because the IS borrows it via PETSC_USE_POINTER)
  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&Q));
  PetscCall(MatDestroy(&Acondensed));
  PetscCall(MatDestroy(&Bcondensed));
  PetscCall(MatDestroy(&BT));
  PetscCall(MatDestroy(&J));
  PetscCall(MatDestroy(&AplusJ));
  PetscCall(MatDestroy(&QInv));
  PetscCall(MatDestroy(&D));
  PetscCall(MatDestroy(&AplusD));
  PetscCall(MatDestroy(&JplusD));
  PetscCall(MatDestroy(&U));
  if (rank == 0) PetscCall(VecDestroy(&bound));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscCall(VecDestroy(&Qdiag));
  PetscCall(VecDestroy(&DVec));
  PetscCall(ISDestroy(&boundary_is));
  PetscCall(ISDestroy(&bulk_is));
  PetscCall(KSPDestroy(&ksp));
  PetscCall(PetscFree(boundary_indices));
  PetscCall(MatDestroy(&ctx.UT));
  PetscCall(MatDestroy(&ctx.I_plus_gammaUTaDinvU));
  PetscCall(MatDestroy(&ctx.aD));
  PetscCall(MatDestroy(&ctx.aDinv));
  PetscCall(PCDestroy(&ctx.smw_cholesky));

  PetscCall(PetscFinalize());
  return 0;
}
355 
356 /*TEST
357 
358    build:
359       requires: !complex
360 
361    test:
362       args: -fA ${DATAFILESPATH}/matrices/ifiss/A -fB ${DATAFILESPATH}/matrices/ifiss/B -fQ ${DATAFILESPATH}/matrices/ifiss/Q -fbound ${DATAFILESPATH}/is/ifiss/bound -ksp_monitor
363       requires: datafilespath defined(PETSC_USE_64BIT_INDICES) !complex double
364 
365    test:
366       suffix: 2
367       nsize: 2
368       args: -fA ${DATAFILESPATH}/matrices/ifiss/A -fB ${DATAFILESPATH}/matrices/ifiss/B -fQ ${DATAFILESPATH}/matrices/ifiss/Q -fbound ${DATAFILESPATH}/is/ifiss/bound -ksp_monitor
369       requires: datafilespath defined(PETSC_USE_64BIT_INDICES) !complex double strumpack
370 
371 TEST*/
372