1 static char help[] = "Applies the 2023 preconditioner of Benzi and Faccio\n\n"; 2 3 #include <petscmat.h> 4 #include <petscviewer.h> 5 #include <petscvec.h> 6 #include <petscis.h> 7 #include <petscksp.h> 8 9 /* 10 * This example reproduces the preconditioner outlined in Benzi's paper 11 * https://doi.org/10.1137/22M1505529. The problem considered is: 12 * 13 * (A + gamma UU^T)x = b 14 * 15 * whose structure arises from, for example, grad-div stabilization in the 16 * Navier-Stokes momentum equation. In the code we will also refer to 17 * gamma UU^T as J. The preconditioner developed by Benzi is: 18 * 19 * P_alpha = (A + alpha I)(alpha I + gamma UU^T) 20 * 21 * Another variant which may yield better convergence depending on the specific 22 * problem is 23 * 24 * P_alpha = (A + alpha D) D^-1 (alpha D + gamma UU^T) 25 * 26 * where D = diag(A + gamma UU^T). This is the variant implemented 27 * here. Application of the preconditioner involves (approximate) solution of 28 * two systems, one with (A + alpha D), and another with (alpha D + gamma 29 * UU^T). For small alpha (which generally yields the best overall 30 * preconditioner), (alpha D + gamma UU^T) is ill-conditioned. To combat this we 31 * solve (alpha D + gamma UU^T) using the Sherman-Morrison-Woodbury (SMW) matrix 32 * identity, which effectively converts the grad-div structure to a much nicer 33 * div-grad (laplacian) structure. 34 * 35 * The matrices used as input can be generated by running the matlab/octave 36 * program IFISS. The particular matrices checked into the datafiles repository 37 * and used in testing of this example correspond to a leaky lid-driven cavity 38 * with a stretched grid and Q2-Q1 finite elements. The matrices are taken from 39 * the last iteration of a Picard solve with tolerance 1e-8 with a viscosity of 40 * 0.1 and a 32x32 grid. We summarize below iteration counts from running this 41 * preconditioner for different grids and viscosity with a KSP tolerance of 1e-6. 42 * 43 * 32x32 64x64 128x128 44 * 0.1 28 36 43 45 * 0.01 59 75 73 46 * 0.002 136 161 167 47 * 48 * A reader of Benzi's paper will note that the performance shown above with 49 * respect to decreasing viscosity is significantly worse than in the 50 * paper. This is actually because of the choice of RHS. In Benzi's work, the 51 * RHS was generated by multiplying the operator with a vector of 1s whereas 52 * here we generate the RHS using a random vector. The iteration counts from the 53 * Benzi paper can be reproduced by changing the RHS generation in this example, 54 * but we choose to use the more difficult RHS as the resulting performance may 55 * more closely match what users experience in "physical" contexts. 56 */ 57 58 PetscErrorCode CreateAndLoadMat(const char *mat_name, Mat *mat) 59 { 60 PetscViewer viewer; 61 char file[PETSC_MAX_PATH_LEN]; 62 char flag_name[10] = "-f"; 63 PetscBool flg; 64 65 PetscFunctionBeginUser; 66 PetscCall(PetscOptionsGetString(NULL, NULL, strcat(flag_name, mat_name), file, sizeof(file), &flg)); 67 PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER, "Must indicate file with the -f<mat_name> option"); 68 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer)); 69 PetscCall(MatCreate(PETSC_COMM_WORLD, mat)); 70 PetscCall(MatSetType(*mat, MATAIJ)); 71 PetscCall(PetscObjectSetName((PetscObject)*mat, mat_name)); 72 PetscCall(MatSetFromOptions(*mat)); 73 PetscCall(MatLoad(*mat, viewer)); 74 PetscCall(PetscViewerDestroy(&viewer)); 75 PetscFunctionReturn(PETSC_SUCCESS); 76 } 77 78 typedef struct { 79 Mat U, UT, D, aD, aDinv, I_plus_gammaUTaDinvU; 80 PC smw_cholesky; 81 PetscReal gamma, alpha; 82 PetscBool setup_called; 83 } SmwPCCtx; 84 85 PetscErrorCode SmwSetup(PC pc) 86 { 87 SmwPCCtx *ctx; 88 Vec aDVec; 89 90 PetscFunctionBeginUser; 91 PetscCall(PCShellGetContext(pc, &ctx)); 92 93 if (ctx->setup_called) PetscFunctionReturn(PETSC_SUCCESS); 94 95 // Create aD 96 PetscCall(MatDuplicate(ctx->D, MAT_COPY_VALUES, &ctx->aD)); 97 PetscCall(MatScale(ctx->aD, ctx->alpha)); 98 99 // Create aDinv 100 PetscCall(MatDuplicate(ctx->aD, MAT_DO_NOT_COPY_VALUES, &ctx->aDinv)); 101 PetscCall(MatCreateVecs(ctx->aD, &aDVec, NULL)); 102 PetscCall(MatGetDiagonal(ctx->aD, aDVec)); 103 PetscCall(VecReciprocal(aDVec)); 104 PetscCall(MatDiagonalSet(ctx->aDinv, aDVec, INSERT_VALUES)); 105 106 // Create UT 107 PetscCall(MatTranspose(ctx->U, MAT_INITIAL_MATRIX, &ctx->UT)); 108 109 // Create sum Mat 110 PetscCall(MatMatMatMult(ctx->UT, ctx->aDinv, ctx->U, MAT_INITIAL_MATRIX, PETSC_CURRENT, &ctx->I_plus_gammaUTaDinvU)); 111 PetscCall(MatScale(ctx->I_plus_gammaUTaDinvU, ctx->gamma)); 112 PetscCall(MatShift(ctx->I_plus_gammaUTaDinvU, 1.)); 113 114 PetscCall(PCCreate(PETSC_COMM_WORLD, &ctx->smw_cholesky)); 115 PetscCall(PCSetType(ctx->smw_cholesky, PCCHOLESKY)); 116 PetscCall(PCSetOperators(ctx->smw_cholesky, ctx->I_plus_gammaUTaDinvU, ctx->I_plus_gammaUTaDinvU)); 117 PetscCall(PCSetOptionsPrefix(ctx->smw_cholesky, "smw_")); 118 PetscCall(PCSetFromOptions(ctx->smw_cholesky)); 119 PetscCall(PCSetUp(ctx->smw_cholesky)); 120 121 PetscCall(VecDestroy(&aDVec)); 122 123 ctx->setup_called = PETSC_TRUE; 124 PetscFunctionReturn(PETSC_SUCCESS); 125 } 126 127 PetscErrorCode SmwApply(PC pc, Vec x, Vec y) 128 { 129 SmwPCCtx *ctx; 130 Vec vel0, pressure0, pressure1; 131 132 PetscFunctionBeginUser; 133 PetscCall(PCShellGetContext(pc, &ctx)); 134 135 PetscCall(MatCreateVecs(ctx->UT, &vel0, &pressure0)); 136 PetscCall(VecDuplicate(pressure0, &pressure1)); 137 138 // First term 139 PetscCall(MatMult(ctx->aDinv, x, vel0)); 140 PetscCall(MatMult(ctx->UT, vel0, pressure0)); 141 PetscCall(PCApply(ctx->smw_cholesky, pressure0, pressure1)); 142 PetscCall(MatMult(ctx->U, pressure1, vel0)); 143 PetscCall(MatMult(ctx->aDinv, vel0, y)); 144 PetscCall(VecScale(y, -ctx->gamma)); 145 146 // Second term 147 PetscCall(MatMult(ctx->aDinv, x, vel0)); 148 149 PetscCall(VecAXPY(y, 1, vel0)); 150 151 PetscCall(VecDestroy(&vel0)); 152 PetscCall(VecDestroy(&pressure0)); 153 PetscCall(VecDestroy(&pressure1)); 154 PetscFunctionReturn(PETSC_SUCCESS); 155 } 156 157 int main(int argc, char **args) 158 { 159 Mat A, B, Q, Acondensed, Bcondensed, BT, J, AplusJ, QInv, D, AplusD, JplusD, U; 160 Mat AplusJarray[2]; 161 Vec bound, x, b, Qdiag, DVec; 162 PetscBool flg; 163 PetscViewer viewer; 164 char file[PETSC_MAX_PATH_LEN]; 165 PetscInt *boundary_indices; 166 PetscInt boundary_indices_size, am, an, bm, bn, condensed_am, astart, aend, Dstart, Dend, num_local_bnd_dofs = 0; 167 const PetscScalar zero = 0; 168 IS boundary_is, bulk_is; 169 KSP ksp; 170 PC pc, pcA, pcJ; 171 PetscRandom rctx; 172 PetscReal *boundary_indices_values; 173 PetscReal gamma = 100, alpha = .01; 174 PetscMPIInt rank; 175 SmwPCCtx ctx; 176 177 PetscFunctionBeginUser; 178 PetscCall(PetscInitialize(&argc, &args, NULL, help)); 179 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 180 181 PetscCall(CreateAndLoadMat("A", &A)); 182 PetscCall(CreateAndLoadMat("B", &B)); 183 PetscCall(CreateAndLoadMat("Q", &Q)); 184 185 PetscCall(PetscOptionsGetString(NULL, NULL, "-fbound", file, sizeof(file), &flg)); 186 PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER, "Must indicate file with the -fbound option"); 187 188 if (rank == 0) { 189 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, file, FILE_MODE_READ, &viewer)); 190 PetscCall(VecCreate(PETSC_COMM_SELF, &bound)); 191 PetscCall(PetscObjectSetName((PetscObject)bound, "bound")); 192 PetscCall(VecSetType(bound, VECSEQ)); 193 PetscCall(VecLoad(bound, viewer)); 194 PetscCall(PetscViewerDestroy(&viewer)); 195 PetscCall(VecGetLocalSize(bound, &boundary_indices_size)); 196 PetscCall(VecGetArray(bound, &boundary_indices_values)); 197 } 198 PetscCallMPI(MPI_Bcast(&boundary_indices_size, 1, MPIU_INT, 0, PETSC_COMM_WORLD)); 199 if (rank != 0) PetscCall(PetscMalloc1(boundary_indices_size, &boundary_indices_values)); 200 PetscCallMPI(MPI_Bcast(boundary_indices_values, (PetscMPIInt)boundary_indices_size, MPIU_SCALAR, 0, PETSC_COMM_WORLD)); 201 202 PetscCall(MatGetSize(A, &am, NULL)); 203 // The total number of dofs for a given velocity component 204 const PetscInt nc = am / 2; 205 PetscCall(MatGetOwnershipRange(A, &astart, &aend)); 206 207 PetscCall(PetscMalloc1(2 * boundary_indices_size, &boundary_indices)); 208 209 // 210 // The dof index ordering appears to be all vx dofs and then all vy dofs. 211 // 212 213 // First do vx 214 for (PetscInt i = 0; i < boundary_indices_size; ++i) { 215 // MATLAB uses 1-based indexing 216 const PetscInt bnd_dof = (PetscInt)boundary_indices_values[i] - 1; 217 if ((bnd_dof >= astart) && (bnd_dof < aend)) boundary_indices[num_local_bnd_dofs++] = bnd_dof; 218 } 219 220 // Now vy 221 for (PetscInt i = 0; i < boundary_indices_size; ++i) { 222 // MATLAB uses 1-based indexing 223 const PetscInt bnd_dof = ((PetscInt)boundary_indices_values[i] - 1) + nc; 224 if ((bnd_dof >= astart) && (bnd_dof < aend)) boundary_indices[num_local_bnd_dofs++] = bnd_dof; 225 } 226 if (rank == 0) PetscCall(VecRestoreArray(bound, &boundary_indices_values)); 227 else PetscCall(PetscFree(boundary_indices_values)); 228 229 PetscCall(ISCreateGeneral(PETSC_COMM_WORLD, num_local_bnd_dofs, boundary_indices, PETSC_USE_POINTER, &boundary_is)); 230 PetscCall(ISComplement(boundary_is, astart, aend, &bulk_is)); 231 232 PetscCall(MatCreateSubMatrix(A, bulk_is, bulk_is, MAT_INITIAL_MATRIX, &Acondensed)); 233 // Can't pass null for row index set :-( 234 PetscCall(MatTranspose(B, MAT_INPLACE_MATRIX, &B)); 235 PetscCall(MatCreateSubMatrix(B, bulk_is, NULL, MAT_INITIAL_MATRIX, &Bcondensed)); 236 PetscCall(MatGetLocalSize(Acondensed, &am, &an)); 237 PetscCall(MatGetLocalSize(Bcondensed, &bm, &bn)); 238 239 // Create QInv 240 PetscCall(MatCreateVecs(Q, &Qdiag, NULL)); 241 PetscCall(MatGetDiagonal(Q, Qdiag)); 242 PetscCall(VecReciprocal(Qdiag)); 243 PetscCall(MatDuplicate(Q, MAT_DO_NOT_COPY_VALUES, &QInv)); 244 PetscCall(MatDiagonalSet(QInv, Qdiag, INSERT_VALUES)); 245 PetscCall(MatAssemblyBegin(QInv, MAT_FINAL_ASSEMBLY)); 246 PetscCall(MatAssemblyEnd(QInv, MAT_FINAL_ASSEMBLY)); 247 248 // Create J 249 PetscCall(MatTranspose(Bcondensed, MAT_INITIAL_MATRIX, &BT)); 250 PetscCall(MatMatMatMult(Bcondensed, QInv, BT, MAT_INITIAL_MATRIX, PETSC_CURRENT, &J)); 251 PetscCall(MatScale(J, gamma)); 252 253 // Create sum of A + J 254 AplusJarray[0] = Acondensed; 255 AplusJarray[1] = J; 256 PetscCall(MatCreateComposite(PETSC_COMM_WORLD, 2, AplusJarray, &AplusJ)); 257 258 // Create decomposition matrices 259 // We've already used Qdiag, which currently represents Q^-1, for its necessary purposes. Let's 260 // convert it to represent Q^(-1/2) 261 PetscCall(VecSqrtAbs(Qdiag)); 262 // We can similarly reuse Qinv 263 PetscCall(MatDiagonalSet(QInv, Qdiag, INSERT_VALUES)); 264 PetscCall(MatAssemblyBegin(QInv, MAT_FINAL_ASSEMBLY)); 265 PetscCall(MatAssemblyEnd(QInv, MAT_FINAL_ASSEMBLY)); 266 // Create U 267 PetscCall(MatMatMult(Bcondensed, QInv, MAT_INITIAL_MATRIX, PETSC_CURRENT, &U)); 268 269 // Create x and b 270 PetscCall(MatCreateVecs(AplusJ, &x, &b)); 271 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx)); 272 PetscCall(VecSetRandom(x, rctx)); 273 PetscCall(PetscRandomDestroy(&rctx)); 274 PetscCall(MatMult(AplusJ, x, b)); 275 276 // Compute preconditioner operators 277 PetscCall(MatGetLocalSize(Acondensed, &condensed_am, NULL)); 278 PetscCall(MatCreate(PETSC_COMM_WORLD, &D)); 279 PetscCall(MatSetType(D, MATAIJ)); 280 PetscCall(MatSetSizes(D, condensed_am, condensed_am, PETSC_DETERMINE, PETSC_DETERMINE)); 281 PetscCall(MatGetOwnershipRange(D, &Dstart, &Dend)); 282 for (PetscInt i = Dstart; i < Dend; ++i) PetscCall(MatSetValues(D, 1, &i, 1, &i, &zero, INSERT_VALUES)); 283 PetscCall(MatAssemblyBegin(D, MAT_FINAL_ASSEMBLY)); 284 PetscCall(MatAssemblyEnd(D, MAT_FINAL_ASSEMBLY)); 285 PetscCall(MatCreateVecs(D, &DVec, NULL)); 286 PetscCall(MatGetDiagonal(AplusJ, DVec)); 287 PetscCall(MatDiagonalSet(D, DVec, INSERT_VALUES)); 288 PetscCall(MatDuplicate(Acondensed, MAT_COPY_VALUES, &AplusD)); 289 PetscCall(MatAXPY(AplusD, alpha, D, SUBSET_NONZERO_PATTERN)); 290 PetscCall(MatDuplicate(J, MAT_COPY_VALUES, &JplusD)); 291 PetscCall(MatAXPY(JplusD, alpha, D, SUBSET_NONZERO_PATTERN)); 292 293 // Initialize our SMW context 294 ctx.U = U; 295 ctx.D = D; 296 ctx.gamma = gamma; 297 ctx.alpha = alpha; 298 ctx.setup_called = PETSC_FALSE; 299 300 // Set preconditioner operators 301 PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp)); 302 PetscCall(KSPSetType(ksp, KSPFGMRES)); 303 PetscCall(KSPSetOperators(ksp, AplusJ, AplusJ)); 304 PetscCall(KSPSetNormType(ksp, KSP_NORM_UNPRECONDITIONED)); 305 PetscCall(KSPGMRESSetRestart(ksp, 300)); 306 PetscCall(KSPGetPC(ksp, &pc)); 307 PetscCall(PCSetType(pc, PCCOMPOSITE)); 308 PetscCall(PCCompositeSetType(pc, PC_COMPOSITE_SPECIAL)); 309 PetscCall(PCCompositeAddPCType(pc, PCILU)); 310 PetscCall(PCCompositeAddPCType(pc, PCSHELL)); 311 PetscCall(PCCompositeGetPC(pc, 0, &pcA)); 312 PetscCall(PCCompositeGetPC(pc, 1, &pcJ)); 313 PetscCall(PCSetOperators(pcA, AplusD, AplusD)); 314 PetscCall(PCSetOperators(pcJ, JplusD, JplusD)); 315 PetscCall(PCShellSetContext(pcJ, &ctx)); 316 PetscCall(PCShellSetApply(pcJ, SmwApply)); 317 PetscCall(PCShellSetSetUp(pcJ, SmwSetup)); 318 PetscCall(PCCompositeSpecialSetAlphaMat(pc, D)); 319 320 // Solve 321 PetscCall(KSPSetFromOptions(ksp)); 322 PetscCall(KSPSolve(ksp, b, x)); 323 324 PetscCall(MatDestroy(&A)); 325 PetscCall(MatDestroy(&B)); 326 PetscCall(MatDestroy(&Q)); 327 PetscCall(MatDestroy(&Acondensed)); 328 PetscCall(MatDestroy(&Bcondensed)); 329 PetscCall(MatDestroy(&BT)); 330 PetscCall(MatDestroy(&J)); 331 PetscCall(MatDestroy(&AplusJ)); 332 PetscCall(MatDestroy(&QInv)); 333 PetscCall(MatDestroy(&D)); 334 PetscCall(MatDestroy(&AplusD)); 335 PetscCall(MatDestroy(&JplusD)); 336 PetscCall(MatDestroy(&U)); 337 if (rank == 0) PetscCall(VecDestroy(&bound)); 338 PetscCall(VecDestroy(&x)); 339 PetscCall(VecDestroy(&b)); 340 PetscCall(VecDestroy(&Qdiag)); 341 PetscCall(VecDestroy(&DVec)); 342 PetscCall(ISDestroy(&boundary_is)); 343 PetscCall(ISDestroy(&bulk_is)); 344 PetscCall(KSPDestroy(&ksp)); 345 PetscCall(PetscFree(boundary_indices)); 346 PetscCall(MatDestroy(&ctx.UT)); 347 PetscCall(MatDestroy(&ctx.I_plus_gammaUTaDinvU)); 348 PetscCall(MatDestroy(&ctx.aD)); 349 PetscCall(MatDestroy(&ctx.aDinv)); 350 PetscCall(PCDestroy(&ctx.smw_cholesky)); 351 352 PetscCall(PetscFinalize()); 353 return 0; 354 } 355 356 /*TEST 357 358 build: 359 requires: !complex 360 361 test: 362 args: -fA ${DATAFILESPATH}/matrices/ifiss/A -fB ${DATAFILESPATH}/matrices/ifiss/B -fQ ${DATAFILESPATH}/matrices/ifiss/Q -fbound ${DATAFILESPATH}/is/ifiss/bound -ksp_monitor 363 requires: datafilespath defined(PETSC_USE_64BIT_INDICES) !complex double 364 365 test: 366 suffix: 2 367 nsize: 2 368 args: -fA ${DATAFILESPATH}/matrices/ifiss/A -fB ${DATAFILESPATH}/matrices/ifiss/B -fQ ${DATAFILESPATH}/matrices/ifiss/Q -fbound ${DATAFILESPATH}/is/ifiss/bound -ksp_monitor 369 requires: datafilespath defined(PETSC_USE_64BIT_INDICES) !complex double strumpack 370 371 TEST*/ 372