1 2 /* 3 Include files needed for the PBJacobi preconditioner: 4 pcimpl.h - private include file intended for use by all preconditioners 5 */ 6 7 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 8 9 /* 10 Private context (data structure) for the PBJacobi preconditioner. 11 */ 12 typedef struct { 13 const MatScalar *diag; 14 PetscInt bs, mbs; 15 } PC_PBJacobi; 16 17 static PetscErrorCode PCApply_PBJacobi_1(PC pc, Vec x, Vec y) { 18 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 19 PetscInt i, m = jac->mbs; 20 const MatScalar *diag = jac->diag; 21 const PetscScalar *xx; 22 PetscScalar *yy; 23 24 PetscFunctionBegin; 25 PetscCall(VecGetArrayRead(x, &xx)); 26 PetscCall(VecGetArray(y, &yy)); 27 for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i]; 28 PetscCall(VecRestoreArrayRead(x, &xx)); 29 PetscCall(VecRestoreArray(y, &yy)); 30 PetscCall(PetscLogFlops(m)); 31 PetscFunctionReturn(0); 32 } 33 34 static PetscErrorCode PCApply_PBJacobi_2(PC pc, Vec x, Vec y) { 35 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 36 PetscInt i, m = jac->mbs; 37 const MatScalar *diag = jac->diag; 38 PetscScalar x0, x1, *yy; 39 const PetscScalar *xx; 40 41 PetscFunctionBegin; 42 PetscCall(VecGetArrayRead(x, &xx)); 43 PetscCall(VecGetArray(y, &yy)); 44 for (i = 0; i < m; i++) { 45 x0 = xx[2 * i]; 46 x1 = xx[2 * i + 1]; 47 yy[2 * i] = diag[0] * x0 + diag[2] * x1; 48 yy[2 * i + 1] = diag[1] * x0 + diag[3] * x1; 49 diag += 4; 50 } 51 PetscCall(VecRestoreArrayRead(x, &xx)); 52 PetscCall(VecRestoreArray(y, &yy)); 53 PetscCall(PetscLogFlops(6.0 * m)); 54 PetscFunctionReturn(0); 55 } 56 static PetscErrorCode PCApply_PBJacobi_3(PC pc, Vec x, Vec y) { 57 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 58 PetscInt i, m = jac->mbs; 59 const MatScalar *diag = jac->diag; 60 PetscScalar x0, x1, x2, *yy; 61 const PetscScalar *xx; 62 63 PetscFunctionBegin; 64 PetscCall(VecGetArrayRead(x, &xx)); 65 PetscCall(VecGetArray(y, &yy)); 66 for (i = 0; i < m; i++) { 67 x0 = xx[3 * i]; 68 x1 = xx[3 * i + 1]; 69 x2 = xx[3 * i + 2]; 70 71 yy[3 * i] = diag[0] * x0 + diag[3] * x1 + diag[6] * x2; 72 yy[3 * i + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2; 73 yy[3 * i + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2; 74 diag += 9; 75 } 76 PetscCall(VecRestoreArrayRead(x, &xx)); 77 PetscCall(VecRestoreArray(y, &yy)); 78 PetscCall(PetscLogFlops(15.0 * m)); 79 PetscFunctionReturn(0); 80 } 81 static PetscErrorCode PCApply_PBJacobi_4(PC pc, Vec x, Vec y) { 82 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 83 PetscInt i, m = jac->mbs; 84 const MatScalar *diag = jac->diag; 85 PetscScalar x0, x1, x2, x3, *yy; 86 const PetscScalar *xx; 87 88 PetscFunctionBegin; 89 PetscCall(VecGetArrayRead(x, &xx)); 90 PetscCall(VecGetArray(y, &yy)); 91 for (i = 0; i < m; i++) { 92 x0 = xx[4 * i]; 93 x1 = xx[4 * i + 1]; 94 x2 = xx[4 * i + 2]; 95 x3 = xx[4 * i + 3]; 96 97 yy[4 * i] = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3; 98 yy[4 * i + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3; 99 yy[4 * i + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3; 100 yy[4 * i + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3; 101 diag += 16; 102 } 103 PetscCall(VecRestoreArrayRead(x, &xx)); 104 PetscCall(VecRestoreArray(y, &yy)); 105 PetscCall(PetscLogFlops(28.0 * m)); 106 PetscFunctionReturn(0); 107 } 108 static PetscErrorCode PCApply_PBJacobi_5(PC pc, Vec x, Vec y) { 109 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 110 PetscInt i, m = jac->mbs; 111 const MatScalar *diag = jac->diag; 112 PetscScalar x0, x1, x2, x3, x4, *yy; 113 const PetscScalar *xx; 114 115 PetscFunctionBegin; 116 PetscCall(VecGetArrayRead(x, &xx)); 117 PetscCall(VecGetArray(y, &yy)); 118 for (i = 0; i < m; i++) { 119 x0 = xx[5 * i]; 120 x1 = xx[5 * i + 1]; 121 x2 = xx[5 * i + 2]; 122 x3 = xx[5 * i + 3]; 123 x4 = xx[5 * i + 4]; 124 125 yy[5 * i] = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4; 126 yy[5 * i + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4; 127 yy[5 * i + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4; 128 yy[5 * i + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4; 129 yy[5 * i + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4; 130 diag += 25; 131 } 132 PetscCall(VecRestoreArrayRead(x, &xx)); 133 PetscCall(VecRestoreArray(y, &yy)); 134 PetscCall(PetscLogFlops(45.0 * m)); 135 PetscFunctionReturn(0); 136 } 137 static PetscErrorCode PCApply_PBJacobi_6(PC pc, Vec x, Vec y) { 138 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 139 PetscInt i, m = jac->mbs; 140 const MatScalar *diag = jac->diag; 141 PetscScalar x0, x1, x2, x3, x4, x5, *yy; 142 const PetscScalar *xx; 143 144 PetscFunctionBegin; 145 PetscCall(VecGetArrayRead(x, &xx)); 146 PetscCall(VecGetArray(y, &yy)); 147 for (i = 0; i < m; i++) { 148 x0 = xx[6 * i]; 149 x1 = xx[6 * i + 1]; 150 x2 = xx[6 * i + 2]; 151 x3 = xx[6 * i + 3]; 152 x4 = xx[6 * i + 4]; 153 x5 = xx[6 * i + 5]; 154 155 yy[6 * i] = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5; 156 yy[6 * i + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5; 157 yy[6 * i + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5; 158 yy[6 * i + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5; 159 yy[6 * i + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5; 160 yy[6 * i + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5; 161 diag += 36; 162 } 163 PetscCall(VecRestoreArrayRead(x, &xx)); 164 PetscCall(VecRestoreArray(y, &yy)); 165 PetscCall(PetscLogFlops(66.0 * m)); 166 PetscFunctionReturn(0); 167 } 168 static PetscErrorCode PCApply_PBJacobi_7(PC pc, Vec x, Vec y) { 169 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 170 PetscInt i, m = jac->mbs; 171 const MatScalar *diag = jac->diag; 172 PetscScalar x0, x1, x2, x3, x4, x5, x6, *yy; 173 const PetscScalar *xx; 174 175 PetscFunctionBegin; 176 PetscCall(VecGetArrayRead(x, &xx)); 177 PetscCall(VecGetArray(y, &yy)); 178 for (i = 0; i < m; i++) { 179 x0 = xx[7 * i]; 180 x1 = xx[7 * i + 1]; 181 x2 = xx[7 * i + 2]; 182 x3 = xx[7 * i + 3]; 183 x4 = xx[7 * i + 4]; 184 x5 = xx[7 * i + 5]; 185 x6 = xx[7 * i + 6]; 186 187 yy[7 * i] = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6; 188 yy[7 * i + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6; 189 yy[7 * i + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6; 190 yy[7 * i + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6; 191 yy[7 * i + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6; 192 yy[7 * i + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6; 193 yy[7 * i + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6; 194 diag += 49; 195 } 196 PetscCall(VecRestoreArrayRead(x, &xx)); 197 PetscCall(VecRestoreArray(y, &yy)); 198 PetscCall(PetscLogFlops(91.0 * m)); /* 2*bs2 - bs */ 199 PetscFunctionReturn(0); 200 } 201 static PetscErrorCode PCApply_PBJacobi_N(PC pc, Vec x, Vec y) { 202 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 203 PetscInt i, ib, jb; 204 const PetscInt m = jac->mbs; 205 const PetscInt bs = jac->bs; 206 const MatScalar *diag = jac->diag; 207 PetscScalar *yy; 208 const PetscScalar *xx; 209 210 PetscFunctionBegin; 211 PetscCall(VecGetArrayRead(x, &xx)); 212 PetscCall(VecGetArray(y, &yy)); 213 for (i = 0; i < m; i++) { 214 for (ib = 0; ib < bs; ib++) { 215 PetscScalar rowsum = 0; 216 for (jb = 0; jb < bs; jb++) { rowsum += diag[ib + jb * bs] * xx[bs * i + jb]; } 217 yy[bs * i + ib] = rowsum; 218 } 219 diag += bs * bs; 220 } 221 PetscCall(VecRestoreArrayRead(x, &xx)); 222 PetscCall(VecRestoreArray(y, &yy)); 223 PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m)); /* 2*bs2 - bs */ 224 PetscFunctionReturn(0); 225 } 226 227 static PetscErrorCode PCApplyTranspose_PBJacobi_N(PC pc, Vec x, Vec y) { 228 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 229 PetscInt i, j, k, m = jac->mbs, bs = jac->bs; 230 const MatScalar *diag = jac->diag; 231 const PetscScalar *xx; 232 PetscScalar *yy; 233 234 PetscFunctionBegin; 235 PetscCall(VecGetArrayRead(x, &xx)); 236 PetscCall(VecGetArray(y, &yy)); 237 for (i = 0; i < m; i++) { 238 for (j = 0; j < bs; j++) yy[i * bs + j] = 0.; 239 for (j = 0; j < bs; j++) { 240 for (k = 0; k < bs; k++) { yy[i * bs + k] += diag[k * bs + j] * xx[i * bs + j]; } 241 } 242 diag += bs * bs; 243 } 244 PetscCall(VecRestoreArrayRead(x, &xx)); 245 PetscCall(VecRestoreArray(y, &yy)); 246 PetscCall(PetscLogFlops(m * bs * (2 * bs - 1))); 247 PetscFunctionReturn(0); 248 } 249 250 /* -------------------------------------------------------------------------- */ 251 static PetscErrorCode PCSetUp_PBJacobi(PC pc) { 252 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 253 Mat A = pc->pmat; 254 MatFactorError err; 255 PetscInt nlocal; 256 257 PetscFunctionBegin; 258 PetscCall(MatInvertBlockDiagonal(A, &jac->diag)); 259 PetscCall(MatFactorGetError(A, &err)); 260 if (err) pc->failedreason = (PCFailedReason)err; 261 262 PetscCall(MatGetBlockSize(A, &jac->bs)); 263 PetscCall(MatGetLocalSize(A, &nlocal, NULL)); 264 jac->mbs = nlocal / jac->bs; 265 switch (jac->bs) { 266 case 1: pc->ops->apply = PCApply_PBJacobi_1; break; 267 case 2: pc->ops->apply = PCApply_PBJacobi_2; break; 268 case 3: pc->ops->apply = PCApply_PBJacobi_3; break; 269 case 4: pc->ops->apply = PCApply_PBJacobi_4; break; 270 case 5: pc->ops->apply = PCApply_PBJacobi_5; break; 271 case 6: pc->ops->apply = PCApply_PBJacobi_6; break; 272 case 7: pc->ops->apply = PCApply_PBJacobi_7; break; 273 default: pc->ops->apply = PCApply_PBJacobi_N; break; 274 } 275 pc->ops->applytranspose = PCApplyTranspose_PBJacobi_N; 276 PetscFunctionReturn(0); 277 } 278 /* -------------------------------------------------------------------------- */ 279 static PetscErrorCode PCDestroy_PBJacobi(PC pc) { 280 PetscFunctionBegin; 281 /* 282 Free the private data structure that was hanging off the PC 283 */ 284 PetscCall(PetscFree(pc->data)); 285 PetscFunctionReturn(0); 286 } 287 288 static PetscErrorCode PCView_PBJacobi(PC pc, PetscViewer viewer) { 289 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 290 PetscBool iascii; 291 292 PetscFunctionBegin; 293 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 294 if (iascii) { PetscCall(PetscViewerASCIIPrintf(viewer, " point-block size %" PetscInt_FMT "\n", jac->bs)); } 295 PetscFunctionReturn(0); 296 } 297 298 /* -------------------------------------------------------------------------- */ 299 /*MC 300 PCPBJACOBI - Point block Jacobi preconditioner 301 302 Notes: 303 See PCJACOBI for diagonal Jacobi, PCVPBJACOBI for variable-size point block, and PCBJACOBI for large size blocks 304 305 This works for AIJ and BAIJ matrices and uses the blocksize provided to the matrix 306 307 Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 308 is detected a PETSc error is generated. 309 310 Developer Notes: 311 This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow 312 the factorization to continue even after a zero pivot is found resulting in a Nan and hence 313 terminating KSP with a KSP_DIVERGED_NANORIF allowing 314 a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 315 316 Perhaps should provide an option that allows generation of a valid preconditioner 317 even if a block is singular as the PCJACOBI does. 318 319 Level: beginner 320 321 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCVPBJACOBI`, `PCBJACOBI` 322 323 M*/ 324 325 PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc) { 326 PC_PBJacobi *jac; 327 328 PetscFunctionBegin; 329 /* 330 Creates the private data structure for this preconditioner and 331 attach it to the PC object. 332 */ 333 PetscCall(PetscNewLog(pc, &jac)); 334 pc->data = (void *)jac; 335 336 /* 337 Initialize the pointers to vectors to ZERO; these will be used to store 338 diagonal entries of the matrix for fast preconditioner application. 339 */ 340 jac->diag = NULL; 341 342 /* 343 Set the pointers for the functions that are provided above. 344 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 345 are called, they will automatically call these functions. Note we 346 choose not to provide a couple of these functions since they are 347 not needed. 348 */ 349 pc->ops->apply = NULL; /*set depending on the block size */ 350 pc->ops->applytranspose = NULL; 351 pc->ops->setup = PCSetUp_PBJacobi; 352 pc->ops->destroy = PCDestroy_PBJacobi; 353 pc->ops->setfromoptions = NULL; 354 pc->ops->view = PCView_PBJacobi; 355 pc->ops->applyrichardson = NULL; 356 pc->ops->applysymmetricleft = NULL; 357 pc->ops->applysymmetricright = NULL; 358 PetscFunctionReturn(0); 359 } 360