1 2 /* 3 Include files needed for the PBJacobi preconditioner: 4 pcimpl.h - private include file intended for use by all preconditioners 5 */ 6 7 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 8 9 /* 10 Private context (data structure) for the PBJacobi preconditioner. 11 */ 12 typedef struct { 13 const MatScalar *diag; 14 PetscInt bs, mbs; 15 } PC_PBJacobi; 16 17 static PetscErrorCode PCApply_PBJacobi_1(PC pc, Vec x, Vec y) 18 { 19 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 20 PetscInt i, m = jac->mbs; 21 const MatScalar *diag = jac->diag; 22 const PetscScalar *xx; 23 PetscScalar *yy; 24 25 PetscFunctionBegin; 26 PetscCall(VecGetArrayRead(x, &xx)); 27 PetscCall(VecGetArray(y, &yy)); 28 for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i]; 29 PetscCall(VecRestoreArrayRead(x, &xx)); 30 PetscCall(VecRestoreArray(y, &yy)); 31 PetscCall(PetscLogFlops(m)); 32 PetscFunctionReturn(0); 33 } 34 35 static PetscErrorCode PCApply_PBJacobi_2(PC pc, Vec x, Vec y) 36 { 37 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 38 PetscInt i, m = jac->mbs; 39 const MatScalar *diag = jac->diag; 40 PetscScalar x0, x1, *yy; 41 const PetscScalar *xx; 42 43 PetscFunctionBegin; 44 PetscCall(VecGetArrayRead(x, &xx)); 45 PetscCall(VecGetArray(y, &yy)); 46 for (i = 0; i < m; i++) { 47 x0 = xx[2 * i]; 48 x1 = xx[2 * i + 1]; 49 yy[2 * i] = diag[0] * x0 + diag[2] * x1; 50 yy[2 * i + 1] = diag[1] * x0 + diag[3] * x1; 51 diag += 4; 52 } 53 PetscCall(VecRestoreArrayRead(x, &xx)); 54 PetscCall(VecRestoreArray(y, &yy)); 55 PetscCall(PetscLogFlops(6.0 * m)); 56 PetscFunctionReturn(0); 57 } 58 static PetscErrorCode PCApply_PBJacobi_3(PC pc, Vec x, Vec y) 59 { 60 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 61 PetscInt i, m = jac->mbs; 62 const MatScalar *diag = jac->diag; 63 PetscScalar x0, x1, x2, *yy; 64 const PetscScalar *xx; 65 66 PetscFunctionBegin; 67 PetscCall(VecGetArrayRead(x, &xx)); 68 PetscCall(VecGetArray(y, &yy)); 69 for (i = 0; i < m; i++) { 70 x0 = xx[3 * i]; 71 x1 = xx[3 * i + 1]; 72 x2 = xx[3 * i + 2]; 73 74 yy[3 * i] = diag[0] * x0 + diag[3] * x1 + diag[6] * x2; 75 yy[3 * i + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2; 76 yy[3 * i + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2; 77 diag += 9; 78 } 79 PetscCall(VecRestoreArrayRead(x, &xx)); 80 PetscCall(VecRestoreArray(y, &yy)); 81 PetscCall(PetscLogFlops(15.0 * m)); 82 PetscFunctionReturn(0); 83 } 84 static PetscErrorCode PCApply_PBJacobi_4(PC pc, Vec x, Vec y) 85 { 86 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 87 PetscInt i, m = jac->mbs; 88 const MatScalar *diag = jac->diag; 89 PetscScalar x0, x1, x2, x3, *yy; 90 const PetscScalar *xx; 91 92 PetscFunctionBegin; 93 PetscCall(VecGetArrayRead(x, &xx)); 94 PetscCall(VecGetArray(y, &yy)); 95 for (i = 0; i < m; i++) { 96 x0 = xx[4 * i]; 97 x1 = xx[4 * i + 1]; 98 x2 = xx[4 * i + 2]; 99 x3 = xx[4 * i + 3]; 100 101 yy[4 * i] = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3; 102 yy[4 * i + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3; 103 yy[4 * i + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3; 104 yy[4 * i + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3; 105 diag += 16; 106 } 107 PetscCall(VecRestoreArrayRead(x, &xx)); 108 PetscCall(VecRestoreArray(y, &yy)); 109 PetscCall(PetscLogFlops(28.0 * m)); 110 PetscFunctionReturn(0); 111 } 112 static PetscErrorCode PCApply_PBJacobi_5(PC pc, Vec x, Vec y) 113 { 114 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 115 PetscInt i, m = jac->mbs; 116 const MatScalar *diag = jac->diag; 117 PetscScalar x0, x1, x2, x3, x4, *yy; 118 const PetscScalar *xx; 119 120 PetscFunctionBegin; 121 PetscCall(VecGetArrayRead(x, &xx)); 122 PetscCall(VecGetArray(y, &yy)); 123 for (i = 0; i < m; i++) { 124 x0 = xx[5 * i]; 125 x1 = xx[5 * i + 1]; 126 x2 = xx[5 * i + 2]; 127 x3 = xx[5 * i + 3]; 128 x4 = xx[5 * i + 4]; 129 130 yy[5 * i] = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4; 131 yy[5 * i + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4; 132 yy[5 * i + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4; 133 yy[5 * i + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4; 134 yy[5 * i + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4; 135 diag += 25; 136 } 137 PetscCall(VecRestoreArrayRead(x, &xx)); 138 PetscCall(VecRestoreArray(y, &yy)); 139 PetscCall(PetscLogFlops(45.0 * m)); 140 PetscFunctionReturn(0); 141 } 142 static PetscErrorCode PCApply_PBJacobi_6(PC pc, Vec x, Vec y) 143 { 144 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 145 PetscInt i, m = jac->mbs; 146 const MatScalar *diag = jac->diag; 147 PetscScalar x0, x1, x2, x3, x4, x5, *yy; 148 const PetscScalar *xx; 149 150 PetscFunctionBegin; 151 PetscCall(VecGetArrayRead(x, &xx)); 152 PetscCall(VecGetArray(y, &yy)); 153 for (i = 0; i < m; i++) { 154 x0 = xx[6 * i]; 155 x1 = xx[6 * i + 1]; 156 x2 = xx[6 * i + 2]; 157 x3 = xx[6 * i + 3]; 158 x4 = xx[6 * i + 4]; 159 x5 = xx[6 * i + 5]; 160 161 yy[6 * i] = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5; 162 yy[6 * i + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5; 163 yy[6 * i + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5; 164 yy[6 * i + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5; 165 yy[6 * i + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5; 166 yy[6 * i + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5; 167 diag += 36; 168 } 169 PetscCall(VecRestoreArrayRead(x, &xx)); 170 PetscCall(VecRestoreArray(y, &yy)); 171 PetscCall(PetscLogFlops(66.0 * m)); 172 PetscFunctionReturn(0); 173 } 174 static PetscErrorCode PCApply_PBJacobi_7(PC pc, Vec x, Vec y) 175 { 176 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 177 PetscInt i, m = jac->mbs; 178 const MatScalar *diag = jac->diag; 179 PetscScalar x0, x1, x2, x3, x4, x5, x6, *yy; 180 const PetscScalar *xx; 181 182 PetscFunctionBegin; 183 PetscCall(VecGetArrayRead(x, &xx)); 184 PetscCall(VecGetArray(y, &yy)); 185 for (i = 0; i < m; i++) { 186 x0 = xx[7 * i]; 187 x1 = xx[7 * i + 1]; 188 x2 = xx[7 * i + 2]; 189 x3 = xx[7 * i + 3]; 190 x4 = xx[7 * i + 4]; 191 x5 = xx[7 * i + 5]; 192 x6 = xx[7 * i + 6]; 193 194 yy[7 * i] = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6; 195 yy[7 * i + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6; 196 yy[7 * i + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6; 197 yy[7 * i + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6; 198 yy[7 * i + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6; 199 yy[7 * i + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6; 200 yy[7 * i + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6; 201 diag += 49; 202 } 203 PetscCall(VecRestoreArrayRead(x, &xx)); 204 PetscCall(VecRestoreArray(y, &yy)); 205 PetscCall(PetscLogFlops(91.0 * m)); /* 2*bs2 - bs */ 206 PetscFunctionReturn(0); 207 } 208 static PetscErrorCode PCApply_PBJacobi_N(PC pc, Vec x, Vec y) 209 { 210 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 211 PetscInt i, ib, jb; 212 const PetscInt m = jac->mbs; 213 const PetscInt bs = jac->bs; 214 const MatScalar *diag = jac->diag; 215 PetscScalar *yy; 216 const PetscScalar *xx; 217 218 PetscFunctionBegin; 219 PetscCall(VecGetArrayRead(x, &xx)); 220 PetscCall(VecGetArray(y, &yy)); 221 for (i = 0; i < m; i++) { 222 for (ib = 0; ib < bs; ib++) { 223 PetscScalar rowsum = 0; 224 for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[bs * i + jb]; 225 yy[bs * i + ib] = rowsum; 226 } 227 diag += bs * bs; 228 } 229 PetscCall(VecRestoreArrayRead(x, &xx)); 230 PetscCall(VecRestoreArray(y, &yy)); 231 PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m)); /* 2*bs2 - bs */ 232 PetscFunctionReturn(0); 233 } 234 235 static PetscErrorCode PCApplyTranspose_PBJacobi_N(PC pc, Vec x, Vec y) 236 { 237 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 238 PetscInt i, j, k, m = jac->mbs, bs = jac->bs; 239 const MatScalar *diag = jac->diag; 240 const PetscScalar *xx; 241 PetscScalar *yy; 242 243 PetscFunctionBegin; 244 PetscCall(VecGetArrayRead(x, &xx)); 245 PetscCall(VecGetArray(y, &yy)); 246 for (i = 0; i < m; i++) { 247 for (j = 0; j < bs; j++) yy[i * bs + j] = 0.; 248 for (j = 0; j < bs; j++) { 249 for (k = 0; k < bs; k++) yy[i * bs + k] += diag[k * bs + j] * xx[i * bs + j]; 250 } 251 diag += bs * bs; 252 } 253 PetscCall(VecRestoreArrayRead(x, &xx)); 254 PetscCall(VecRestoreArray(y, &yy)); 255 PetscCall(PetscLogFlops(m * bs * (2 * bs - 1))); 256 PetscFunctionReturn(0); 257 } 258 259 static PetscErrorCode PCSetUp_PBJacobi(PC pc) 260 { 261 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 262 Mat A = pc->pmat; 263 MatFactorError err; 264 PetscInt nlocal; 265 266 PetscFunctionBegin; 267 PetscCall(MatInvertBlockDiagonal(A, &jac->diag)); 268 PetscCall(MatFactorGetError(A, &err)); 269 if (err) pc->failedreason = (PCFailedReason)err; 270 271 PetscCall(MatGetBlockSize(A, &jac->bs)); 272 PetscCall(MatGetLocalSize(A, &nlocal, NULL)); 273 jac->mbs = nlocal / jac->bs; 274 switch (jac->bs) { 275 case 1: 276 pc->ops->apply = PCApply_PBJacobi_1; 277 break; 278 case 2: 279 pc->ops->apply = PCApply_PBJacobi_2; 280 break; 281 case 3: 282 pc->ops->apply = PCApply_PBJacobi_3; 283 break; 284 case 4: 285 pc->ops->apply = PCApply_PBJacobi_4; 286 break; 287 case 5: 288 pc->ops->apply = PCApply_PBJacobi_5; 289 break; 290 case 6: 291 pc->ops->apply = PCApply_PBJacobi_6; 292 break; 293 case 7: 294 pc->ops->apply = PCApply_PBJacobi_7; 295 break; 296 default: 297 pc->ops->apply = PCApply_PBJacobi_N; 298 break; 299 } 300 pc->ops->applytranspose = PCApplyTranspose_PBJacobi_N; 301 PetscFunctionReturn(0); 302 } 303 304 static PetscErrorCode PCDestroy_PBJacobi(PC pc) 305 { 306 PetscFunctionBegin; 307 /* 308 Free the private data structure that was hanging off the PC 309 */ 310 PetscCall(PetscFree(pc->data)); 311 PetscFunctionReturn(0); 312 } 313 314 static PetscErrorCode PCView_PBJacobi(PC pc, PetscViewer viewer) 315 { 316 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 317 PetscBool iascii; 318 319 PetscFunctionBegin; 320 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 321 if (iascii) PetscCall(PetscViewerASCIIPrintf(viewer, " point-block size %" PetscInt_FMT "\n", jac->bs)); 322 PetscFunctionReturn(0); 323 } 324 325 /*MC 326 PCPBJACOBI - Point block Jacobi preconditioner 327 328 Notes: 329 See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable-size point block, and `PCBJACOBI` for large size blocks 330 331 This works for `MATAIJ` and `MATBAIJ` matrices and uses the blocksize provided to the matrix 332 333 Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 334 is detected a PETSc error is generated. 335 336 Developer Notes: 337 This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow 338 the factorization to continue even after a zero pivot is found resulting in a Nan and hence 339 terminating `KSP` with a `KSP_DIVERGED_NANORINF` allowing 340 a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 341 342 Perhaps should provide an option that allows generation of a valid preconditioner 343 even if a block is singular as the `PCJACOBI` does. 344 345 Level: beginner 346 347 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCVPBJACOBI`, `PCBJACOBI` 348 M*/ 349 350 PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc) 351 { 352 PC_PBJacobi *jac; 353 354 PetscFunctionBegin; 355 /* 356 Creates the private data structure for this preconditioner and 357 attach it to the PC object. 358 */ 359 PetscCall(PetscNew(&jac)); 360 pc->data = (void *)jac; 361 362 /* 363 Initialize the pointers to vectors to ZERO; these will be used to store 364 diagonal entries of the matrix for fast preconditioner application. 365 */ 366 jac->diag = NULL; 367 368 /* 369 Set the pointers for the functions that are provided above. 370 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 371 are called, they will automatically call these functions. Note we 372 choose not to provide a couple of these functions since they are 373 not needed. 374 */ 375 pc->ops->apply = NULL; /*set depending on the block size */ 376 pc->ops->applytranspose = NULL; 377 pc->ops->setup = PCSetUp_PBJacobi; 378 pc->ops->destroy = PCDestroy_PBJacobi; 379 pc->ops->setfromoptions = NULL; 380 pc->ops->view = PCView_PBJacobi; 381 pc->ops->applyrichardson = NULL; 382 pc->ops->applysymmetricleft = NULL; 383 pc->ops->applysymmetricright = NULL; 384 PetscFunctionReturn(0); 385 } 386