1 /* 2 Defines a block Jacobi preconditioner. 3 */ 4 5 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/ 6 7 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC, Mat, Mat); 8 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC, Mat, Mat); 9 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC); 10 11 static PetscErrorCode PCSetUp_BJacobi(PC pc) 12 { 13 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 14 Mat mat = pc->mat, pmat = pc->pmat; 15 PetscBool hasop; 16 PetscInt N, M, start, i, sum, end; 17 PetscInt bs, i_start = -1, i_end = -1; 18 PetscMPIInt rank, size; 19 20 PetscFunctionBegin; 21 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 22 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 23 PetscCall(MatGetLocalSize(pc->pmat, &M, &N)); 24 PetscCall(MatGetBlockSize(pc->pmat, &bs)); 25 26 if (jac->n > 0 && jac->n < size) { 27 PetscCall(PCSetUp_BJacobi_Multiproc(pc)); 28 PetscFunctionReturn(PETSC_SUCCESS); 29 } 30 31 /* Determines the number of blocks assigned to each processor */ 32 /* local block count given */ 33 if (jac->n_local > 0 && jac->n < 0) { 34 PetscCall(MPIU_Allreduce(&jac->n_local, &jac->n, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 35 if (jac->l_lens) { /* check that user set these correctly */ 36 sum = 0; 37 for (i = 0; i < jac->n_local; i++) { 38 PetscCheck(jac->l_lens[i] / bs * bs == jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout"); 39 sum += jac->l_lens[i]; 40 } 41 PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local lens set incorrectly"); 42 } else { 43 PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens)); 44 for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)); 45 } 46 } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */ 47 /* global blocks given: determine which ones are local */ 48 if (jac->g_lens) { 49 /* check if the g_lens is has valid entries */ 50 for (i = 0; i < jac->n; i++) { 51 PetscCheck(jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Zero block not allowed"); 52 PetscCheck(jac->g_lens[i] / bs * bs == jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout"); 53 } 54 if (size == 1) { 55 jac->n_local = jac->n; 56 PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens)); 57 PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens, jac->n_local)); 58 /* check that user set these correctly */ 59 sum = 0; 60 for (i = 0; i < jac->n_local; i++) sum += jac->l_lens[i]; 61 PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Global lens set incorrectly"); 62 } else { 63 PetscCall(MatGetOwnershipRange(pc->pmat, &start, &end)); 64 /* loop over blocks determining first one owned by me */ 65 sum = 0; 66 for (i = 0; i < jac->n + 1; i++) { 67 if (sum == start) { 68 i_start = i; 69 goto start_1; 70 } 71 if (i < jac->n) sum += jac->g_lens[i]; 72 } 73 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout"); 74 start_1: 75 for (i = i_start; i < jac->n + 1; i++) { 76 if (sum == end) { 77 i_end = i; 78 goto end_1; 79 } 80 if (i < jac->n) sum += jac->g_lens[i]; 81 } 82 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout"); 83 end_1: 84 jac->n_local = i_end - i_start; 85 PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens)); 86 PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens + i_start, jac->n_local)); 87 } 88 } else { /* no global blocks given, determine then using default layout */ 89 jac->n_local = jac->n / size + ((jac->n % size) > rank); 90 PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens)); 91 for (i = 0; i < jac->n_local; i++) { 92 jac->l_lens[i] = ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)) * bs; 93 PetscCheck(jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Too many blocks given"); 94 } 95 } 96 } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */ 97 jac->n = size; 98 jac->n_local = 1; 99 PetscCall(PetscMalloc1(1, &jac->l_lens)); 100 jac->l_lens[0] = M; 101 } else { /* jac->n > 0 && jac->n_local > 0 */ 102 if (!jac->l_lens) { 103 PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens)); 104 for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)); 105 } 106 } 107 PetscCheck(jac->n_local >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of blocks is less than number of processors"); 108 109 /* Determines mat and pmat */ 110 PetscCall(MatHasOperation(pc->mat, MATOP_GET_DIAGONAL_BLOCK, &hasop)); 111 if (!hasop && size == 1) { 112 mat = pc->mat; 113 pmat = pc->pmat; 114 } else { 115 if (pc->useAmat) { 116 /* use block from Amat matrix, not Pmat for local MatMult() */ 117 PetscCall(MatGetDiagonalBlock(pc->mat, &mat)); 118 } 119 if (pc->pmat != pc->mat || !pc->useAmat) { 120 PetscCall(MatGetDiagonalBlock(pc->pmat, &pmat)); 121 } else pmat = mat; 122 } 123 124 /* 125 Setup code depends on the number of blocks 126 */ 127 if (jac->n_local == 1) { 128 PetscCall(PCSetUp_BJacobi_Singleblock(pc, mat, pmat)); 129 } else { 130 PetscCall(PCSetUp_BJacobi_Multiblock(pc, mat, pmat)); 131 } 132 PetscFunctionReturn(PETSC_SUCCESS); 133 } 134 135 /* Default destroy, if it has never been setup */ 136 static PetscErrorCode PCDestroy_BJacobi(PC pc) 137 { 138 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 139 140 PetscFunctionBegin; 141 PetscCall(PetscFree(jac->g_lens)); 142 PetscCall(PetscFree(jac->l_lens)); 143 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", NULL)); 144 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", NULL)); 145 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", NULL)); 146 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", NULL)); 147 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", NULL)); 148 PetscCall(PetscFree(pc->data)); 149 PetscFunctionReturn(PETSC_SUCCESS); 150 } 151 152 static PetscErrorCode PCSetFromOptions_BJacobi(PC pc, PetscOptionItems *PetscOptionsObject) 153 { 154 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 155 PetscInt blocks, i; 156 PetscBool flg; 157 158 PetscFunctionBegin; 159 PetscOptionsHeadBegin(PetscOptionsObject, "Block Jacobi options"); 160 PetscCall(PetscOptionsInt("-pc_bjacobi_blocks", "Total number of blocks", "PCBJacobiSetTotalBlocks", jac->n, &blocks, &flg)); 161 if (flg) PetscCall(PCBJacobiSetTotalBlocks(pc, blocks, NULL)); 162 PetscCall(PetscOptionsInt("-pc_bjacobi_local_blocks", "Local number of blocks", "PCBJacobiSetLocalBlocks", jac->n_local, &blocks, &flg)); 163 if (flg) PetscCall(PCBJacobiSetLocalBlocks(pc, blocks, NULL)); 164 if (jac->ksp) { 165 /* The sub-KSP has already been set up (e.g., PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions was not called 166 * unless we had already been called. */ 167 for (i = 0; i < jac->n_local; i++) PetscCall(KSPSetFromOptions(jac->ksp[i])); 168 } 169 PetscOptionsHeadEnd(); 170 PetscFunctionReturn(PETSC_SUCCESS); 171 } 172 173 #include <petscdraw.h> 174 static PetscErrorCode PCView_BJacobi(PC pc, PetscViewer viewer) 175 { 176 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 177 PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data; 178 PetscMPIInt rank; 179 PetscInt i; 180 PetscBool iascii, isstring, isdraw; 181 PetscViewer sviewer; 182 PetscViewerFormat format; 183 const char *prefix; 184 185 PetscFunctionBegin; 186 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 187 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring)); 188 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 189 if (iascii) { 190 if (pc->useAmat) PetscCall(PetscViewerASCIIPrintf(viewer, " using Amat local matrix, number of blocks = %" PetscInt_FMT "\n", jac->n)); 191 PetscCall(PetscViewerASCIIPrintf(viewer, " number of blocks = %" PetscInt_FMT "\n", jac->n)); 192 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 193 PetscCall(PetscViewerGetFormat(viewer, &format)); 194 if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) { 195 PetscCall(PetscViewerASCIIPrintf(viewer, " Local solver information for first block is in the following KSP and PC objects on rank 0:\n")); 196 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 197 PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all blocks\n", prefix ? prefix : "")); 198 if (jac->ksp && !jac->psubcomm) { 199 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 200 if (rank == 0) { 201 PetscCall(PetscViewerASCIIPushTab(sviewer)); 202 PetscCall(KSPView(jac->ksp[0], sviewer)); 203 PetscCall(PetscViewerASCIIPopTab(sviewer)); 204 } 205 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 206 /* extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */ 207 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 208 } else if (mpjac && jac->ksp && mpjac->psubcomm) { 209 PetscCall(PetscViewerGetSubViewer(viewer, mpjac->psubcomm->child, &sviewer)); 210 if (!mpjac->psubcomm->color) { 211 PetscCall(PetscViewerASCIIPushTab(sviewer)); 212 PetscCall(KSPView(*jac->ksp, sviewer)); 213 PetscCall(PetscViewerASCIIPopTab(sviewer)); 214 } 215 PetscCall(PetscViewerRestoreSubViewer(viewer, mpjac->psubcomm->child, &sviewer)); 216 /* extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */ 217 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 218 } 219 } else { 220 PetscInt n_global; 221 PetscCall(MPIU_Allreduce(&jac->n_local, &n_global, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 222 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 223 PetscCall(PetscViewerASCIIPrintf(viewer, " Local solver information for each block is in the following KSP and PC objects:\n")); 224 PetscCall(PetscViewerASCIIPushTab(viewer)); 225 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 226 PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n", rank, jac->n_local, jac->first_local)); 227 for (i = 0; i < jac->n_local; i++) { 228 PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] local block number %" PetscInt_FMT "\n", rank, i)); 229 PetscCall(KSPView(jac->ksp[i], sviewer)); 230 PetscCall(PetscViewerASCIIPrintf(sviewer, "- - - - - - - - - - - - - - - - - -\n")); 231 } 232 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 233 PetscCall(PetscViewerASCIIPopTab(viewer)); 234 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 235 } 236 } else if (isstring) { 237 PetscCall(PetscViewerStringSPrintf(viewer, " blks=%" PetscInt_FMT, jac->n)); 238 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 239 if (jac->ksp) PetscCall(KSPView(jac->ksp[0], sviewer)); 240 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 241 } else if (isdraw) { 242 PetscDraw draw; 243 char str[25]; 244 PetscReal x, y, bottom, h; 245 246 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 247 PetscCall(PetscDrawGetCurrentPoint(draw, &x, &y)); 248 PetscCall(PetscSNPrintf(str, 25, "Number blocks %" PetscInt_FMT, jac->n)); 249 PetscCall(PetscDrawStringBoxed(draw, x, y, PETSC_DRAW_RED, PETSC_DRAW_BLACK, str, NULL, &h)); 250 bottom = y - h; 251 PetscCall(PetscDrawPushCurrentPoint(draw, x, bottom)); 252 /* warning the communicator on viewer is different then on ksp in parallel */ 253 if (jac->ksp) PetscCall(KSPView(jac->ksp[0], viewer)); 254 PetscCall(PetscDrawPopCurrentPoint(draw)); 255 } 256 PetscFunctionReturn(PETSC_SUCCESS); 257 } 258 259 static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc, PetscInt *n_local, PetscInt *first_local, KSP **ksp) 260 { 261 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 262 263 PetscFunctionBegin; 264 PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Must call KSPSetUp() or PCSetUp() first"); 265 266 if (n_local) *n_local = jac->n_local; 267 if (first_local) *first_local = jac->first_local; 268 if (ksp) *ksp = jac->ksp; 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271 272 static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt *lens) 273 { 274 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 275 276 PetscFunctionBegin; 277 PetscCheck(pc->setupcalled <= 0 || jac->n == blocks, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called"); 278 jac->n = blocks; 279 if (!lens) jac->g_lens = NULL; 280 else { 281 PetscCall(PetscMalloc1(blocks, &jac->g_lens)); 282 PetscCall(PetscArraycpy(jac->g_lens, lens, blocks)); 283 } 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 287 static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[]) 288 { 289 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 290 291 PetscFunctionBegin; 292 *blocks = jac->n; 293 if (lens) *lens = jac->g_lens; 294 PetscFunctionReturn(PETSC_SUCCESS); 295 } 296 297 static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt lens[]) 298 { 299 PC_BJacobi *jac; 300 301 PetscFunctionBegin; 302 jac = (PC_BJacobi *)pc->data; 303 304 jac->n_local = blocks; 305 if (!lens) jac->l_lens = NULL; 306 else { 307 PetscCall(PetscMalloc1(blocks, &jac->l_lens)); 308 PetscCall(PetscArraycpy(jac->l_lens, lens, blocks)); 309 } 310 PetscFunctionReturn(PETSC_SUCCESS); 311 } 312 313 static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[]) 314 { 315 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 316 317 PetscFunctionBegin; 318 *blocks = jac->n_local; 319 if (lens) *lens = jac->l_lens; 320 PetscFunctionReturn(PETSC_SUCCESS); 321 } 322 323 /*@C 324 PCBJacobiGetSubKSP - Gets the local `KSP` contexts for all blocks on 325 this processor. 326 327 Not Collective 328 329 Input Parameter: 330 . pc - the preconditioner context 331 332 Output Parameters: 333 + n_local - the number of blocks on this processor, or NULL 334 . first_local - the global number of the first block on this processor, or NULL 335 - ksp - the array of KSP contexts 336 337 Notes: 338 After `PCBJacobiGetSubKSP()` the array of `KSP` contexts is not to be freed. 339 340 Currently for some matrix implementations only 1 block per processor 341 is supported. 342 343 You must call `KSPSetUp()` or `PCSetUp()` before calling `PCBJacobiGetSubKSP()`. 344 345 Fortran Notes: 346 You must pass in a `KSP` array that is large enough to contain all the local `KSP`s. 347 348 You can call `PCBJacobiGetSubKSP`(pc,nlocal,firstlocal,`PETSC_NULL_KSP`,ierr) to determine how large the 349 `KSP` array must be. 350 351 Level: advanced 352 353 .seealso: [](ch_ksp), `PCBJACOBI`, `PCASM`, `PCASMGetSubKSP()` 354 @*/ 355 PetscErrorCode PCBJacobiGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[]) 356 { 357 PetscFunctionBegin; 358 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 359 PetscUseMethod(pc, "PCBJacobiGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp)); 360 PetscFunctionReturn(PETSC_SUCCESS); 361 } 362 363 /*@ 364 PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block 365 Jacobi preconditioner. 366 367 Collective 368 369 Input Parameters: 370 + pc - the preconditioner context 371 . blocks - the number of blocks 372 - lens - [optional] integer array containing the size of each block 373 374 Options Database Key: 375 . -pc_bjacobi_blocks <blocks> - Sets the number of global blocks 376 377 Note: 378 Currently only a limited number of blocking configurations are supported. 379 All processors sharing the `PC` must call this routine with the same data. 380 381 Level: intermediate 382 383 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetLocalBlocks()` 384 @*/ 385 PetscErrorCode PCBJacobiSetTotalBlocks(PC pc, PetscInt blocks, const PetscInt lens[]) 386 { 387 PetscFunctionBegin; 388 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 389 PetscCheck(blocks > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Must have positive blocks"); 390 PetscTryMethod(pc, "PCBJacobiSetTotalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens)); 391 PetscFunctionReturn(PETSC_SUCCESS); 392 } 393 394 /*@C 395 PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block 396 Jacobi, `PCBJACOBI`, preconditioner. 397 398 Not Collective 399 400 Input Parameter: 401 . pc - the preconditioner context 402 403 Output Parameters: 404 + blocks - the number of blocks 405 - lens - integer array containing the size of each block 406 407 Level: intermediate 408 409 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetLocalBlocks()` 410 @*/ 411 PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[]) 412 { 413 PetscFunctionBegin; 414 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 415 PetscAssertPointer(blocks, 2); 416 PetscUseMethod(pc, "PCBJacobiGetTotalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens)); 417 PetscFunctionReturn(PETSC_SUCCESS); 418 } 419 420 /*@ 421 PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block 422 Jacobi, `PCBJACOBI`, preconditioner. 423 424 Not Collective 425 426 Input Parameters: 427 + pc - the preconditioner context 428 . blocks - the number of blocks 429 - lens - [optional] integer array containing size of each block 430 431 Options Database Key: 432 . -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks 433 434 Note: 435 Currently only a limited number of blocking configurations are supported. 436 437 Level: intermediate 438 439 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetTotalBlocks()` 440 @*/ 441 PetscErrorCode PCBJacobiSetLocalBlocks(PC pc, PetscInt blocks, const PetscInt lens[]) 442 { 443 PetscFunctionBegin; 444 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 445 PetscCheck(blocks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must have nonegative blocks"); 446 PetscTryMethod(pc, "PCBJacobiSetLocalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens)); 447 PetscFunctionReturn(PETSC_SUCCESS); 448 } 449 450 /*@C 451 PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block 452 Jacobi, `PCBJACOBI`, preconditioner. 453 454 Not Collective 455 456 Input Parameters: 457 + pc - the preconditioner context 458 . blocks - the number of blocks 459 - lens - [optional] integer array containing size of each block 460 461 Note: 462 Currently only a limited number of blocking configurations are supported. 463 464 Level: intermediate 465 466 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetTotalBlocks()` 467 @*/ 468 PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[]) 469 { 470 PetscFunctionBegin; 471 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 472 PetscAssertPointer(blocks, 2); 473 PetscUseMethod(pc, "PCBJacobiGetLocalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens)); 474 PetscFunctionReturn(PETSC_SUCCESS); 475 } 476 477 /*MC 478 PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with 479 its own `KSP` object. 480 481 Options Database Keys: 482 + -pc_use_amat - use Amat to apply block of operator in inner Krylov method 483 - -pc_bjacobi_blocks <n> - use n total blocks 484 485 Notes: 486 See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable point block, and `PCPBJACOBI` for fixed size point block 487 488 Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor. 489 490 To set options on the solvers for each block append -sub_ to all the `KSP` and `PC` 491 options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly 492 493 To set the options on the solvers separate for each block call `PCBJacobiGetSubKSP()` 494 and set the options directly on the resulting `KSP` object (you can access its `PC` 495 `KSPGetPC()`) 496 497 For GPU-based vectors (`VECCUDA`, `VECViennaCL`) it is recommended to use exactly one block per MPI process for best 498 performance. Different block partitioning may lead to additional data transfers 499 between host and GPU that lead to degraded performance. 500 501 When multiple processes share a single block, each block encompasses exactly all the unknowns owned its set of processes. 502 503 Level: beginner 504 505 .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCType`, 506 `PCASM`, `PCSetUseAmat()`, `PCGetUseAmat()`, `PCBJacobiGetSubKSP()`, `PCBJacobiSetTotalBlocks()`, 507 `PCBJacobiSetLocalBlocks()`, `PCSetModifySubMatrices()`, `PCJACOBI`, `PCVPBJACOBI`, `PCPBJACOBI` 508 M*/ 509 510 PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc) 511 { 512 PetscMPIInt rank; 513 PC_BJacobi *jac; 514 515 PetscFunctionBegin; 516 PetscCall(PetscNew(&jac)); 517 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 518 519 pc->ops->apply = NULL; 520 pc->ops->matapply = NULL; 521 pc->ops->applytranspose = NULL; 522 pc->ops->setup = PCSetUp_BJacobi; 523 pc->ops->destroy = PCDestroy_BJacobi; 524 pc->ops->setfromoptions = PCSetFromOptions_BJacobi; 525 pc->ops->view = PCView_BJacobi; 526 pc->ops->applyrichardson = NULL; 527 528 pc->data = (void *)jac; 529 jac->n = -1; 530 jac->n_local = -1; 531 jac->first_local = rank; 532 jac->ksp = NULL; 533 jac->g_lens = NULL; 534 jac->l_lens = NULL; 535 jac->psubcomm = NULL; 536 537 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", PCBJacobiGetSubKSP_BJacobi)); 538 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", PCBJacobiSetTotalBlocks_BJacobi)); 539 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", PCBJacobiGetTotalBlocks_BJacobi)); 540 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", PCBJacobiSetLocalBlocks_BJacobi)); 541 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", PCBJacobiGetLocalBlocks_BJacobi)); 542 PetscFunctionReturn(PETSC_SUCCESS); 543 } 544 545 /* 546 These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI 547 */ 548 static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc) 549 { 550 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 551 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 552 553 PetscFunctionBegin; 554 PetscCall(KSPReset(jac->ksp[0])); 555 PetscCall(VecDestroy(&bjac->x)); 556 PetscCall(VecDestroy(&bjac->y)); 557 PetscFunctionReturn(PETSC_SUCCESS); 558 } 559 560 static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc) 561 { 562 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 563 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 564 565 PetscFunctionBegin; 566 PetscCall(PCReset_BJacobi_Singleblock(pc)); 567 PetscCall(KSPDestroy(&jac->ksp[0])); 568 PetscCall(PetscFree(jac->ksp)); 569 PetscCall(PetscFree(bjac)); 570 PetscCall(PCDestroy_BJacobi(pc)); 571 PetscFunctionReturn(PETSC_SUCCESS); 572 } 573 574 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc) 575 { 576 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 577 KSP subksp = jac->ksp[0]; 578 KSPConvergedReason reason; 579 580 PetscFunctionBegin; 581 PetscCall(KSPSetUp(subksp)); 582 PetscCall(KSPGetConvergedReason(subksp, &reason)); 583 if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR; 584 PetscFunctionReturn(PETSC_SUCCESS); 585 } 586 587 static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc, Vec x, Vec y) 588 { 589 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 590 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 591 592 PetscFunctionBegin; 593 PetscCall(VecGetLocalVectorRead(x, bjac->x)); 594 PetscCall(VecGetLocalVector(y, bjac->y)); 595 /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner 596 matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild 597 of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/ 598 PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner)); 599 PetscCall(KSPSolve(jac->ksp[0], bjac->x, bjac->y)); 600 PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y)); 601 PetscCall(VecRestoreLocalVectorRead(x, bjac->x)); 602 PetscCall(VecRestoreLocalVector(y, bjac->y)); 603 PetscFunctionReturn(PETSC_SUCCESS); 604 } 605 606 static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc, Mat X, Mat Y) 607 { 608 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 609 Mat sX, sY; 610 611 PetscFunctionBegin; 612 /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner 613 matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild 614 of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/ 615 PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner)); 616 PetscCall(MatDenseGetLocalMatrix(X, &sX)); 617 PetscCall(MatDenseGetLocalMatrix(Y, &sY)); 618 PetscCall(KSPMatSolve(jac->ksp[0], sX, sY)); 619 PetscFunctionReturn(PETSC_SUCCESS); 620 } 621 622 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc, Vec x, Vec y) 623 { 624 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 625 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 626 PetscScalar *y_array; 627 const PetscScalar *x_array; 628 PC subpc; 629 630 PetscFunctionBegin; 631 /* 632 The VecPlaceArray() is to avoid having to copy the 633 y vector into the bjac->x vector. The reason for 634 the bjac->x vector is that we need a sequential vector 635 for the sequential solve. 636 */ 637 PetscCall(VecGetArrayRead(x, &x_array)); 638 PetscCall(VecGetArray(y, &y_array)); 639 PetscCall(VecPlaceArray(bjac->x, x_array)); 640 PetscCall(VecPlaceArray(bjac->y, y_array)); 641 /* apply the symmetric left portion of the inner PC operator */ 642 /* note this by-passes the inner KSP and its options completely */ 643 PetscCall(KSPGetPC(jac->ksp[0], &subpc)); 644 PetscCall(PCApplySymmetricLeft(subpc, bjac->x, bjac->y)); 645 PetscCall(VecResetArray(bjac->x)); 646 PetscCall(VecResetArray(bjac->y)); 647 PetscCall(VecRestoreArrayRead(x, &x_array)); 648 PetscCall(VecRestoreArray(y, &y_array)); 649 PetscFunctionReturn(PETSC_SUCCESS); 650 } 651 652 static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc, Vec x, Vec y) 653 { 654 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 655 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 656 PetscScalar *y_array; 657 const PetscScalar *x_array; 658 PC subpc; 659 660 PetscFunctionBegin; 661 /* 662 The VecPlaceArray() is to avoid having to copy the 663 y vector into the bjac->x vector. The reason for 664 the bjac->x vector is that we need a sequential vector 665 for the sequential solve. 666 */ 667 PetscCall(VecGetArrayRead(x, &x_array)); 668 PetscCall(VecGetArray(y, &y_array)); 669 PetscCall(VecPlaceArray(bjac->x, x_array)); 670 PetscCall(VecPlaceArray(bjac->y, y_array)); 671 672 /* apply the symmetric right portion of the inner PC operator */ 673 /* note this by-passes the inner KSP and its options completely */ 674 675 PetscCall(KSPGetPC(jac->ksp[0], &subpc)); 676 PetscCall(PCApplySymmetricRight(subpc, bjac->x, bjac->y)); 677 678 PetscCall(VecResetArray(bjac->x)); 679 PetscCall(VecResetArray(bjac->y)); 680 PetscCall(VecRestoreArrayRead(x, &x_array)); 681 PetscCall(VecRestoreArray(y, &y_array)); 682 PetscFunctionReturn(PETSC_SUCCESS); 683 } 684 685 static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc, Vec x, Vec y) 686 { 687 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 688 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 689 PetscScalar *y_array; 690 const PetscScalar *x_array; 691 692 PetscFunctionBegin; 693 /* 694 The VecPlaceArray() is to avoid having to copy the 695 y vector into the bjac->x vector. The reason for 696 the bjac->x vector is that we need a sequential vector 697 for the sequential solve. 698 */ 699 PetscCall(VecGetArrayRead(x, &x_array)); 700 PetscCall(VecGetArray(y, &y_array)); 701 PetscCall(VecPlaceArray(bjac->x, x_array)); 702 PetscCall(VecPlaceArray(bjac->y, y_array)); 703 PetscCall(KSPSolveTranspose(jac->ksp[0], bjac->x, bjac->y)); 704 PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y)); 705 PetscCall(VecResetArray(bjac->x)); 706 PetscCall(VecResetArray(bjac->y)); 707 PetscCall(VecRestoreArrayRead(x, &x_array)); 708 PetscCall(VecRestoreArray(y, &y_array)); 709 PetscFunctionReturn(PETSC_SUCCESS); 710 } 711 712 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc, Mat mat, Mat pmat) 713 { 714 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 715 PetscInt m; 716 KSP ksp; 717 PC_BJacobi_Singleblock *bjac; 718 PetscBool wasSetup = PETSC_TRUE; 719 VecType vectype; 720 const char *prefix; 721 722 PetscFunctionBegin; 723 if (!pc->setupcalled) { 724 if (!jac->ksp) { 725 PetscInt nestlevel; 726 727 wasSetup = PETSC_FALSE; 728 729 PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp)); 730 PetscCall(PCGetKSPNestLevel(pc, &nestlevel)); 731 PetscCall(KSPSetNestLevel(ksp, nestlevel + 1)); 732 PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure)); 733 PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1)); 734 PetscCall(KSPSetType(ksp, KSPPREONLY)); 735 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 736 PetscCall(KSPSetOptionsPrefix(ksp, prefix)); 737 PetscCall(KSPAppendOptionsPrefix(ksp, "sub_")); 738 739 pc->ops->reset = PCReset_BJacobi_Singleblock; 740 pc->ops->destroy = PCDestroy_BJacobi_Singleblock; 741 pc->ops->apply = PCApply_BJacobi_Singleblock; 742 pc->ops->matapply = PCMatApply_BJacobi_Singleblock; 743 pc->ops->applysymmetricleft = PCApplySymmetricLeft_BJacobi_Singleblock; 744 pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock; 745 pc->ops->applytranspose = PCApplyTranspose_BJacobi_Singleblock; 746 pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Singleblock; 747 748 PetscCall(PetscMalloc1(1, &jac->ksp)); 749 jac->ksp[0] = ksp; 750 751 PetscCall(PetscNew(&bjac)); 752 jac->data = (void *)bjac; 753 } else { 754 ksp = jac->ksp[0]; 755 bjac = (PC_BJacobi_Singleblock *)jac->data; 756 } 757 758 /* 759 The reason we need to generate these vectors is to serve 760 as the right-hand side and solution vector for the solve on the 761 block. We do not need to allocate space for the vectors since 762 that is provided via VecPlaceArray() just before the call to 763 KSPSolve() on the block. 764 */ 765 PetscCall(MatGetSize(pmat, &m, &m)); 766 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->x)); 767 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->y)); 768 PetscCall(MatGetVecType(pmat, &vectype)); 769 PetscCall(VecSetType(bjac->x, vectype)); 770 PetscCall(VecSetType(bjac->y, vectype)); 771 } else { 772 ksp = jac->ksp[0]; 773 bjac = (PC_BJacobi_Singleblock *)jac->data; 774 } 775 PetscCall(KSPGetOptionsPrefix(ksp, &prefix)); 776 if (pc->useAmat) { 777 PetscCall(KSPSetOperators(ksp, mat, pmat)); 778 PetscCall(MatSetOptionsPrefix(mat, prefix)); 779 } else { 780 PetscCall(KSPSetOperators(ksp, pmat, pmat)); 781 } 782 PetscCall(MatSetOptionsPrefix(pmat, prefix)); 783 if (!wasSetup && pc->setfromoptionscalled) { 784 /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */ 785 PetscCall(KSPSetFromOptions(ksp)); 786 } 787 PetscFunctionReturn(PETSC_SUCCESS); 788 } 789 790 static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc) 791 { 792 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 793 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data; 794 PetscInt i; 795 796 PetscFunctionBegin; 797 if (bjac && bjac->pmat) { 798 PetscCall(MatDestroyMatrices(jac->n_local, &bjac->pmat)); 799 if (pc->useAmat) PetscCall(MatDestroyMatrices(jac->n_local, &bjac->mat)); 800 } 801 802 for (i = 0; i < jac->n_local; i++) { 803 PetscCall(KSPReset(jac->ksp[i])); 804 if (bjac && bjac->x) { 805 PetscCall(VecDestroy(&bjac->x[i])); 806 PetscCall(VecDestroy(&bjac->y[i])); 807 PetscCall(ISDestroy(&bjac->is[i])); 808 } 809 } 810 PetscCall(PetscFree(jac->l_lens)); 811 PetscCall(PetscFree(jac->g_lens)); 812 PetscFunctionReturn(PETSC_SUCCESS); 813 } 814 815 static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc) 816 { 817 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 818 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data; 819 PetscInt i; 820 821 PetscFunctionBegin; 822 PetscCall(PCReset_BJacobi_Multiblock(pc)); 823 if (bjac) { 824 PetscCall(PetscFree2(bjac->x, bjac->y)); 825 PetscCall(PetscFree(bjac->starts)); 826 PetscCall(PetscFree(bjac->is)); 827 } 828 PetscCall(PetscFree(jac->data)); 829 for (i = 0; i < jac->n_local; i++) PetscCall(KSPDestroy(&jac->ksp[i])); 830 PetscCall(PetscFree(jac->ksp)); 831 PetscCall(PCDestroy_BJacobi(pc)); 832 PetscFunctionReturn(PETSC_SUCCESS); 833 } 834 835 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc) 836 { 837 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 838 PetscInt i, n_local = jac->n_local; 839 KSPConvergedReason reason; 840 841 PetscFunctionBegin; 842 for (i = 0; i < n_local; i++) { 843 PetscCall(KSPSetUp(jac->ksp[i])); 844 PetscCall(KSPGetConvergedReason(jac->ksp[i], &reason)); 845 if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR; 846 } 847 PetscFunctionReturn(PETSC_SUCCESS); 848 } 849 850 static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc, Vec x, Vec y) 851 { 852 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 853 PetscInt i, n_local = jac->n_local; 854 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data; 855 PetscScalar *yin; 856 const PetscScalar *xin; 857 858 PetscFunctionBegin; 859 PetscCall(VecGetArrayRead(x, &xin)); 860 PetscCall(VecGetArray(y, &yin)); 861 for (i = 0; i < n_local; i++) { 862 /* 863 To avoid copying the subvector from x into a workspace we instead 864 make the workspace vector array point to the subpart of the array of 865 the global vector. 866 */ 867 PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i])); 868 PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i])); 869 870 PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 871 PetscCall(KSPSolve(jac->ksp[i], bjac->x[i], bjac->y[i])); 872 PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i])); 873 PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 874 875 PetscCall(VecResetArray(bjac->x[i])); 876 PetscCall(VecResetArray(bjac->y[i])); 877 } 878 PetscCall(VecRestoreArrayRead(x, &xin)); 879 PetscCall(VecRestoreArray(y, &yin)); 880 PetscFunctionReturn(PETSC_SUCCESS); 881 } 882 883 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Multiblock(PC pc, Vec x, Vec y) 884 { 885 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 886 PetscInt i, n_local = jac->n_local; 887 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data; 888 PetscScalar *yin; 889 const PetscScalar *xin; 890 PC subpc; 891 892 PetscFunctionBegin; 893 PetscCall(VecGetArrayRead(x, &xin)); 894 PetscCall(VecGetArray(y, &yin)); 895 for (i = 0; i < n_local; i++) { 896 /* 897 To avoid copying the subvector from x into a workspace we instead 898 make the workspace vector array point to the subpart of the array of 899 the global vector. 900 */ 901 PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i])); 902 PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i])); 903 904 PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 905 /* apply the symmetric left portion of the inner PC operator */ 906 /* note this by-passes the inner KSP and its options completely */ 907 PetscCall(KSPGetPC(jac->ksp[i], &subpc)); 908 PetscCall(PCApplySymmetricLeft(subpc, bjac->x[i], bjac->y[i])); 909 PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 910 911 PetscCall(VecResetArray(bjac->x[i])); 912 PetscCall(VecResetArray(bjac->y[i])); 913 } 914 PetscCall(VecRestoreArrayRead(x, &xin)); 915 PetscCall(VecRestoreArray(y, &yin)); 916 PetscFunctionReturn(PETSC_SUCCESS); 917 } 918 919 static PetscErrorCode PCApplySymmetricRight_BJacobi_Multiblock(PC pc, Vec x, Vec y) 920 { 921 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 922 PetscInt i, n_local = jac->n_local; 923 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data; 924 PetscScalar *yin; 925 const PetscScalar *xin; 926 PC subpc; 927 928 PetscFunctionBegin; 929 PetscCall(VecGetArrayRead(x, &xin)); 930 PetscCall(VecGetArray(y, &yin)); 931 for (i = 0; i < n_local; i++) { 932 /* 933 To avoid copying the subvector from x into a workspace we instead 934 make the workspace vector array point to the subpart of the array of 935 the global vector. 936 */ 937 PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i])); 938 PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i])); 939 940 PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 941 /* apply the symmetric left portion of the inner PC operator */ 942 /* note this by-passes the inner KSP and its options completely */ 943 PetscCall(KSPGetPC(jac->ksp[i], &subpc)); 944 PetscCall(PCApplySymmetricRight(subpc, bjac->x[i], bjac->y[i])); 945 PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 946 947 PetscCall(VecResetArray(bjac->x[i])); 948 PetscCall(VecResetArray(bjac->y[i])); 949 } 950 PetscCall(VecRestoreArrayRead(x, &xin)); 951 PetscCall(VecRestoreArray(y, &yin)); 952 PetscFunctionReturn(PETSC_SUCCESS); 953 } 954 955 static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc, Vec x, Vec y) 956 { 957 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 958 PetscInt i, n_local = jac->n_local; 959 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data; 960 PetscScalar *yin; 961 const PetscScalar *xin; 962 963 PetscFunctionBegin; 964 PetscCall(VecGetArrayRead(x, &xin)); 965 PetscCall(VecGetArray(y, &yin)); 966 for (i = 0; i < n_local; i++) { 967 /* 968 To avoid copying the subvector from x into a workspace we instead 969 make the workspace vector array point to the subpart of the array of 970 the global vector. 971 */ 972 PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i])); 973 PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i])); 974 975 PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 976 PetscCall(KSPSolveTranspose(jac->ksp[i], bjac->x[i], bjac->y[i])); 977 PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i])); 978 PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 979 980 PetscCall(VecResetArray(bjac->x[i])); 981 PetscCall(VecResetArray(bjac->y[i])); 982 } 983 PetscCall(VecRestoreArrayRead(x, &xin)); 984 PetscCall(VecRestoreArray(y, &yin)); 985 PetscFunctionReturn(PETSC_SUCCESS); 986 } 987 988 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc, Mat mat, Mat pmat) 989 { 990 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 991 PetscInt m, n_local, N, M, start, i; 992 const char *prefix; 993 KSP ksp; 994 Vec x, y; 995 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data; 996 PC subpc; 997 IS is; 998 MatReuse scall; 999 VecType vectype; 1000 MatNullSpace *nullsp_mat = NULL, *nullsp_pmat = NULL; 1001 1002 PetscFunctionBegin; 1003 PetscCall(MatGetLocalSize(pc->pmat, &M, &N)); 1004 1005 n_local = jac->n_local; 1006 1007 if (pc->useAmat) { 1008 PetscBool same; 1009 PetscCall(PetscObjectTypeCompare((PetscObject)mat, ((PetscObject)pmat)->type_name, &same)); 1010 PetscCheck(same, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "Matrices not of same type"); 1011 } 1012 1013 if (!pc->setupcalled) { 1014 PetscInt nestlevel; 1015 1016 scall = MAT_INITIAL_MATRIX; 1017 1018 if (!jac->ksp) { 1019 pc->ops->reset = PCReset_BJacobi_Multiblock; 1020 pc->ops->destroy = PCDestroy_BJacobi_Multiblock; 1021 pc->ops->apply = PCApply_BJacobi_Multiblock; 1022 pc->ops->matapply = NULL; 1023 pc->ops->applysymmetricleft = PCApplySymmetricLeft_BJacobi_Multiblock; 1024 pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Multiblock; 1025 pc->ops->applytranspose = PCApplyTranspose_BJacobi_Multiblock; 1026 pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiblock; 1027 1028 PetscCall(PetscNew(&bjac)); 1029 PetscCall(PetscMalloc1(n_local, &jac->ksp)); 1030 PetscCall(PetscMalloc2(n_local, &bjac->x, n_local, &bjac->y)); 1031 PetscCall(PetscMalloc1(n_local, &bjac->starts)); 1032 1033 jac->data = (void *)bjac; 1034 PetscCall(PetscMalloc1(n_local, &bjac->is)); 1035 1036 for (i = 0; i < n_local; i++) { 1037 PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp)); 1038 PetscCall(PCGetKSPNestLevel(pc, &nestlevel)); 1039 PetscCall(KSPSetNestLevel(ksp, nestlevel + 1)); 1040 PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure)); 1041 PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1)); 1042 PetscCall(KSPSetType(ksp, KSPPREONLY)); 1043 PetscCall(KSPGetPC(ksp, &subpc)); 1044 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1045 PetscCall(KSPSetOptionsPrefix(ksp, prefix)); 1046 PetscCall(KSPAppendOptionsPrefix(ksp, "sub_")); 1047 1048 jac->ksp[i] = ksp; 1049 } 1050 } else { 1051 bjac = (PC_BJacobi_Multiblock *)jac->data; 1052 } 1053 1054 start = 0; 1055 PetscCall(MatGetVecType(pmat, &vectype)); 1056 for (i = 0; i < n_local; i++) { 1057 m = jac->l_lens[i]; 1058 /* 1059 The reason we need to generate these vectors is to serve 1060 as the right-hand side and solution vector for the solve on the 1061 block. We do not need to allocate space for the vectors since 1062 that is provided via VecPlaceArray() just before the call to 1063 KSPSolve() on the block. 1064 1065 */ 1066 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &x)); 1067 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &y)); 1068 PetscCall(VecSetType(x, vectype)); 1069 PetscCall(VecSetType(y, vectype)); 1070 1071 bjac->x[i] = x; 1072 bjac->y[i] = y; 1073 bjac->starts[i] = start; 1074 1075 PetscCall(ISCreateStride(PETSC_COMM_SELF, m, start, 1, &is)); 1076 bjac->is[i] = is; 1077 1078 start += m; 1079 } 1080 } else { 1081 bjac = (PC_BJacobi_Multiblock *)jac->data; 1082 /* 1083 Destroy the blocks from the previous iteration 1084 */ 1085 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1086 PetscCall(MatGetNullSpaces(n_local, bjac->pmat, &nullsp_pmat)); 1087 PetscCall(MatDestroyMatrices(n_local, &bjac->pmat)); 1088 if (pc->useAmat) { 1089 PetscCall(MatGetNullSpaces(n_local, bjac->mat, &nullsp_mat)); 1090 PetscCall(MatDestroyMatrices(n_local, &bjac->mat)); 1091 } 1092 scall = MAT_INITIAL_MATRIX; 1093 } else scall = MAT_REUSE_MATRIX; 1094 } 1095 1096 PetscCall(MatCreateSubMatrices(pmat, n_local, bjac->is, bjac->is, scall, &bjac->pmat)); 1097 if (nullsp_pmat) PetscCall(MatRestoreNullSpaces(n_local, bjac->pmat, &nullsp_pmat)); 1098 if (pc->useAmat) { 1099 PetscCall(MatCreateSubMatrices(mat, n_local, bjac->is, bjac->is, scall, &bjac->mat)); 1100 if (nullsp_mat) PetscCall(MatRestoreNullSpaces(n_local, bjac->mat, &nullsp_mat)); 1101 } 1102 /* Return control to the user so that the submatrices can be modified (e.g., to apply 1103 different boundary conditions for the submatrices than for the global problem) */ 1104 PetscCall(PCModifySubMatrices(pc, n_local, bjac->is, bjac->is, bjac->pmat, pc->modifysubmatricesP)); 1105 1106 for (i = 0; i < n_local; i++) { 1107 PetscCall(KSPGetOptionsPrefix(jac->ksp[i], &prefix)); 1108 if (pc->useAmat) { 1109 PetscCall(KSPSetOperators(jac->ksp[i], bjac->mat[i], bjac->pmat[i])); 1110 PetscCall(MatSetOptionsPrefix(bjac->mat[i], prefix)); 1111 } else { 1112 PetscCall(KSPSetOperators(jac->ksp[i], bjac->pmat[i], bjac->pmat[i])); 1113 } 1114 PetscCall(MatSetOptionsPrefix(bjac->pmat[i], prefix)); 1115 if (pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[i])); 1116 } 1117 PetscFunctionReturn(PETSC_SUCCESS); 1118 } 1119 1120 /* 1121 These are for a single block with multiple processes 1122 */ 1123 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc) 1124 { 1125 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 1126 KSP subksp = jac->ksp[0]; 1127 KSPConvergedReason reason; 1128 1129 PetscFunctionBegin; 1130 PetscCall(KSPSetUp(subksp)); 1131 PetscCall(KSPGetConvergedReason(subksp, &reason)); 1132 if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR; 1133 PetscFunctionReturn(PETSC_SUCCESS); 1134 } 1135 1136 static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc) 1137 { 1138 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 1139 PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data; 1140 1141 PetscFunctionBegin; 1142 PetscCall(VecDestroy(&mpjac->ysub)); 1143 PetscCall(VecDestroy(&mpjac->xsub)); 1144 PetscCall(MatDestroy(&mpjac->submats)); 1145 if (jac->ksp) PetscCall(KSPReset(jac->ksp[0])); 1146 PetscFunctionReturn(PETSC_SUCCESS); 1147 } 1148 1149 static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc) 1150 { 1151 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 1152 PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data; 1153 1154 PetscFunctionBegin; 1155 PetscCall(PCReset_BJacobi_Multiproc(pc)); 1156 PetscCall(KSPDestroy(&jac->ksp[0])); 1157 PetscCall(PetscFree(jac->ksp)); 1158 PetscCall(PetscSubcommDestroy(&mpjac->psubcomm)); 1159 1160 PetscCall(PetscFree(mpjac)); 1161 PetscCall(PCDestroy_BJacobi(pc)); 1162 PetscFunctionReturn(PETSC_SUCCESS); 1163 } 1164 1165 static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc, Vec x, Vec y) 1166 { 1167 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 1168 PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data; 1169 PetscScalar *yarray; 1170 const PetscScalar *xarray; 1171 KSPConvergedReason reason; 1172 1173 PetscFunctionBegin; 1174 /* place x's and y's local arrays into xsub and ysub */ 1175 PetscCall(VecGetArrayRead(x, &xarray)); 1176 PetscCall(VecGetArray(y, &yarray)); 1177 PetscCall(VecPlaceArray(mpjac->xsub, xarray)); 1178 PetscCall(VecPlaceArray(mpjac->ysub, yarray)); 1179 1180 /* apply preconditioner on each matrix block */ 1181 PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0)); 1182 PetscCall(KSPSolve(jac->ksp[0], mpjac->xsub, mpjac->ysub)); 1183 PetscCall(KSPCheckSolve(jac->ksp[0], pc, mpjac->ysub)); 1184 PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0)); 1185 PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason)); 1186 if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR; 1187 1188 PetscCall(VecResetArray(mpjac->xsub)); 1189 PetscCall(VecResetArray(mpjac->ysub)); 1190 PetscCall(VecRestoreArrayRead(x, &xarray)); 1191 PetscCall(VecRestoreArray(y, &yarray)); 1192 PetscFunctionReturn(PETSC_SUCCESS); 1193 } 1194 1195 static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc, Mat X, Mat Y) 1196 { 1197 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 1198 KSPConvergedReason reason; 1199 Mat sX, sY; 1200 const PetscScalar *x; 1201 PetscScalar *y; 1202 PetscInt m, N, lda, ldb; 1203 1204 PetscFunctionBegin; 1205 /* apply preconditioner on each matrix block */ 1206 PetscCall(MatGetLocalSize(X, &m, NULL)); 1207 PetscCall(MatGetSize(X, NULL, &N)); 1208 PetscCall(MatDenseGetLDA(X, &lda)); 1209 PetscCall(MatDenseGetLDA(Y, &ldb)); 1210 PetscCall(MatDenseGetArrayRead(X, &x)); 1211 PetscCall(MatDenseGetArrayWrite(Y, &y)); 1212 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, (PetscScalar *)x, &sX)); 1213 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, y, &sY)); 1214 PetscCall(MatDenseSetLDA(sX, lda)); 1215 PetscCall(MatDenseSetLDA(sY, ldb)); 1216 PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0)); 1217 PetscCall(KSPMatSolve(jac->ksp[0], sX, sY)); 1218 PetscCall(KSPCheckSolve(jac->ksp[0], pc, NULL)); 1219 PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0)); 1220 PetscCall(MatDestroy(&sY)); 1221 PetscCall(MatDestroy(&sX)); 1222 PetscCall(MatDenseRestoreArrayWrite(Y, &y)); 1223 PetscCall(MatDenseRestoreArrayRead(X, &x)); 1224 PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason)); 1225 if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR; 1226 PetscFunctionReturn(PETSC_SUCCESS); 1227 } 1228 1229 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc) 1230 { 1231 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 1232 PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data; 1233 PetscInt m, n; 1234 MPI_Comm comm, subcomm = 0; 1235 const char *prefix; 1236 PetscBool wasSetup = PETSC_TRUE; 1237 VecType vectype; 1238 1239 PetscFunctionBegin; 1240 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 1241 PetscCheck(jac->n_local <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only a single block in a subcommunicator is supported"); 1242 jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */ 1243 if (!pc->setupcalled) { 1244 PetscInt nestlevel; 1245 1246 wasSetup = PETSC_FALSE; 1247 PetscCall(PetscNew(&mpjac)); 1248 jac->data = (void *)mpjac; 1249 1250 /* initialize datastructure mpjac */ 1251 if (!jac->psubcomm) { 1252 /* Create default contiguous subcommunicatiors if user does not provide them */ 1253 PetscCall(PetscSubcommCreate(comm, &jac->psubcomm)); 1254 PetscCall(PetscSubcommSetNumber(jac->psubcomm, jac->n)); 1255 PetscCall(PetscSubcommSetType(jac->psubcomm, PETSC_SUBCOMM_CONTIGUOUS)); 1256 } 1257 mpjac->psubcomm = jac->psubcomm; 1258 subcomm = PetscSubcommChild(mpjac->psubcomm); 1259 1260 /* Get matrix blocks of pmat */ 1261 PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats)); 1262 1263 /* create a new PC that processors in each subcomm have copy of */ 1264 PetscCall(PetscMalloc1(1, &jac->ksp)); 1265 PetscCall(KSPCreate(subcomm, &jac->ksp[0])); 1266 PetscCall(PCGetKSPNestLevel(pc, &nestlevel)); 1267 PetscCall(KSPSetNestLevel(jac->ksp[0], nestlevel + 1)); 1268 PetscCall(KSPSetErrorIfNotConverged(jac->ksp[0], pc->erroriffailure)); 1269 PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0], (PetscObject)pc, 1)); 1270 PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats)); 1271 PetscCall(KSPGetPC(jac->ksp[0], &mpjac->pc)); 1272 1273 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1274 PetscCall(KSPSetOptionsPrefix(jac->ksp[0], prefix)); 1275 PetscCall(KSPAppendOptionsPrefix(jac->ksp[0], "sub_")); 1276 PetscCall(KSPGetOptionsPrefix(jac->ksp[0], &prefix)); 1277 PetscCall(MatSetOptionsPrefix(mpjac->submats, prefix)); 1278 1279 /* create dummy vectors xsub and ysub */ 1280 PetscCall(MatGetLocalSize(mpjac->submats, &m, &n)); 1281 PetscCall(VecCreateMPIWithArray(subcomm, 1, n, PETSC_DECIDE, NULL, &mpjac->xsub)); 1282 PetscCall(VecCreateMPIWithArray(subcomm, 1, m, PETSC_DECIDE, NULL, &mpjac->ysub)); 1283 PetscCall(MatGetVecType(mpjac->submats, &vectype)); 1284 PetscCall(VecSetType(mpjac->xsub, vectype)); 1285 PetscCall(VecSetType(mpjac->ysub, vectype)); 1286 1287 pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc; 1288 pc->ops->reset = PCReset_BJacobi_Multiproc; 1289 pc->ops->destroy = PCDestroy_BJacobi_Multiproc; 1290 pc->ops->apply = PCApply_BJacobi_Multiproc; 1291 pc->ops->matapply = PCMatApply_BJacobi_Multiproc; 1292 } else { /* pc->setupcalled */ 1293 subcomm = PetscSubcommChild(mpjac->psubcomm); 1294 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1295 /* destroy old matrix blocks, then get new matrix blocks */ 1296 if (mpjac->submats) PetscCall(MatDestroy(&mpjac->submats)); 1297 PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats)); 1298 } else { 1299 PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_REUSE_MATRIX, &mpjac->submats)); 1300 } 1301 PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats)); 1302 } 1303 1304 if (!wasSetup && pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[0])); 1305 PetscFunctionReturn(PETSC_SUCCESS); 1306 } 1307