1 /* 2 Defines a block Jacobi preconditioner. 3 */ 4 5 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/ 6 7 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC, Mat, Mat); 8 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC, Mat, Mat); 9 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC); 10 11 static PetscErrorCode PCSetUp_BJacobi(PC pc) 12 { 13 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 14 Mat mat = pc->mat, pmat = pc->pmat; 15 PetscBool hasop; 16 PetscInt N, M, start, i, sum, end; 17 PetscInt bs, i_start = -1, i_end = -1; 18 PetscMPIInt rank, size; 19 20 PetscFunctionBegin; 21 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 22 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 23 PetscCall(MatGetLocalSize(pc->pmat, &M, &N)); 24 PetscCall(MatGetBlockSize(pc->pmat, &bs)); 25 26 if (jac->n > 0 && jac->n < size) { 27 PetscCall(PCSetUp_BJacobi_Multiproc(pc)); 28 PetscFunctionReturn(PETSC_SUCCESS); 29 } 30 31 /* Determines the number of blocks assigned to each processor */ 32 /* local block count given */ 33 if (jac->n_local > 0 && jac->n < 0) { 34 PetscCallMPI(MPIU_Allreduce(&jac->n_local, &jac->n, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 35 if (jac->l_lens) { /* check that user set these correctly */ 36 sum = 0; 37 for (i = 0; i < jac->n_local; i++) { 38 PetscCheck(jac->l_lens[i] / bs * bs == jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout"); 39 sum += jac->l_lens[i]; 40 } 41 PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local lens set incorrectly"); 42 } else { 43 PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens)); 44 for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)); 45 } 46 } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */ 47 /* global blocks given: determine which ones are local */ 48 if (jac->g_lens) { 49 /* check if the 
g_lens is has valid entries */ 50 for (i = 0; i < jac->n; i++) { 51 PetscCheck(jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Zero block not allowed"); 52 PetscCheck(jac->g_lens[i] / bs * bs == jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout"); 53 } 54 if (size == 1) { 55 jac->n_local = jac->n; 56 PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens)); 57 PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens, jac->n_local)); 58 /* check that user set these correctly */ 59 sum = 0; 60 for (i = 0; i < jac->n_local; i++) sum += jac->l_lens[i]; 61 PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Global lens set incorrectly"); 62 } else { 63 PetscCall(MatGetOwnershipRange(pc->pmat, &start, &end)); 64 /* loop over blocks determining first one owned by me */ 65 sum = 0; 66 for (i = 0; i < jac->n + 1; i++) { 67 if (sum == start) { 68 i_start = i; 69 goto start_1; 70 } 71 if (i < jac->n) sum += jac->g_lens[i]; 72 } 73 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout"); 74 start_1: 75 for (i = i_start; i < jac->n + 1; i++) { 76 if (sum == end) { 77 i_end = i; 78 goto end_1; 79 } 80 if (i < jac->n) sum += jac->g_lens[i]; 81 } 82 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout"); 83 end_1: 84 jac->n_local = i_end - i_start; 85 PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens)); 86 PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens + i_start, jac->n_local)); 87 } 88 } else { /* no global blocks given, determine then using default layout */ 89 jac->n_local = jac->n / size + ((jac->n % size) > rank); 90 PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens)); 91 for (i = 0; i < jac->n_local; i++) { 92 jac->l_lens[i] = ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)) * bs; 93 PetscCheck(jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, 
"Too many blocks given"); 94 } 95 } 96 } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */ 97 jac->n = size; 98 jac->n_local = 1; 99 PetscCall(PetscMalloc1(1, &jac->l_lens)); 100 jac->l_lens[0] = M; 101 } else { /* jac->n > 0 && jac->n_local > 0 */ 102 if (!jac->l_lens) { 103 PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens)); 104 for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)); 105 } 106 } 107 PetscCheck(jac->n_local >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of blocks is less than number of processors"); 108 109 /* Determines mat and pmat */ 110 PetscCall(MatHasOperation(pc->mat, MATOP_GET_DIAGONAL_BLOCK, &hasop)); 111 if (!hasop && size == 1) { 112 mat = pc->mat; 113 pmat = pc->pmat; 114 } else { 115 if (pc->useAmat) { 116 /* use block from Amat matrix, not Pmat for local MatMult() */ 117 PetscCall(MatGetDiagonalBlock(pc->mat, &mat)); 118 } 119 if (pc->pmat != pc->mat || !pc->useAmat) { 120 PetscCall(MatGetDiagonalBlock(pc->pmat, &pmat)); 121 } else pmat = mat; 122 } 123 124 /* 125 Setup code depends on the number of blocks 126 */ 127 if (jac->n_local == 1) { 128 PetscCall(PCSetUp_BJacobi_Singleblock(pc, mat, pmat)); 129 } else { 130 PetscCall(PCSetUp_BJacobi_Multiblock(pc, mat, pmat)); 131 } 132 PetscFunctionReturn(PETSC_SUCCESS); 133 } 134 135 /* Default destroy, if it has never been setup */ 136 static PetscErrorCode PCDestroy_BJacobi(PC pc) 137 { 138 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 139 140 PetscFunctionBegin; 141 PetscCall(PetscFree(jac->g_lens)); 142 PetscCall(PetscFree(jac->l_lens)); 143 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", NULL)); 144 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", NULL)); 145 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", NULL)); 146 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", 
NULL)); 147 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", NULL)); 148 PetscCall(PetscFree(pc->data)); 149 PetscFunctionReturn(PETSC_SUCCESS); 150 } 151 152 static PetscErrorCode PCSetFromOptions_BJacobi(PC pc, PetscOptionItems PetscOptionsObject) 153 { 154 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 155 PetscInt blocks, i; 156 PetscBool flg; 157 158 PetscFunctionBegin; 159 PetscOptionsHeadBegin(PetscOptionsObject, "Block Jacobi options"); 160 PetscCall(PetscOptionsInt("-pc_bjacobi_blocks", "Total number of blocks", "PCBJacobiSetTotalBlocks", jac->n, &blocks, &flg)); 161 if (flg) PetscCall(PCBJacobiSetTotalBlocks(pc, blocks, NULL)); 162 PetscCall(PetscOptionsInt("-pc_bjacobi_local_blocks", "Local number of blocks", "PCBJacobiSetLocalBlocks", jac->n_local, &blocks, &flg)); 163 if (flg) PetscCall(PCBJacobiSetLocalBlocks(pc, blocks, NULL)); 164 if (jac->ksp) { 165 /* The sub-KSP has already been set up (e.g., PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions was not called 166 * unless we had already been called. 
*/ 167 for (i = 0; i < jac->n_local; i++) PetscCall(KSPSetFromOptions(jac->ksp[i])); 168 } 169 PetscOptionsHeadEnd(); 170 PetscFunctionReturn(PETSC_SUCCESS); 171 } 172 173 #include <petscdraw.h> 174 static PetscErrorCode PCView_BJacobi(PC pc, PetscViewer viewer) 175 { 176 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 177 PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data; 178 PetscMPIInt rank; 179 PetscInt i; 180 PetscBool iascii, isstring, isdraw; 181 PetscViewer sviewer; 182 PetscViewerFormat format; 183 const char *prefix; 184 185 PetscFunctionBegin; 186 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 187 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring)); 188 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 189 if (iascii) { 190 if (pc->useAmat) PetscCall(PetscViewerASCIIPrintf(viewer, " using Amat local matrix, number of blocks = %" PetscInt_FMT "\n", jac->n)); 191 PetscCall(PetscViewerASCIIPrintf(viewer, " number of blocks = %" PetscInt_FMT "\n", jac->n)); 192 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 193 PetscCall(PetscViewerGetFormat(viewer, &format)); 194 if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) { 195 PetscCall(PetscViewerASCIIPrintf(viewer, " Local solver information for first block is in the following KSP and PC objects on rank 0:\n")); 196 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 197 PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all blocks\n", prefix ? 
prefix : "")); 198 if (jac->ksp && !jac->psubcomm) { 199 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 200 if (rank == 0) { 201 PetscCall(PetscViewerASCIIPushTab(sviewer)); 202 PetscCall(KSPView(jac->ksp[0], sviewer)); 203 PetscCall(PetscViewerASCIIPopTab(sviewer)); 204 } 205 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 206 /* extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */ 207 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 208 } else if (mpjac && jac->ksp && mpjac->psubcomm) { 209 PetscCall(PetscViewerGetSubViewer(viewer, mpjac->psubcomm->child, &sviewer)); 210 if (!mpjac->psubcomm->color) { 211 PetscCall(PetscViewerASCIIPushTab(sviewer)); 212 PetscCall(KSPView(*jac->ksp, sviewer)); 213 PetscCall(PetscViewerASCIIPopTab(sviewer)); 214 } 215 PetscCall(PetscViewerRestoreSubViewer(viewer, mpjac->psubcomm->child, &sviewer)); 216 /* extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */ 217 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 218 } 219 } else { 220 PetscInt n_global; 221 PetscCallMPI(MPIU_Allreduce(&jac->n_local, &n_global, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 222 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 223 PetscCall(PetscViewerASCIIPrintf(viewer, " Local solver information for each block is in the following KSP and PC objects:\n")); 224 PetscCall(PetscViewerASCIIPushTab(viewer)); 225 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 226 PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n", rank, jac->n_local, jac->first_local)); 227 for (i = 0; i < jac->n_local; i++) { 228 PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] local block number %" PetscInt_FMT "\n", rank, i)); 229 PetscCall(KSPView(jac->ksp[i], sviewer)); 230 
PetscCall(PetscViewerASCIIPrintf(sviewer, "- - - - - - - - - - - - - - - - - -\n")); 231 } 232 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 233 PetscCall(PetscViewerASCIIPopTab(viewer)); 234 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 235 } 236 } else if (isstring) { 237 PetscCall(PetscViewerStringSPrintf(viewer, " blks=%" PetscInt_FMT, jac->n)); 238 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 239 if (jac->ksp) PetscCall(KSPView(jac->ksp[0], sviewer)); 240 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 241 } else if (isdraw) { 242 PetscDraw draw; 243 char str[25]; 244 PetscReal x, y, bottom, h; 245 246 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 247 PetscCall(PetscDrawGetCurrentPoint(draw, &x, &y)); 248 PetscCall(PetscSNPrintf(str, 25, "Number blocks %" PetscInt_FMT, jac->n)); 249 PetscCall(PetscDrawStringBoxed(draw, x, y, PETSC_DRAW_RED, PETSC_DRAW_BLACK, str, NULL, &h)); 250 bottom = y - h; 251 PetscCall(PetscDrawPushCurrentPoint(draw, x, bottom)); 252 /* warning the communicator on viewer is different then on ksp in parallel */ 253 if (jac->ksp) PetscCall(KSPView(jac->ksp[0], viewer)); 254 PetscCall(PetscDrawPopCurrentPoint(draw)); 255 } 256 PetscFunctionReturn(PETSC_SUCCESS); 257 } 258 259 static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc, PetscInt *n_local, PetscInt *first_local, KSP **ksp) 260 { 261 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 262 263 PetscFunctionBegin; 264 PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Must call KSPSetUp() or PCSetUp() first"); 265 266 if (n_local) *n_local = jac->n_local; 267 if (first_local) *first_local = jac->first_local; 268 if (ksp) *ksp = jac->ksp; 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271 272 static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt *lens) 273 { 274 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 275 276 
PetscFunctionBegin; 277 PetscCheck(pc->setupcalled <= 0 || jac->n == blocks, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called"); 278 jac->n = blocks; 279 if (!lens) jac->g_lens = NULL; 280 else { 281 PetscCall(PetscMalloc1(blocks, &jac->g_lens)); 282 PetscCall(PetscArraycpy(jac->g_lens, lens, blocks)); 283 } 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 287 static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[]) 288 { 289 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 290 291 PetscFunctionBegin; 292 *blocks = jac->n; 293 if (lens) *lens = jac->g_lens; 294 PetscFunctionReturn(PETSC_SUCCESS); 295 } 296 297 static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt lens[]) 298 { 299 PC_BJacobi *jac; 300 301 PetscFunctionBegin; 302 jac = (PC_BJacobi *)pc->data; 303 304 jac->n_local = blocks; 305 if (!lens) jac->l_lens = NULL; 306 else { 307 PetscCall(PetscMalloc1(blocks, &jac->l_lens)); 308 PetscCall(PetscArraycpy(jac->l_lens, lens, blocks)); 309 } 310 PetscFunctionReturn(PETSC_SUCCESS); 311 } 312 313 static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[]) 314 { 315 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 316 317 PetscFunctionBegin; 318 *blocks = jac->n_local; 319 if (lens) *lens = jac->l_lens; 320 PetscFunctionReturn(PETSC_SUCCESS); 321 } 322 323 /*@C 324 PCBJacobiGetSubKSP - Gets the local `KSP` contexts for all blocks on 325 this processor. 326 327 Not Collective 328 329 Input Parameter: 330 . pc - the preconditioner context 331 332 Output Parameters: 333 + n_local - the number of blocks on this processor, or NULL 334 . first_local - the global number of the first block on this processor, or NULL 335 - ksp - the array of KSP contexts 336 337 Level: advanced 338 339 Notes: 340 After `PCBJacobiGetSubKSP()` the array of `KSP` contexts is not to be freed. 
341 342 Currently for some matrix implementations only 1 block per processor 343 is supported. 344 345 You must call `KSPSetUp()` or `PCSetUp()` before calling `PCBJacobiGetSubKSP()`. 346 347 .seealso: [](ch_ksp), `PCBJACOBI`, `PCASM`, `PCASMGetSubKSP()` 348 @*/ 349 PetscErrorCode PCBJacobiGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[]) 350 { 351 PetscFunctionBegin; 352 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 353 PetscUseMethod(pc, "PCBJacobiGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp)); 354 PetscFunctionReturn(PETSC_SUCCESS); 355 } 356 357 /*@ 358 PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block 359 Jacobi preconditioner. 360 361 Collective 362 363 Input Parameters: 364 + pc - the preconditioner context 365 . blocks - the number of blocks 366 - lens - [optional] integer array containing the size of each block 367 368 Options Database Key: 369 . -pc_bjacobi_blocks <blocks> - Sets the number of global blocks 370 371 Level: intermediate 372 373 Note: 374 Currently only a limited number of blocking configurations are supported. 375 All processors sharing the `PC` must call this routine with the same data. 376 377 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetLocalBlocks()` 378 @*/ 379 PetscErrorCode PCBJacobiSetTotalBlocks(PC pc, PetscInt blocks, const PetscInt lens[]) 380 { 381 PetscFunctionBegin; 382 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 383 PetscCheck(blocks > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Must have positive blocks"); 384 PetscTryMethod(pc, "PCBJacobiSetTotalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens)); 385 PetscFunctionReturn(PETSC_SUCCESS); 386 } 387 388 /*@C 389 PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block 390 Jacobi, `PCBJACOBI`, preconditioner. 391 392 Not Collective 393 394 Input Parameter: 395 . 
pc - the preconditioner context 396 397 Output Parameters: 398 + blocks - the number of blocks 399 - lens - integer array containing the size of each block 400 401 Level: intermediate 402 403 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetLocalBlocks()` 404 @*/ 405 PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[]) 406 { 407 PetscFunctionBegin; 408 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 409 PetscAssertPointer(blocks, 2); 410 PetscUseMethod(pc, "PCBJacobiGetTotalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens)); 411 PetscFunctionReturn(PETSC_SUCCESS); 412 } 413 414 /*@ 415 PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block 416 Jacobi, `PCBJACOBI`, preconditioner. 417 418 Not Collective 419 420 Input Parameters: 421 + pc - the preconditioner context 422 . blocks - the number of blocks 423 - lens - [optional] integer array containing size of each block 424 425 Options Database Key: 426 . -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks 427 428 Level: intermediate 429 430 Note: 431 Currently only a limited number of blocking configurations are supported. 432 433 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetTotalBlocks()` 434 @*/ 435 PetscErrorCode PCBJacobiSetLocalBlocks(PC pc, PetscInt blocks, const PetscInt lens[]) 436 { 437 PetscFunctionBegin; 438 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 439 PetscCheck(blocks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must have nonegative blocks"); 440 PetscTryMethod(pc, "PCBJacobiSetLocalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens)); 441 PetscFunctionReturn(PETSC_SUCCESS); 442 } 443 444 /*@C 445 PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block 446 Jacobi, `PCBJACOBI`, preconditioner. 447 448 Not Collective 449 450 Input Parameters: 451 + pc - the preconditioner context 452 . 
blocks - the number of blocks 453 - lens - [optional] integer array containing size of each block 454 455 Level: intermediate 456 457 Note: 458 Currently only a limited number of blocking configurations are supported. 459 460 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetTotalBlocks()` 461 @*/ 462 PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[]) 463 { 464 PetscFunctionBegin; 465 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 466 PetscAssertPointer(blocks, 2); 467 PetscUseMethod(pc, "PCBJacobiGetLocalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens)); 468 PetscFunctionReturn(PETSC_SUCCESS); 469 } 470 471 /*MC 472 PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with 473 its own `KSP` object. 474 475 Options Database Keys: 476 + -pc_use_amat - use Amat to apply block of operator in inner Krylov method 477 - -pc_bjacobi_blocks <n> - use n total blocks 478 479 Level: beginner 480 481 Notes: 482 See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable point block, and `PCPBJACOBI` for fixed size point block 483 484 Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor. 485 486 To set options on the solvers for each block append -sub_ to all the `KSP` and `PC` 487 options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly 488 489 To set the options on the solvers separate for each block call `PCBJacobiGetSubKSP()` 490 and set the options directly on the resulting `KSP` object (you can access its `PC` 491 `KSPGetPC()`) 492 493 For GPU-based vectors (`VECCUDA`, `VECViennaCL`) it is recommended to use exactly one block per MPI process for best 494 performance. Different block partitioning may lead to additional data transfers 495 between host and GPU that lead to degraded performance. 
496 497 When multiple processes share a single block, each block encompasses exactly all the unknowns owned its set of processes. 498 499 .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCType`, 500 `PCASM`, `PCSetUseAmat()`, `PCGetUseAmat()`, `PCBJacobiGetSubKSP()`, `PCBJacobiSetTotalBlocks()`, 501 `PCBJacobiSetLocalBlocks()`, `PCSetModifySubMatrices()`, `PCJACOBI`, `PCVPBJACOBI`, `PCPBJACOBI` 502 M*/ 503 504 PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc) 505 { 506 PetscMPIInt rank; 507 PC_BJacobi *jac; 508 509 PetscFunctionBegin; 510 PetscCall(PetscNew(&jac)); 511 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 512 513 pc->ops->apply = NULL; 514 pc->ops->matapply = NULL; 515 pc->ops->applytranspose = NULL; 516 pc->ops->matapplytranspose = NULL; 517 pc->ops->setup = PCSetUp_BJacobi; 518 pc->ops->destroy = PCDestroy_BJacobi; 519 pc->ops->setfromoptions = PCSetFromOptions_BJacobi; 520 pc->ops->view = PCView_BJacobi; 521 pc->ops->applyrichardson = NULL; 522 523 pc->data = (void *)jac; 524 jac->n = -1; 525 jac->n_local = -1; 526 jac->first_local = rank; 527 jac->ksp = NULL; 528 jac->g_lens = NULL; 529 jac->l_lens = NULL; 530 jac->psubcomm = NULL; 531 532 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", PCBJacobiGetSubKSP_BJacobi)); 533 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", PCBJacobiSetTotalBlocks_BJacobi)); 534 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", PCBJacobiGetTotalBlocks_BJacobi)); 535 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", PCBJacobiSetLocalBlocks_BJacobi)); 536 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", PCBJacobiGetLocalBlocks_BJacobi)); 537 PetscFunctionReturn(PETSC_SUCCESS); 538 } 539 540 /* 541 These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI 542 */ 543 static PetscErrorCode 
PCReset_BJacobi_Singleblock(PC pc) 544 { 545 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 546 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 547 548 PetscFunctionBegin; 549 PetscCall(KSPReset(jac->ksp[0])); 550 PetscCall(VecDestroy(&bjac->x)); 551 PetscCall(VecDestroy(&bjac->y)); 552 PetscFunctionReturn(PETSC_SUCCESS); 553 } 554 555 static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc) 556 { 557 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 558 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 559 560 PetscFunctionBegin; 561 PetscCall(PCReset_BJacobi_Singleblock(pc)); 562 PetscCall(KSPDestroy(&jac->ksp[0])); 563 PetscCall(PetscFree(jac->ksp)); 564 PetscCall(PetscFree(bjac)); 565 PetscCall(PCDestroy_BJacobi(pc)); 566 PetscFunctionReturn(PETSC_SUCCESS); 567 } 568 569 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc) 570 { 571 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 572 KSP subksp = jac->ksp[0]; 573 KSPConvergedReason reason; 574 575 PetscFunctionBegin; 576 PetscCall(KSPSetUp(subksp)); 577 PetscCall(KSPGetConvergedReason(subksp, &reason)); 578 if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR; 579 PetscFunctionReturn(PETSC_SUCCESS); 580 } 581 582 static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc, Vec x, Vec y) 583 { 584 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 585 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 586 587 PetscFunctionBegin; 588 PetscCall(VecGetLocalVectorRead(x, bjac->x)); 589 PetscCall(VecGetLocalVector(y, bjac->y)); 590 /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner 591 matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild 592 of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/ 593 PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner)); 594 PetscCall(KSPSolve(jac->ksp[0], 
bjac->x, bjac->y)); 595 PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y)); 596 PetscCall(VecRestoreLocalVectorRead(x, bjac->x)); 597 PetscCall(VecRestoreLocalVector(y, bjac->y)); 598 PetscFunctionReturn(PETSC_SUCCESS); 599 } 600 601 static PetscErrorCode PCMatApply_BJacobi_Singleblock_Private(PC pc, Mat X, Mat Y, PetscBool transpose) 602 { 603 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 604 Mat sX, sY; 605 606 PetscFunctionBegin; 607 /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner 608 matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild 609 of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/ 610 PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner)); 611 PetscCall(MatDenseGetLocalMatrix(X, &sX)); 612 PetscCall(MatDenseGetLocalMatrix(Y, &sY)); 613 if (!transpose) PetscCall(KSPMatSolve(jac->ksp[0], sX, sY)); 614 else PetscCall(KSPMatSolveTranspose(jac->ksp[0], sX, sY)); 615 PetscFunctionReturn(PETSC_SUCCESS); 616 } 617 618 static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc, Mat X, Mat Y) 619 { 620 PetscFunctionBegin; 621 PetscCall(PCMatApply_BJacobi_Singleblock_Private(pc, X, Y, PETSC_FALSE)); 622 PetscFunctionReturn(PETSC_SUCCESS); 623 } 624 625 static PetscErrorCode PCMatApplyTranspose_BJacobi_Singleblock(PC pc, Mat X, Mat Y) 626 { 627 PetscFunctionBegin; 628 PetscCall(PCMatApply_BJacobi_Singleblock_Private(pc, X, Y, PETSC_TRUE)); 629 PetscFunctionReturn(PETSC_SUCCESS); 630 } 631 632 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc, Vec x, Vec y) 633 { 634 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 635 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 636 PetscScalar *y_array; 637 const PetscScalar *x_array; 638 PC subpc; 639 640 PetscFunctionBegin; 641 /* 642 The VecPlaceArray() is to avoid having to copy the 643 y vector into the bjac->x vector. 
The reason for 644 the bjac->x vector is that we need a sequential vector 645 for the sequential solve. 646 */ 647 PetscCall(VecGetArrayRead(x, &x_array)); 648 PetscCall(VecGetArray(y, &y_array)); 649 PetscCall(VecPlaceArray(bjac->x, x_array)); 650 PetscCall(VecPlaceArray(bjac->y, y_array)); 651 /* apply the symmetric left portion of the inner PC operator */ 652 /* note this bypasses the inner KSP and its options completely */ 653 PetscCall(KSPGetPC(jac->ksp[0], &subpc)); 654 PetscCall(PCApplySymmetricLeft(subpc, bjac->x, bjac->y)); 655 PetscCall(VecResetArray(bjac->x)); 656 PetscCall(VecResetArray(bjac->y)); 657 PetscCall(VecRestoreArrayRead(x, &x_array)); 658 PetscCall(VecRestoreArray(y, &y_array)); 659 PetscFunctionReturn(PETSC_SUCCESS); 660 } 661 662 static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc, Vec x, Vec y) 663 { 664 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 665 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 666 PetscScalar *y_array; 667 const PetscScalar *x_array; 668 PC subpc; 669 670 PetscFunctionBegin; 671 /* 672 The VecPlaceArray() is to avoid having to copy the 673 y vector into the bjac->x vector. The reason for 674 the bjac->x vector is that we need a sequential vector 675 for the sequential solve. 
676 */ 677 PetscCall(VecGetArrayRead(x, &x_array)); 678 PetscCall(VecGetArray(y, &y_array)); 679 PetscCall(VecPlaceArray(bjac->x, x_array)); 680 PetscCall(VecPlaceArray(bjac->y, y_array)); 681 682 /* apply the symmetric right portion of the inner PC operator */ 683 /* note this bypasses the inner KSP and its options completely */ 684 685 PetscCall(KSPGetPC(jac->ksp[0], &subpc)); 686 PetscCall(PCApplySymmetricRight(subpc, bjac->x, bjac->y)); 687 688 PetscCall(VecResetArray(bjac->x)); 689 PetscCall(VecResetArray(bjac->y)); 690 PetscCall(VecRestoreArrayRead(x, &x_array)); 691 PetscCall(VecRestoreArray(y, &y_array)); 692 PetscFunctionReturn(PETSC_SUCCESS); 693 } 694 695 static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc, Vec x, Vec y) 696 { 697 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 698 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 699 PetscScalar *y_array; 700 const PetscScalar *x_array; 701 702 PetscFunctionBegin; 703 /* 704 The VecPlaceArray() is to avoid having to copy the 705 y vector into the bjac->x vector. The reason for 706 the bjac->x vector is that we need a sequential vector 707 for the sequential solve. 
 */
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  /* alias the single block's work vectors onto the global arrays -- no copy is made */
  PetscCall(VecPlaceArray(bjac->x, x_array));
  PetscCall(VecPlaceArray(bjac->y, y_array));
  PetscCall(KSPSolveTranspose(jac->ksp[0], bjac->x, bjac->y));
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
  PetscCall(VecResetArray(bjac->x));
  PetscCall(VecResetArray(bjac->y));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscCall(VecRestoreArray(y, &y_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  PCSetUp_BJacobi_Singleblock - setup for the one-block-per-process case.

  On the first setup it creates the sequential sub-KSP (type KSPPREONLY, options
  prefix "<pc_prefix>sub_"), installs the Singleblock function table on pc->ops,
  and creates array-less work vectors whose storage is supplied later via
  VecPlaceArray() in the apply routines. On every setup it (re)sets the
  operators on the sub-KSP.
*/
static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc, Mat mat, Mat pmat)
{
  PC_BJacobi             *jac = (PC_BJacobi *)pc->data;
  PetscInt                m;
  KSP                     ksp;
  PC_BJacobi_Singleblock *bjac;
  PetscBool               wasSetup = PETSC_TRUE; /* set to FALSE below if the sub-KSP is created in this call */
  VecType                 vectype;
  const char             *prefix;

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    if (!jac->ksp) {
      PetscInt nestlevel;

      wasSetup = PETSC_FALSE;

      PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
      PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
      PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
      PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
      PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
      PetscCall(KSPSetType(ksp, KSPPREONLY));
      PetscCall(PCGetOptionsPrefix(pc, &prefix));
      PetscCall(KSPSetOptionsPrefix(ksp, prefix));
      PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));

      pc->ops->reset               = PCReset_BJacobi_Singleblock;
      pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
      pc->ops->apply               = PCApply_BJacobi_Singleblock;
      pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
      pc->ops->matapplytranspose   = PCMatApplyTranspose_BJacobi_Singleblock;
      pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
      pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
      pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
      pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;

      PetscCall(PetscMalloc1(1, &jac->ksp));
      jac->ksp[0] = ksp;

      PetscCall(PetscNew(&bjac));
      jac->data = (void *)bjac;
    } else {
      ksp  = jac->ksp[0];
      bjac = (PC_BJacobi_Singleblock *)jac->data;
    }

    /*
      The reason we need to generate these vectors is to serve
      as the right-hand side and solution vector for the solve on the
      block. We do not need to allocate space for the vectors since
      that is provided via VecPlaceArray() just before the call to
      KSPSolve() on the block.
    */
    PetscCall(MatGetSize(pmat, &m, &m));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->x));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->y));
    PetscCall(MatGetVecType(pmat, &vectype));
    PetscCall(VecSetType(bjac->x, vectype));
    PetscCall(VecSetType(bjac->y, vectype));
  } else {
    ksp  = jac->ksp[0];
    bjac = (PC_BJacobi_Singleblock *)jac->data;
  }
  PetscCall(KSPGetOptionsPrefix(ksp, &prefix));
  if (pc->useAmat) {
    /* solve with the true operator mat; pmat is used to build the preconditioner */
    PetscCall(KSPSetOperators(ksp, mat, pmat));
    PetscCall(MatSetOptionsPrefix(mat, prefix));
  } else {
    PetscCall(KSPSetOperators(ksp, pmat, pmat));
  }
  PetscCall(MatSetOptionsPrefix(pmat, prefix));
  if (!wasSetup && pc->setfromoptionscalled) {
    /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
    PetscCall(KSPSetFromOptions(ksp));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Frees the per-block submatrices, work vectors, and index sets, and resets each sub-KSP */
static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
{
  PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscInt               i;

  PetscFunctionBegin;
  if (bjac && bjac->pmat) {
    PetscCall(MatDestroyMatrices(jac->n_local, &bjac->pmat));
    /* bjac->mat is only created when useAmat is set, see PCSetUp_BJacobi_Multiblock() */
    if (pc->useAmat) PetscCall(MatDestroyMatrices(jac->n_local, &bjac->mat));
  }

  for (i = 0; i < jac->n_local; i++) {
    PetscCall(KSPReset(jac->ksp[i]));
    if (bjac && bjac->x) {
      PetscCall(VecDestroy(&bjac->x[i]));
      PetscCall(VecDestroy(&bjac->y[i]));
      PetscCall(ISDestroy(&bjac->is[i]));
    }
  }
  PetscCall(PetscFree(jac->l_lens));
  PetscCall(PetscFree(jac->g_lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Full teardown: reset, then free the Multiblock data structure and all sub-KSPs */
static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
{
  PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscInt               i;

  PetscFunctionBegin;
  PetscCall(PCReset_BJacobi_Multiblock(pc));
  if (bjac) {
    PetscCall(PetscFree2(bjac->x, bjac->y));
    PetscCall(PetscFree(bjac->starts));
    PetscCall(PetscFree(bjac->is));
  }
  PetscCall(PetscFree(jac->data));
  for (i = 0; i < jac->n_local; i++) PetscCall(KSPDestroy(&jac->ksp[i]));
  PetscCall(PetscFree(jac->ksp));
  PetscCall(PCDestroy_BJacobi(pc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Runs KSPSetUp() on every local block; records PC_SUBPC_ERROR if a block's inner PC failed */
static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
{
  PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
  PetscInt           i, n_local = jac->n_local;
  KSPConvergedReason reason;

  PetscFunctionBegin;
  for (i = 0; i < n_local; i++) {
    PetscCall(KSPSetUp(jac->ksp[i]));
    PetscCall(KSPGetConvergedReason(jac->ksp[i], &reason));
    if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Applies the preconditioner: one independent sequential KSPSolve() per local block */
static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
  PetscInt               i, n_local = jac->n_local;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscScalar           *yin;
  const PetscScalar     *xin;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xin));
  PetscCall(VecGetArray(y, &yin));
  for (i = 0; i < n_local; i++) {
    /*
      To avoid copying the subvector from x into a workspace we instead
      make the workspace vector array point to the subpart of the array of
      the global vector.
    */
    PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
    PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));

    PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
    PetscCall(KSPSolve(jac->ksp[i], bjac->x[i], bjac->y[i]));
    PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
    PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));

    PetscCall(VecResetArray(bjac->x[i]));
    PetscCall(VecResetArray(bjac->y[i]));
  }
  PetscCall(VecRestoreArrayRead(x, &xin));
  PetscCall(VecRestoreArray(y, &yin));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Applies only the symmetric-left factor of each block's inner PC (bypasses the sub-KSP) */
static PetscErrorCode PCApplySymmetricLeft_BJacobi_Multiblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
  PetscInt               i, n_local = jac->n_local;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscScalar           *yin;
  const PetscScalar     *xin;
  PC                     subpc;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xin));
  PetscCall(VecGetArray(y, &yin));
  for (i = 0; i < n_local; i++) {
    /*
      To avoid copying the subvector from x into a workspace we instead
      make the workspace vector array point to the subpart of the array of
      the global vector.
    */
    PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
    PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));

    PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
    /* apply the symmetric left portion of the inner PC operator */
    /* note this bypasses the inner KSP and its options completely */
    PetscCall(KSPGetPC(jac->ksp[i], &subpc));
    PetscCall(PCApplySymmetricLeft(subpc, bjac->x[i], bjac->y[i]));
    PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));

    PetscCall(VecResetArray(bjac->x[i]));
    PetscCall(VecResetArray(bjac->y[i]));
  }
  PetscCall(VecRestoreArrayRead(x, &xin));
  PetscCall(VecRestoreArray(y, &yin));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Applies only the symmetric-right factor of each block's inner PC (bypasses the sub-KSP) */
static PetscErrorCode PCApplySymmetricRight_BJacobi_Multiblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
  PetscInt               i, n_local = jac->n_local;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscScalar           *yin;
  const PetscScalar     *xin;
  PC                     subpc;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xin));
  PetscCall(VecGetArray(y, &yin));
  for (i = 0; i < n_local; i++) {
    /*
      To avoid copying the subvector from x into a workspace we instead
      make the workspace vector array point to the subpart of the array of
      the global vector.
    */
    PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
    PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));

    PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
    /* apply the symmetric right portion of the inner PC operator */
    /* note this bypasses the inner KSP and its options completely */
    PetscCall(KSPGetPC(jac->ksp[i], &subpc));
    PetscCall(PCApplySymmetricRight(subpc, bjac->x[i], bjac->y[i]));
    PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));

    PetscCall(VecResetArray(bjac->x[i]));
    PetscCall(VecResetArray(bjac->y[i]));
  }
  PetscCall(VecRestoreArrayRead(x, &xin));
  PetscCall(VecRestoreArray(y, &yin));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Applies the transpose of the preconditioner: KSPSolveTranspose() on each local block */
static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
  PetscInt               i, n_local = jac->n_local;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscScalar           *yin;
  const PetscScalar     *xin;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xin));
  PetscCall(VecGetArray(y, &yin));
  for (i = 0; i < n_local; i++) {
    /*
      To avoid copying the subvector from x into a workspace we instead
      make the workspace vector array point to the subpart of the array of
      the global vector.
982 */ 983 PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i])); 984 PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i])); 985 986 PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 987 PetscCall(KSPSolveTranspose(jac->ksp[i], bjac->x[i], bjac->y[i])); 988 PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i])); 989 PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 990 991 PetscCall(VecResetArray(bjac->x[i])); 992 PetscCall(VecResetArray(bjac->y[i])); 993 } 994 PetscCall(VecRestoreArrayRead(x, &xin)); 995 PetscCall(VecRestoreArray(y, &yin)); 996 PetscFunctionReturn(PETSC_SUCCESS); 997 } 998 999 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc, Mat mat, Mat pmat) 1000 { 1001 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 1002 PetscInt m, n_local, N, M, start, i; 1003 const char *prefix; 1004 KSP ksp; 1005 Vec x, y; 1006 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data; 1007 PC subpc; 1008 IS is; 1009 MatReuse scall; 1010 VecType vectype; 1011 MatNullSpace *nullsp_mat = NULL, *nullsp_pmat = NULL; 1012 1013 PetscFunctionBegin; 1014 PetscCall(MatGetLocalSize(pc->pmat, &M, &N)); 1015 1016 n_local = jac->n_local; 1017 1018 if (pc->useAmat) { 1019 PetscBool same; 1020 PetscCall(PetscObjectTypeCompare((PetscObject)mat, ((PetscObject)pmat)->type_name, &same)); 1021 PetscCheck(same, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "Matrices not of same type"); 1022 } 1023 1024 if (!pc->setupcalled) { 1025 PetscInt nestlevel; 1026 1027 scall = MAT_INITIAL_MATRIX; 1028 1029 if (!jac->ksp) { 1030 pc->ops->reset = PCReset_BJacobi_Multiblock; 1031 pc->ops->destroy = PCDestroy_BJacobi_Multiblock; 1032 pc->ops->apply = PCApply_BJacobi_Multiblock; 1033 pc->ops->matapply = NULL; 1034 pc->ops->matapplytranspose = NULL; 1035 pc->ops->applysymmetricleft = PCApplySymmetricLeft_BJacobi_Multiblock; 1036 pc->ops->applysymmetricright = 
PCApplySymmetricRight_BJacobi_Multiblock; 1037 pc->ops->applytranspose = PCApplyTranspose_BJacobi_Multiblock; 1038 pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiblock; 1039 1040 PetscCall(PetscNew(&bjac)); 1041 PetscCall(PetscMalloc1(n_local, &jac->ksp)); 1042 PetscCall(PetscMalloc2(n_local, &bjac->x, n_local, &bjac->y)); 1043 PetscCall(PetscMalloc1(n_local, &bjac->starts)); 1044 1045 jac->data = (void *)bjac; 1046 PetscCall(PetscMalloc1(n_local, &bjac->is)); 1047 1048 for (i = 0; i < n_local; i++) { 1049 PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp)); 1050 PetscCall(PCGetKSPNestLevel(pc, &nestlevel)); 1051 PetscCall(KSPSetNestLevel(ksp, nestlevel + 1)); 1052 PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure)); 1053 PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1)); 1054 PetscCall(KSPSetType(ksp, KSPPREONLY)); 1055 PetscCall(KSPGetPC(ksp, &subpc)); 1056 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1057 PetscCall(KSPSetOptionsPrefix(ksp, prefix)); 1058 PetscCall(KSPAppendOptionsPrefix(ksp, "sub_")); 1059 1060 jac->ksp[i] = ksp; 1061 } 1062 } else { 1063 bjac = (PC_BJacobi_Multiblock *)jac->data; 1064 } 1065 1066 start = 0; 1067 PetscCall(MatGetVecType(pmat, &vectype)); 1068 for (i = 0; i < n_local; i++) { 1069 m = jac->l_lens[i]; 1070 /* 1071 The reason we need to generate these vectors is to serve 1072 as the right-hand side and solution vector for the solve on the 1073 block. We do not need to allocate space for the vectors since 1074 that is provided via VecPlaceArray() just before the call to 1075 KSPSolve() on the block. 
1076 1077 */ 1078 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &x)); 1079 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &y)); 1080 PetscCall(VecSetType(x, vectype)); 1081 PetscCall(VecSetType(y, vectype)); 1082 1083 bjac->x[i] = x; 1084 bjac->y[i] = y; 1085 bjac->starts[i] = start; 1086 1087 PetscCall(ISCreateStride(PETSC_COMM_SELF, m, start, 1, &is)); 1088 bjac->is[i] = is; 1089 1090 start += m; 1091 } 1092 } else { 1093 bjac = (PC_BJacobi_Multiblock *)jac->data; 1094 /* 1095 Destroy the blocks from the previous iteration 1096 */ 1097 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1098 PetscCall(MatGetNullSpaces(n_local, bjac->pmat, &nullsp_pmat)); 1099 PetscCall(MatDestroyMatrices(n_local, &bjac->pmat)); 1100 if (pc->useAmat) { 1101 PetscCall(MatGetNullSpaces(n_local, bjac->mat, &nullsp_mat)); 1102 PetscCall(MatDestroyMatrices(n_local, &bjac->mat)); 1103 } 1104 scall = MAT_INITIAL_MATRIX; 1105 } else scall = MAT_REUSE_MATRIX; 1106 } 1107 1108 PetscCall(MatCreateSubMatrices(pmat, n_local, bjac->is, bjac->is, scall, &bjac->pmat)); 1109 if (nullsp_pmat) PetscCall(MatRestoreNullSpaces(n_local, bjac->pmat, &nullsp_pmat)); 1110 if (pc->useAmat) { 1111 PetscCall(MatCreateSubMatrices(mat, n_local, bjac->is, bjac->is, scall, &bjac->mat)); 1112 if (nullsp_mat) PetscCall(MatRestoreNullSpaces(n_local, bjac->mat, &nullsp_mat)); 1113 } 1114 /* Return control to the user so that the submatrices can be modified (e.g., to apply 1115 different boundary conditions for the submatrices than for the global problem) */ 1116 PetscCall(PCModifySubMatrices(pc, n_local, bjac->is, bjac->is, bjac->pmat, pc->modifysubmatricesP)); 1117 1118 for (i = 0; i < n_local; i++) { 1119 PetscCall(KSPGetOptionsPrefix(jac->ksp[i], &prefix)); 1120 if (pc->useAmat) { 1121 PetscCall(KSPSetOperators(jac->ksp[i], bjac->mat[i], bjac->pmat[i])); 1122 PetscCall(MatSetOptionsPrefix(bjac->mat[i], prefix)); 1123 } else { 1124 PetscCall(KSPSetOperators(jac->ksp[i], bjac->pmat[i], bjac->pmat[i])); 1125 } 
    PetscCall(MatSetOptionsPrefix(bjac->pmat[i], prefix));
    if (pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[i]));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    These are for a single block with multiple processes
*/
/* Sets up the single subcommunicator-wide sub-KSP; records PC_SUBPC_ERROR on inner PC failure */
static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc)
{
  PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
  KSP                subksp = jac->ksp[0];
  KSPConvergedReason reason;

  PetscFunctionBegin;
  PetscCall(KSPSetUp(subksp));
  PetscCall(KSPGetConvergedReason(subksp, &reason));
  if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroys the multiproc work vectors and submatrix; resets the sub-KSP if it exists */
static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;

  PetscFunctionBegin;
  PetscCall(VecDestroy(&mpjac->ysub));
  PetscCall(VecDestroy(&mpjac->xsub));
  PetscCall(MatDestroy(&mpjac->submats));
  if (jac->ksp) PetscCall(KSPReset(jac->ksp[0]));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Full teardown of the multiproc variant: reset, destroy sub-KSP, subcomm, and data struct */
static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;

  PetscFunctionBegin;
  PetscCall(PCReset_BJacobi_Multiproc(pc));
  PetscCall(KSPDestroy(&jac->ksp[0]));
  PetscCall(PetscFree(jac->ksp));
  PetscCall(PetscSubcommDestroy(&mpjac->psubcomm));

  PetscCall(PetscFree(mpjac));
  PetscCall(PCDestroy_BJacobi(pc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Applies the preconditioner: one KSPSolve() on the subcommunicator-wide block */
static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc, Vec x, Vec y)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
  PetscScalar          *yarray;
  const PetscScalar    *xarray;
  KSPConvergedReason    reason;

  PetscFunctionBegin;
  /* place x's and y's local arrays into xsub and ysub */
  PetscCall(VecGetArrayRead(x, &xarray));
  PetscCall(VecGetArray(y, &yarray));
  PetscCall(VecPlaceArray(mpjac->xsub, xarray));
  PetscCall(VecPlaceArray(mpjac->ysub, yarray));

  /* apply preconditioner on each matrix block */
  PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
  PetscCall(KSPSolve(jac->ksp[0], mpjac->xsub, mpjac->ysub));
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, mpjac->ysub));
  PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
  PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
  if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;

  PetscCall(VecResetArray(mpjac->xsub));
  PetscCall(VecResetArray(mpjac->ysub));
  PetscCall(VecRestoreArrayRead(x, &xarray));
  PetscCall(VecRestoreArray(y, &yarray));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Applies the preconditioner to multiple right-hand sides at once via KSPMatSolve() */
static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc, Mat X, Mat Y)
{
  PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
  KSPConvergedReason reason;
  Mat                sX, sY;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscInt           m, N, lda, ldb;

  PetscFunctionBegin;
  /* apply preconditioner on each matrix block */
  PetscCall(MatGetLocalSize(X, &m, NULL));
  PetscCall(MatGetSize(X, NULL, &N));
  PetscCall(MatDenseGetLDA(X, &lda));
  PetscCall(MatDenseGetLDA(Y, &ldb));
  PetscCall(MatDenseGetArrayRead(X, &x));
  PetscCall(MatDenseGetArrayWrite(Y, &y));
  /* wrap X's and Y's storage in dense matrices living on the sub-KSP's communicator */
  PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, (PetscScalar *)x, &sX));
  PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, y, &sY));
  PetscCall(MatDenseSetLDA(sX, lda));
  PetscCall(MatDenseSetLDA(sY, ldb));
  PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
  PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, NULL));
  PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
  PetscCall(MatDestroy(&sY));
  PetscCall(MatDestroy(&sX));
  PetscCall(MatDenseRestoreArrayWrite(Y, &y));
  PetscCall(MatDenseRestoreArrayRead(X, &x));
  PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
  if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  PCSetUp_BJacobi_Multiproc - setup for fewer blocks than processes: each block
  spans a contiguous subcommunicator. On first setup it creates the subcomm
  (if the user did not supply one), extracts the subcommunicator-wide matrix
  block, and creates the sub-KSP and dummy work vectors; on later setups it
  refreshes the matrix block, reusing it when the nonzero pattern is unchanged.
*/
static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
  PetscInt              m, n;
  MPI_Comm              comm, subcomm = 0;
  const char           *prefix;
  PetscBool             wasSetup = PETSC_TRUE; /* set to FALSE below if the sub-KSP is created in this call */
  VecType               vectype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
  PetscCheck(jac->n_local <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only a single block in a subcommunicator is supported");
  jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
  if (!pc->setupcalled) {
    PetscInt nestlevel;

    wasSetup = PETSC_FALSE;
    PetscCall(PetscNew(&mpjac));
    jac->data = (void *)mpjac;

    /* initialize datastructure mpjac */
    if (!jac->psubcomm) {
      /* Create default contiguous subcommunicatiors if user does not provide them */
      PetscCall(PetscSubcommCreate(comm, &jac->psubcomm));
      PetscCall(PetscSubcommSetNumber(jac->psubcomm, jac->n));
      PetscCall(PetscSubcommSetType(jac->psubcomm, PETSC_SUBCOMM_CONTIGUOUS));
    }
    mpjac->psubcomm = jac->psubcomm;
    subcomm         = PetscSubcommChild(mpjac->psubcomm);

    /* Get matrix blocks of pmat */
    PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
    /* create a new PC that processors in each subcomm have copy of */
    PetscCall(PetscMalloc1(1, &jac->ksp));
    PetscCall(KSPCreate(subcomm, &jac->ksp[0]));
    PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
    PetscCall(KSPSetNestLevel(jac->ksp[0], nestlevel + 1));
    PetscCall(KSPSetErrorIfNotConverged(jac->ksp[0], pc->erroriffailure));
    PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0], (PetscObject)pc, 1));
    PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
    PetscCall(KSPGetPC(jac->ksp[0], &mpjac->pc));

    /* the sub-KSP (and its matrix) take the options prefix "<pc_prefix>sub_" */
    PetscCall(PCGetOptionsPrefix(pc, &prefix));
    PetscCall(KSPSetOptionsPrefix(jac->ksp[0], prefix));
    PetscCall(KSPAppendOptionsPrefix(jac->ksp[0], "sub_"));
    PetscCall(KSPGetOptionsPrefix(jac->ksp[0], &prefix));
    PetscCall(MatSetOptionsPrefix(mpjac->submats, prefix));

    /* create dummy vectors xsub and ysub; their arrays are supplied later by VecPlaceArray() */
    PetscCall(MatGetLocalSize(mpjac->submats, &m, &n));
    PetscCall(VecCreateMPIWithArray(subcomm, 1, n, PETSC_DECIDE, NULL, &mpjac->xsub));
    PetscCall(VecCreateMPIWithArray(subcomm, 1, m, PETSC_DECIDE, NULL, &mpjac->ysub));
    PetscCall(MatGetVecType(mpjac->submats, &vectype));
    PetscCall(VecSetType(mpjac->xsub, vectype));
    PetscCall(VecSetType(mpjac->ysub, vectype));

    pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc;
    pc->ops->reset         = PCReset_BJacobi_Multiproc;
    pc->ops->destroy       = PCDestroy_BJacobi_Multiproc;
    pc->ops->apply         = PCApply_BJacobi_Multiproc;
    pc->ops->matapply      = PCMatApply_BJacobi_Multiproc;
  } else { /* pc->setupcalled */
    subcomm = PetscSubcommChild(mpjac->psubcomm);
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      /* destroy old matrix blocks, then get new matrix blocks */
      if (mpjac->submats) PetscCall(MatDestroy(&mpjac->submats));
      PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
    } else {
      /* same nonzero pattern: refresh values in place */
      PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_REUSE_MATRIX, &mpjac->submats));
    }
    PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
  }

  if (!wasSetup && pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[0]));
  PetscFunctionReturn(PETSC_SUCCESS);
}