xref: /petsc/src/ksp/pc/impls/bjacobi/bjacobi.c (revision 174dc0c8cee294b82b85e4dd3b331b29396264fc) !
1 /*
2    Defines a block Jacobi preconditioner.
3 */
4 
5 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/
6 
7 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC, Mat, Mat);
8 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC, Mat, Mat);
9 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);
10 
/*
   PCSetUp_BJacobi - Builds the block layout (number and sizes of the local blocks)
   from whatever combination of global/local block information the user supplied,
   selects the matrices used for the blocks, and dispatches to the single-block,
   multi-block, or multi-process setup routine.
*/
static PetscErrorCode PCSetUp_BJacobi(PC pc)
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;
  Mat         mat = pc->mat, pmat = pc->pmat;
  PetscBool   hasop;
  PetscInt    N, M, start, i, sum, end;
  PetscInt    bs, i_start = -1, i_end = -1;
  PetscMPIInt rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
  PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
  PetscCall(MatGetBlockSize(pc->pmat, &bs));

  /* fewer blocks than processes: each block is shared by several processes */
  if (jac->n > 0 && jac->n < size) {
    PetscCall(PCSetUp_BJacobi_Multiproc(pc));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /*    Determines the number of blocks assigned to each processor */
  /*   local block count  given */
  if (jac->n_local > 0 && jac->n < 0) {
    PetscCallMPI(MPIU_Allreduce(&jac->n_local, &jac->n, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
    if (jac->l_lens) { /* check that user set these correctly */
      sum = 0;
      for (i = 0; i < jac->n_local; i++) {
        /* each block length must be a multiple of the matrix block size */
        PetscCheck(jac->l_lens[i] / bs * bs == jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
        sum += jac->l_lens[i];
      }
      PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local lens set incorrectly");
    } else {
      /* no sizes given: split the local rows as evenly as possible, in multiples of bs */
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
    }
  } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
    /* global blocks given: determine which ones are local */
    if (jac->g_lens) {
      /* check that g_lens has valid entries */
      for (i = 0; i < jac->n; i++) {
        PetscCheck(jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Zero block not allowed");
        PetscCheck(jac->g_lens[i] / bs * bs == jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
      }
      if (size == 1) {
        jac->n_local = jac->n;
        PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
        PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens, jac->n_local));
        /* check that user set these correctly */
        sum = 0;
        for (i = 0; i < jac->n_local; i++) sum += jac->l_lens[i];
        PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Global lens set incorrectly");
      } else {
        PetscCall(MatGetOwnershipRange(pc->pmat, &start, &end));
        /* loop over blocks determining first one owned by me */
        sum = 0;
        for (i = 0; i < jac->n + 1; i++) {
          if (sum == start) {
            i_start = i;
            goto start_1;
          }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        /* falls through only if no block boundary coincides with the ownership start */
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
      start_1: /* sum == start here; keep accumulating until the end of the local row range */
        for (i = i_start; i < jac->n + 1; i++) {
          if (sum == end) {
            i_end = i;
            goto end_1;
          }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
      end_1:
        /* blocks [i_start, i_end) exactly cover the locally owned rows */
        jac->n_local = i_end - i_start;
        PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
        PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens + i_start, jac->n_local));
      }
    } else { /* no global blocks given, determine them using default layout */
      jac->n_local = jac->n / size + ((jac->n % size) > rank);
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) {
        jac->l_lens[i] = ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)) * bs;
        PetscCheck(jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Too many blocks given");
      }
    }
  } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
    /* default: one block per process, covering all locally owned rows */
    jac->n       = size;
    jac->n_local = 1;
    PetscCall(PetscMalloc1(1, &jac->l_lens));
    jac->l_lens[0] = M;
  } else { /* jac->n > 0 && jac->n_local > 0 */
    if (!jac->l_lens) {
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
    }
  }
  PetscCheck(jac->n_local >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of blocks is less than number of processors");

  /*    Determines mat and pmat */
  PetscCall(MatHasOperation(pc->mat, MATOP_GET_DIAGONAL_BLOCK, &hasop));
  if (!hasop && size == 1) {
    /* sequential and no diagonal-block extraction available: use the matrices directly */
    mat  = pc->mat;
    pmat = pc->pmat;
  } else {
    if (pc->useAmat) {
      /* use block from Amat matrix, not Pmat for local MatMult() */
      PetscCall(MatGetDiagonalBlock(pc->mat, &mat));
    }
    if (pc->pmat != pc->mat || !pc->useAmat) {
      PetscCall(MatGetDiagonalBlock(pc->pmat, &pmat));
    } else pmat = mat;
  }

  /*
     Setup code depends on the number of blocks
  */
  if (jac->n_local == 1) {
    PetscCall(PCSetUp_BJacobi_Singleblock(pc, mat, pmat));
  } else {
    PetscCall(PCSetUp_BJacobi_Multiblock(pc, mat, pmat));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
134 
135 /* Default destroy, if it has never been setup */
136 static PetscErrorCode PCDestroy_BJacobi(PC pc)
137 {
138   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
139 
140   PetscFunctionBegin;
141   PetscCall(PetscFree(jac->g_lens));
142   PetscCall(PetscFree(jac->l_lens));
143   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", NULL));
144   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", NULL));
145   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", NULL));
146   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", NULL));
147   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", NULL));
148   PetscCall(PetscFree(pc->data));
149   PetscFunctionReturn(PETSC_SUCCESS);
150 }
151 
152 static PetscErrorCode PCSetFromOptions_BJacobi(PC pc, PetscOptionItems PetscOptionsObject)
153 {
154   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
155   PetscInt    blocks, i;
156   PetscBool   flg;
157 
158   PetscFunctionBegin;
159   PetscOptionsHeadBegin(PetscOptionsObject, "Block Jacobi options");
160   PetscCall(PetscOptionsInt("-pc_bjacobi_blocks", "Total number of blocks", "PCBJacobiSetTotalBlocks", jac->n, &blocks, &flg));
161   if (flg) PetscCall(PCBJacobiSetTotalBlocks(pc, blocks, NULL));
162   PetscCall(PetscOptionsInt("-pc_bjacobi_local_blocks", "Local number of blocks", "PCBJacobiSetLocalBlocks", jac->n_local, &blocks, &flg));
163   if (flg) PetscCall(PCBJacobiSetLocalBlocks(pc, blocks, NULL));
164   if (jac->ksp) {
165     /* The sub-KSP has already been set up (e.g., PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions was not called
166      * unless we had already been called. */
167     for (i = 0; i < jac->n_local; i++) PetscCall(KSPSetFromOptions(jac->ksp[i]));
168   }
169   PetscOptionsHeadEnd();
170   PetscFunctionReturn(PETSC_SUCCESS);
171 }
172 
173 #include <petscdraw.h>
/*
   PCView_BJacobi - Prints the block-Jacobi configuration to an ASCII, string, or
   draw viewer; in ASCII detail mode every local block's KSP is shown.
*/
static PetscErrorCode PCView_BJacobi(PC pc, PetscViewer viewer)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
  PetscMPIInt           rank;
  PetscInt              i;
  PetscBool             iascii, isstring, isdraw;
  PetscViewer           sviewer;
  PetscViewerFormat     format;
  const char           *prefix;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    if (pc->useAmat) PetscCall(PetscViewerASCIIPrintf(viewer, "  using Amat local matrix, number of blocks = %" PetscInt_FMT "\n", jac->n));
    PetscCall(PetscViewerASCIIPrintf(viewer, "  number of blocks = %" PetscInt_FMT "\n", jac->n));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* summary mode: show only rank 0's first block */
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for first block is in the following KSP and PC objects on rank 0:\n"));
      PetscCall(PCGetOptionsPrefix(pc, &prefix));
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all blocks\n", prefix ? prefix : ""));
      if (jac->ksp && !jac->psubcomm) {
        /* one or more blocks per process: view rank 0's first sub-KSP through a self sub-viewer */
        PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
        if (rank == 0) {
          PetscCall(PetscViewerASCIIPushTab(sviewer));
          PetscCall(KSPView(jac->ksp[0], sviewer));
          PetscCall(PetscViewerASCIIPopTab(sviewer));
        }
        PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
        /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else if (mpjac && jac->ksp && mpjac->psubcomm) {
        /* multi-process blocks: view through the subcommunicator's sub-viewer */
        PetscCall(PetscViewerGetSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
        if (!mpjac->psubcomm->color) {
          PetscCall(PetscViewerASCIIPushTab(sviewer));
          PetscCall(KSPView(*jac->ksp, sviewer));
          PetscCall(PetscViewerASCIIPopTab(sviewer));
        }
        PetscCall(PetscViewerRestoreSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
        /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      }
    } else {
      /* detail mode: every rank prints all of its local blocks */
      PetscInt n_global;
      PetscCallMPI(MPIU_Allreduce(&jac->n_local, &n_global, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for each block is in the following KSP and PC objects:\n"));
      PetscCall(PetscViewerASCIIPushTab(viewer));
      PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
      PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n", rank, jac->n_local, jac->first_local));
      for (i = 0; i < jac->n_local; i++) {
        PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] local block number %" PetscInt_FMT "\n", rank, i));
        PetscCall(KSPView(jac->ksp[i], sviewer));
        PetscCall(PetscViewerASCIIPrintf(sviewer, "- - - - - - - - - - - - - - - - - -\n"));
      }
      PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
      PetscCall(PetscViewerASCIIPopTab(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
  } else if (isstring) {
    PetscCall(PetscViewerStringSPrintf(viewer, " blks=%" PetscInt_FMT, jac->n));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (jac->ksp) PetscCall(KSPView(jac->ksp[0], sviewer));
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
  } else if (isdraw) {
    PetscDraw draw;
    char      str[25];
    PetscReal x, y, bottom, h;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawGetCurrentPoint(draw, &x, &y));
    PetscCall(PetscSNPrintf(str, 25, "Number blocks %" PetscInt_FMT, jac->n));
    PetscCall(PetscDrawStringBoxed(draw, x, y, PETSC_DRAW_RED, PETSC_DRAW_BLACK, str, NULL, &h));
    bottom = y - h;
    PetscCall(PetscDrawPushCurrentPoint(draw, x, bottom));
    /* warning the communicator on viewer is different than on ksp in parallel */
    if (jac->ksp) PetscCall(KSPView(jac->ksp[0], viewer));
    PetscCall(PetscDrawPopCurrentPoint(draw));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
258 
259 static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc, PetscInt *n_local, PetscInt *first_local, KSP **ksp)
260 {
261   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
262 
263   PetscFunctionBegin;
264   PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Must call KSPSetUp() or PCSetUp() first");
265 
266   if (n_local) *n_local = jac->n_local;
267   if (first_local) *first_local = jac->first_local;
268   if (ksp) *ksp = jac->ksp;
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt *lens)
273 {
274   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
275 
276   PetscFunctionBegin;
277   PetscCheck(pc->setupcalled <= 0 || jac->n == blocks, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
278   jac->n = blocks;
279   if (!lens) jac->g_lens = NULL;
280   else {
281     PetscCall(PetscMalloc1(blocks, &jac->g_lens));
282     PetscCall(PetscArraycpy(jac->g_lens, lens, blocks));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
288 {
289   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
290 
291   PetscFunctionBegin;
292   *blocks = jac->n;
293   if (lens) *lens = jac->g_lens;
294   PetscFunctionReturn(PETSC_SUCCESS);
295 }
296 
297 static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt lens[])
298 {
299   PC_BJacobi *jac;
300 
301   PetscFunctionBegin;
302   jac = (PC_BJacobi *)pc->data;
303 
304   jac->n_local = blocks;
305   if (!lens) jac->l_lens = NULL;
306   else {
307     PetscCall(PetscMalloc1(blocks, &jac->l_lens));
308     PetscCall(PetscArraycpy(jac->l_lens, lens, blocks));
309   }
310   PetscFunctionReturn(PETSC_SUCCESS);
311 }
312 
313 static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
314 {
315   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
316 
317   PetscFunctionBegin;
318   *blocks = jac->n_local;
319   if (lens) *lens = jac->l_lens;
320   PetscFunctionReturn(PETSC_SUCCESS);
321 }
322 
323 /*@C
324   PCBJacobiGetSubKSP - Gets the local `KSP` contexts for all blocks on
325   this processor.
326 
327   Not Collective
328 
329   Input Parameter:
330 . pc - the preconditioner context
331 
332   Output Parameters:
333 + n_local     - the number of blocks on this processor, or NULL
334 . first_local - the global number of the first block on this processor, or NULL
335 - ksp         - the array of KSP contexts
336 
337   Level: advanced
338 
339   Notes:
340   After `PCBJacobiGetSubKSP()` the array of `KSP` contexts is not to be freed.
341 
342   Currently for some matrix implementations only 1 block per processor
343   is supported.
344 
345   You must call `KSPSetUp()` or `PCSetUp()` before calling `PCBJacobiGetSubKSP()`.
346 
347 .seealso: [](ch_ksp), `PCBJACOBI`, `PCASM`, `PCASMGetSubKSP()`
348 @*/
PetscErrorCode PCBJacobiGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  /* dispatch to the type-specific implementation; errors if the PC is not PCBJACOBI */
  PetscUseMethod(pc, "PCBJacobiGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp));
  PetscFunctionReturn(PETSC_SUCCESS);
}
356 
357 /*@
358   PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
359   Jacobi preconditioner.
360 
361   Collective
362 
363   Input Parameters:
364 + pc     - the preconditioner context
365 . blocks - the number of blocks
366 - lens   - [optional] integer array containing the size of each block
367 
368   Options Database Key:
369 . -pc_bjacobi_blocks <blocks> - Sets the number of global blocks
370 
371   Level: intermediate
372 
373   Note:
374   Currently only a limited number of blocking configurations are supported.
375   All processors sharing the `PC` must call this routine with the same data.
376 
377 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetLocalBlocks()`
378 @*/
PetscErrorCode PCBJacobiSetTotalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscCheck(blocks > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Must have positive blocks");
  /* PetscTryMethod: silently a no-op when the PC type is not PCBJACOBI */
  PetscTryMethod(pc, "PCBJacobiSetTotalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}
387 
388 /*@C
389   PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
390   Jacobi, `PCBJACOBI`, preconditioner.
391 
392   Not Collective
393 
394   Input Parameter:
395 . pc - the preconditioner context
396 
397   Output Parameters:
398 + blocks - the number of blocks
399 - lens   - integer array containing the size of each block
400 
401   Level: intermediate
402 
403 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetLocalBlocks()`
404 @*/
PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscAssertPointer(blocks, 2);
  /* dispatch to the type-specific implementation; errors if the PC is not PCBJACOBI */
  PetscUseMethod(pc, "PCBJacobiGetTotalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}
413 
414 /*@
415   PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
416   Jacobi, `PCBJACOBI`,  preconditioner.
417 
418   Not Collective
419 
420   Input Parameters:
421 + pc     - the preconditioner context
422 . blocks - the number of blocks
423 - lens   - [optional] integer array containing size of each block
424 
425   Options Database Key:
426 . -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks
427 
428   Level: intermediate
429 
430   Note:
431   Currently only a limited number of blocking configurations are supported.
432 
433 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetTotalBlocks()`
434 @*/
435 PetscErrorCode PCBJacobiSetLocalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
436 {
437   PetscFunctionBegin;
438   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
439   PetscCheck(blocks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must have nonegative blocks");
440   PetscTryMethod(pc, "PCBJacobiSetLocalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
441   PetscFunctionReturn(PETSC_SUCCESS);
442 }
443 
444 /*@C
445   PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
446   Jacobi, `PCBJACOBI`, preconditioner.
447 
448   Not Collective
449 
450   Input Parameters:
451 + pc     - the preconditioner context
452 . blocks - the number of blocks
453 - lens   - [optional] integer array containing size of each block
454 
455   Level: intermediate
456 
457   Note:
458   Currently only a limited number of blocking configurations are supported.
459 
460 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetTotalBlocks()`
461 @*/
PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscAssertPointer(blocks, 2);
  /* dispatch to the type-specific implementation; errors if the PC is not PCBJACOBI */
  PetscUseMethod(pc, "PCBJacobiGetLocalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}
470 
471 /*MC
472    PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with
473            its own `KSP` object.
474 
475    Options Database Keys:
476 +  -pc_use_amat - use Amat to apply block of operator in inner Krylov method
477 -  -pc_bjacobi_blocks <n> - use n total blocks
478 
479    Level: beginner
480 
481    Notes:
482     See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable point block, and `PCPBJACOBI` for fixed size point block
483 
484     Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.
485 
486      To set options on the solvers for each block append -sub_ to all the `KSP` and `PC`
487         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
488 
489      To set the options on the solvers separate for each block call `PCBJacobiGetSubKSP()`
490          and set the options directly on the resulting `KSP` object (you can access its `PC`
491          `KSPGetPC()`)
492 
493      For GPU-based vectors (`VECCUDA`, `VECViennaCL`) it is recommended to use exactly one block per MPI process for best
494          performance.  Different block partitioning may lead to additional data transfers
495          between host and GPU that lead to degraded performance.
496 
     When multiple processes share a single block, each block encompasses exactly all the unknowns owned by its set of processes.
498 
499 .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCType`,
500           `PCASM`, `PCSetUseAmat()`, `PCGetUseAmat()`, `PCBJacobiGetSubKSP()`, `PCBJacobiSetTotalBlocks()`,
501           `PCBJacobiSetLocalBlocks()`, `PCSetModifySubMatrices()`, `PCJACOBI`, `PCVPBJACOBI`, `PCPBJACOBI`
502 M*/
503 
504 PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
505 {
506   PetscMPIInt rank;
507   PC_BJacobi *jac;
508 
509   PetscFunctionBegin;
510   PetscCall(PetscNew(&jac));
511   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
512 
513   pc->ops->apply             = NULL;
514   pc->ops->matapply          = NULL;
515   pc->ops->applytranspose    = NULL;
516   pc->ops->matapplytranspose = NULL;
517   pc->ops->setup             = PCSetUp_BJacobi;
518   pc->ops->destroy           = PCDestroy_BJacobi;
519   pc->ops->setfromoptions    = PCSetFromOptions_BJacobi;
520   pc->ops->view              = PCView_BJacobi;
521   pc->ops->applyrichardson   = NULL;
522 
523   pc->data         = (void *)jac;
524   jac->n           = -1;
525   jac->n_local     = -1;
526   jac->first_local = rank;
527   jac->ksp         = NULL;
528   jac->g_lens      = NULL;
529   jac->l_lens      = NULL;
530   jac->psubcomm    = NULL;
531 
532   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", PCBJacobiGetSubKSP_BJacobi));
533   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", PCBJacobiSetTotalBlocks_BJacobi));
534   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", PCBJacobiGetTotalBlocks_BJacobi));
535   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", PCBJacobiSetLocalBlocks_BJacobi));
536   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", PCBJacobiGetLocalBlocks_BJacobi));
537   PetscFunctionReturn(PETSC_SUCCESS);
538 }
539 
540 /*
541         These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI
542 */
/*
   PCReset_BJacobi_Singleblock - Resets the single sub-KSP and destroys the work
   vectors; only installed after setup, so jac->ksp is known to exist here.
*/
static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
{
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

  PetscFunctionBegin;
  PetscCall(KSPReset(jac->ksp[0]));
  /* work vectors serve as sequential rhs/solution for the block solve; recreated on next setup */
  PetscCall(VecDestroy(&bjac->x));
  PetscCall(VecDestroy(&bjac->y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
554 
/*
   PCDestroy_BJacobi_Singleblock - Full teardown: reset first, then destroy the
   sub-KSP and both context structs, finishing with the shared PCDestroy_BJacobi().
   The order matters: bjac must be freed before PCDestroy_BJacobi() frees pc->data.
*/
static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
{
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

  PetscFunctionBegin;
  PetscCall(PCReset_BJacobi_Singleblock(pc));
  PetscCall(KSPDestroy(&jac->ksp[0]));
  PetscCall(PetscFree(jac->ksp));
  PetscCall(PetscFree(bjac));
  PetscCall(PCDestroy_BJacobi(pc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
568 
569 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
570 {
571   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
572   KSP                subksp = jac->ksp[0];
573   KSPConvergedReason reason;
574 
575   PetscFunctionBegin;
576   PetscCall(KSPSetUp(subksp));
577   PetscCall(KSPGetConvergedReason(subksp, &reason));
578   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
579   PetscFunctionReturn(PETSC_SUCCESS);
580 }
581 
/*
   PCApply_BJacobi_Singleblock - Applies the preconditioner y = B^{-1} x by
   solving the single local block with the sub-KSP, wrapping the parallel
   vectors' local parts in the sequential work vectors bjac->x/bjac->y.
*/
static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

  PetscFunctionBegin;
  PetscCall(VecGetLocalVectorRead(x, bjac->x));
  PetscCall(VecGetLocalVector(y, bjac->y));
  /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
     matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
     of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
  PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
  PetscCall(KSPSolve(jac->ksp[0], bjac->x, bjac->y));
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
  PetscCall(VecRestoreLocalVectorRead(x, bjac->x));
  PetscCall(VecRestoreLocalVector(y, bjac->y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
600 
601 static PetscErrorCode PCMatApply_BJacobi_Singleblock_Private(PC pc, Mat X, Mat Y, PetscBool transpose)
602 {
603   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
604   Mat         sX, sY;
605 
606   PetscFunctionBegin;
607   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
608      matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
609      of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
610   PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
611   PetscCall(MatDenseGetLocalMatrix(X, &sX));
612   PetscCall(MatDenseGetLocalMatrix(Y, &sY));
613   if (!transpose) PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
614   else PetscCall(KSPMatSolveTranspose(jac->ksp[0], sX, sY));
615   PetscFunctionReturn(PETSC_SUCCESS);
616 }
617 
/* Multi-right-hand-side apply: forwards to the shared kernel without transpose */
static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc, Mat X, Mat Y)
{
  PetscFunctionBegin;
  PetscCall(PCMatApply_BJacobi_Singleblock_Private(pc, X, Y, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
624 
/* Multi-right-hand-side transpose apply: forwards to the shared kernel with transpose */
static PetscErrorCode PCMatApplyTranspose_BJacobi_Singleblock(PC pc, Mat X, Mat Y)
{
  PetscFunctionBegin;
  PetscCall(PCMatApply_BJacobi_Singleblock_Private(pc, X, Y, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
631 
632 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc, Vec x, Vec y)
633 {
634   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
635   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
636   PetscScalar            *y_array;
637   const PetscScalar      *x_array;
638   PC                      subpc;
639 
640   PetscFunctionBegin;
641   /*
642       The VecPlaceArray() is to avoid having to copy the
643     y vector into the bjac->x vector. The reason for
644     the bjac->x vector is that we need a sequential vector
645     for the sequential solve.
646   */
647   PetscCall(VecGetArrayRead(x, &x_array));
648   PetscCall(VecGetArray(y, &y_array));
649   PetscCall(VecPlaceArray(bjac->x, x_array));
650   PetscCall(VecPlaceArray(bjac->y, y_array));
651   /* apply the symmetric left portion of the inner PC operator */
652   /* note this bypasses the inner KSP and its options completely */
653   PetscCall(KSPGetPC(jac->ksp[0], &subpc));
654   PetscCall(PCApplySymmetricLeft(subpc, bjac->x, bjac->y));
655   PetscCall(VecResetArray(bjac->x));
656   PetscCall(VecResetArray(bjac->y));
657   PetscCall(VecRestoreArrayRead(x, &x_array));
658   PetscCall(VecRestoreArray(y, &y_array));
659   PetscFunctionReturn(PETSC_SUCCESS);
660 }
661 
662 static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc, Vec x, Vec y)
663 {
664   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
665   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
666   PetscScalar            *y_array;
667   const PetscScalar      *x_array;
668   PC                      subpc;
669 
670   PetscFunctionBegin;
671   /*
672       The VecPlaceArray() is to avoid having to copy the
673     y vector into the bjac->x vector. The reason for
674     the bjac->x vector is that we need a sequential vector
675     for the sequential solve.
676   */
677   PetscCall(VecGetArrayRead(x, &x_array));
678   PetscCall(VecGetArray(y, &y_array));
679   PetscCall(VecPlaceArray(bjac->x, x_array));
680   PetscCall(VecPlaceArray(bjac->y, y_array));
681 
682   /* apply the symmetric right portion of the inner PC operator */
683   /* note this bypasses the inner KSP and its options completely */
684 
685   PetscCall(KSPGetPC(jac->ksp[0], &subpc));
686   PetscCall(PCApplySymmetricRight(subpc, bjac->x, bjac->y));
687 
688   PetscCall(VecResetArray(bjac->x));
689   PetscCall(VecResetArray(bjac->y));
690   PetscCall(VecRestoreArrayRead(x, &x_array));
691   PetscCall(VecRestoreArray(y, &y_array));
692   PetscFunctionReturn(PETSC_SUCCESS);
693 }
694 
695 static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc, Vec x, Vec y)
696 {
697   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
698   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
699   PetscScalar            *y_array;
700   const PetscScalar      *x_array;
701 
702   PetscFunctionBegin;
703   /*
704       The VecPlaceArray() is to avoid having to copy the
705     y vector into the bjac->x vector. The reason for
706     the bjac->x vector is that we need a sequential vector
707     for the sequential solve.
708   */
709   PetscCall(VecGetArrayRead(x, &x_array));
710   PetscCall(VecGetArray(y, &y_array));
711   PetscCall(VecPlaceArray(bjac->x, x_array));
712   PetscCall(VecPlaceArray(bjac->y, y_array));
713   PetscCall(KSPSolveTranspose(jac->ksp[0], bjac->x, bjac->y));
714   PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
715   PetscCall(VecResetArray(bjac->x));
716   PetscCall(VecResetArray(bjac->y));
717   PetscCall(VecRestoreArrayRead(x, &x_array));
718   PetscCall(VecRestoreArray(y, &y_array));
719   PetscFunctionReturn(PETSC_SUCCESS);
720 }
721 
/*
   Sets up block Jacobi for the case of exactly one block per MPI process.

   On first setup (pc->setupcalled false) this creates the sequential inner KSP
   (prefixed "sub_"), installs the singleblock implementations in the PC ops
   table, and builds the zero-length work vectors bjac->x/bjac->y whose storage
   is later supplied by VecPlaceArray() at apply time. On subsequent setups only
   the operators (and option prefixes) are refreshed.
*/
static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc, Mat mat, Mat pmat)
{
  PC_BJacobi             *jac = (PC_BJacobi *)pc->data;
  PetscInt                m;
  KSP                     ksp;
  PC_BJacobi_Singleblock *bjac;
  PetscBool               wasSetup = PETSC_TRUE; /* skip KSPSetFromOptions() below unless this is a fresh setup */
  VecType                 vectype;
  const char             *prefix;

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    if (!jac->ksp) { /* inner solver not created yet (first PCSetUp and no prior PCBJacobiGetSubKSP etc.) */
      PetscInt nestlevel;

      wasSetup = PETSC_FALSE;

      /* the single block lives entirely on this process, hence PETSC_COMM_SELF */
      PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
      PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
      PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
      PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
      PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
      PetscCall(KSPSetType(ksp, KSPPREONLY));
      /* inner solver options are selected with -<pc_prefix>sub_... */
      PetscCall(PCGetOptionsPrefix(pc, &prefix));
      PetscCall(KSPSetOptionsPrefix(ksp, prefix));
      PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));

      /* switch the PC to the singleblock implementations */
      pc->ops->reset               = PCReset_BJacobi_Singleblock;
      pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
      pc->ops->apply               = PCApply_BJacobi_Singleblock;
      pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
      pc->ops->matapplytranspose   = PCMatApplyTranspose_BJacobi_Singleblock;
      pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
      pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
      pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
      pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;

      PetscCall(PetscMalloc1(1, &jac->ksp));
      jac->ksp[0] = ksp;

      PetscCall(PetscNew(&bjac));
      jac->data = (void *)bjac;
    } else {
      /* solver already exists; just recover the handles */
      ksp  = jac->ksp[0];
      bjac = (PC_BJacobi_Singleblock *)jac->data;
    }

    /*
      The reason we need to generate these vectors is to serve
      as the right-hand side and solution vector for the solve on the
      block. We do not need to allocate space for the vectors since
      that is provided via VecPlaceArray() just before the call to
      KSPSolve() on the block.
    */
    PetscCall(MatGetSize(pmat, &m, &m));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->x));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->y));
    /* match the vector type to the matrix (e.g. CUDA/Kokkos backends) */
    PetscCall(MatGetVecType(pmat, &vectype));
    PetscCall(VecSetType(bjac->x, vectype));
    PetscCall(VecSetType(bjac->y, vectype));
  } else {
    ksp  = jac->ksp[0];
    bjac = (PC_BJacobi_Singleblock *)jac->data;
  }
  /* refresh the operators every setup; with -pc_use_amat the Amat is used for the solve */
  PetscCall(KSPGetOptionsPrefix(ksp, &prefix));
  if (pc->useAmat) {
    PetscCall(KSPSetOperators(ksp, mat, pmat));
    PetscCall(MatSetOptionsPrefix(mat, prefix));
  } else {
    PetscCall(KSPSetOperators(ksp, pmat, pmat));
  }
  PetscCall(MatSetOptionsPrefix(pmat, prefix));
  if (!wasSetup && pc->setfromoptionscalled) {
    /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
    PetscCall(KSPSetFromOptions(ksp));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
800 
801 static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
802 {
803   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
804   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
805   PetscInt               i;
806 
807   PetscFunctionBegin;
808   if (bjac && bjac->pmat) {
809     PetscCall(MatDestroyMatrices(jac->n_local, &bjac->pmat));
810     if (pc->useAmat) PetscCall(MatDestroyMatrices(jac->n_local, &bjac->mat));
811   }
812 
813   for (i = 0; i < jac->n_local; i++) {
814     PetscCall(KSPReset(jac->ksp[i]));
815     if (bjac && bjac->x) {
816       PetscCall(VecDestroy(&bjac->x[i]));
817       PetscCall(VecDestroy(&bjac->y[i]));
818       PetscCall(ISDestroy(&bjac->is[i]));
819     }
820   }
821   PetscCall(PetscFree(jac->l_lens));
822   PetscCall(PetscFree(jac->g_lens));
823   PetscFunctionReturn(PETSC_SUCCESS);
824 }
825 
826 static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
827 {
828   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
829   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
830   PetscInt               i;
831 
832   PetscFunctionBegin;
833   PetscCall(PCReset_BJacobi_Multiblock(pc));
834   if (bjac) {
835     PetscCall(PetscFree2(bjac->x, bjac->y));
836     PetscCall(PetscFree(bjac->starts));
837     PetscCall(PetscFree(bjac->is));
838   }
839   PetscCall(PetscFree(jac->data));
840   for (i = 0; i < jac->n_local; i++) PetscCall(KSPDestroy(&jac->ksp[i]));
841   PetscCall(PetscFree(jac->ksp));
842   PetscCall(PCDestroy_BJacobi(pc));
843   PetscFunctionReturn(PETSC_SUCCESS);
844 }
845 
846 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
847 {
848   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
849   PetscInt           i, n_local = jac->n_local;
850   KSPConvergedReason reason;
851 
852   PetscFunctionBegin;
853   for (i = 0; i < n_local; i++) {
854     PetscCall(KSPSetUp(jac->ksp[i]));
855     PetscCall(KSPGetConvergedReason(jac->ksp[i], &reason));
856     if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
857   }
858   PetscFunctionReturn(PETSC_SUCCESS);
859 }
860 
861 static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc, Vec x, Vec y)
862 {
863   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
864   PetscInt               i, n_local = jac->n_local;
865   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
866   PetscScalar           *yin;
867   const PetscScalar     *xin;
868 
869   PetscFunctionBegin;
870   PetscCall(VecGetArrayRead(x, &xin));
871   PetscCall(VecGetArray(y, &yin));
872   for (i = 0; i < n_local; i++) {
873     /*
874        To avoid copying the subvector from x into a workspace we instead
875        make the workspace vector array point to the subpart of the array of
876        the global vector.
877     */
878     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
879     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
880 
881     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
882     PetscCall(KSPSolve(jac->ksp[i], bjac->x[i], bjac->y[i]));
883     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
884     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
885 
886     PetscCall(VecResetArray(bjac->x[i]));
887     PetscCall(VecResetArray(bjac->y[i]));
888   }
889   PetscCall(VecRestoreArrayRead(x, &xin));
890   PetscCall(VecRestoreArray(y, &yin));
891   PetscFunctionReturn(PETSC_SUCCESS);
892 }
893 
894 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Multiblock(PC pc, Vec x, Vec y)
895 {
896   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
897   PetscInt               i, n_local = jac->n_local;
898   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
899   PetscScalar           *yin;
900   const PetscScalar     *xin;
901   PC                     subpc;
902 
903   PetscFunctionBegin;
904   PetscCall(VecGetArrayRead(x, &xin));
905   PetscCall(VecGetArray(y, &yin));
906   for (i = 0; i < n_local; i++) {
907     /*
908        To avoid copying the subvector from x into a workspace we instead
909        make the workspace vector array point to the subpart of the array of
910        the global vector.
911     */
912     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
913     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
914 
915     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
916     /* apply the symmetric left portion of the inner PC operator */
917     /* note this bypasses the inner KSP and its options completely */
918     PetscCall(KSPGetPC(jac->ksp[i], &subpc));
919     PetscCall(PCApplySymmetricLeft(subpc, bjac->x[i], bjac->y[i]));
920     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
921 
922     PetscCall(VecResetArray(bjac->x[i]));
923     PetscCall(VecResetArray(bjac->y[i]));
924   }
925   PetscCall(VecRestoreArrayRead(x, &xin));
926   PetscCall(VecRestoreArray(y, &yin));
927   PetscFunctionReturn(PETSC_SUCCESS);
928 }
929 
/* Applies only the symmetric right factor of each block's inner preconditioner. */
static PetscErrorCode PCApplySymmetricRight_BJacobi_Multiblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
  PetscInt               i, n_local = jac->n_local;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscScalar           *yin;
  const PetscScalar     *xin;
  PC                     subpc;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xin));
  PetscCall(VecGetArray(y, &yin));
  for (i = 0; i < n_local; i++) {
    /*
       To avoid copying the subvector from x into a workspace we instead
       make the workspace vector array point to the subpart of the array of
       the global vector.
    */
    PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
    PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));

    PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
    /* apply the symmetric right portion of the inner PC operator */
    /* note this bypasses the inner KSP and its options completely */
    PetscCall(KSPGetPC(jac->ksp[i], &subpc));
    PetscCall(PCApplySymmetricRight(subpc, bjac->x[i], bjac->y[i]));
    PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));

    PetscCall(VecResetArray(bjac->x[i]));
    PetscCall(VecResetArray(bjac->y[i]));
  }
  PetscCall(VecRestoreArrayRead(x, &xin));
  PetscCall(VecRestoreArray(y, &yin));
  PetscFunctionReturn(PETSC_SUCCESS);
}
965 
966 static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc, Vec x, Vec y)
967 {
968   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
969   PetscInt               i, n_local = jac->n_local;
970   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
971   PetscScalar           *yin;
972   const PetscScalar     *xin;
973 
974   PetscFunctionBegin;
975   PetscCall(VecGetArrayRead(x, &xin));
976   PetscCall(VecGetArray(y, &yin));
977   for (i = 0; i < n_local; i++) {
978     /*
979        To avoid copying the subvector from x into a workspace we instead
980        make the workspace vector array point to the subpart of the array of
981        the global vector.
982     */
983     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
984     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
985 
986     PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
987     PetscCall(KSPSolveTranspose(jac->ksp[i], bjac->x[i], bjac->y[i]));
988     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
989     PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
990 
991     PetscCall(VecResetArray(bjac->x[i]));
992     PetscCall(VecResetArray(bjac->y[i]));
993   }
994   PetscCall(VecRestoreArrayRead(x, &xin));
995   PetscCall(VecRestoreArray(y, &yin));
996   PetscFunctionReturn(PETSC_SUCCESS);
997 }
998 
999 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc, Mat mat, Mat pmat)
1000 {
1001   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
1002   PetscInt               m, n_local, N, M, start, i;
1003   const char            *prefix;
1004   KSP                    ksp;
1005   Vec                    x, y;
1006   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
1007   PC                     subpc;
1008   IS                     is;
1009   MatReuse               scall;
1010   VecType                vectype;
1011   MatNullSpace          *nullsp_mat = NULL, *nullsp_pmat = NULL;
1012 
1013   PetscFunctionBegin;
1014   PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
1015 
1016   n_local = jac->n_local;
1017 
1018   if (pc->useAmat) {
1019     PetscBool same;
1020     PetscCall(PetscObjectTypeCompare((PetscObject)mat, ((PetscObject)pmat)->type_name, &same));
1021     PetscCheck(same, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "Matrices not of same type");
1022   }
1023 
1024   if (!pc->setupcalled) {
1025     PetscInt nestlevel;
1026 
1027     scall = MAT_INITIAL_MATRIX;
1028 
1029     if (!jac->ksp) {
1030       pc->ops->reset               = PCReset_BJacobi_Multiblock;
1031       pc->ops->destroy             = PCDestroy_BJacobi_Multiblock;
1032       pc->ops->apply               = PCApply_BJacobi_Multiblock;
1033       pc->ops->matapply            = NULL;
1034       pc->ops->matapplytranspose   = NULL;
1035       pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Multiblock;
1036       pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Multiblock;
1037       pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Multiblock;
1038       pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Multiblock;
1039 
1040       PetscCall(PetscNew(&bjac));
1041       PetscCall(PetscMalloc1(n_local, &jac->ksp));
1042       PetscCall(PetscMalloc2(n_local, &bjac->x, n_local, &bjac->y));
1043       PetscCall(PetscMalloc1(n_local, &bjac->starts));
1044 
1045       jac->data = (void *)bjac;
1046       PetscCall(PetscMalloc1(n_local, &bjac->is));
1047 
1048       for (i = 0; i < n_local; i++) {
1049         PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
1050         PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
1051         PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
1052         PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
1053         PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
1054         PetscCall(KSPSetType(ksp, KSPPREONLY));
1055         PetscCall(KSPGetPC(ksp, &subpc));
1056         PetscCall(PCGetOptionsPrefix(pc, &prefix));
1057         PetscCall(KSPSetOptionsPrefix(ksp, prefix));
1058         PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));
1059 
1060         jac->ksp[i] = ksp;
1061       }
1062     } else {
1063       bjac = (PC_BJacobi_Multiblock *)jac->data;
1064     }
1065 
1066     start = 0;
1067     PetscCall(MatGetVecType(pmat, &vectype));
1068     for (i = 0; i < n_local; i++) {
1069       m = jac->l_lens[i];
1070       /*
1071       The reason we need to generate these vectors is to serve
1072       as the right-hand side and solution vector for the solve on the
1073       block. We do not need to allocate space for the vectors since
1074       that is provided via VecPlaceArray() just before the call to
1075       KSPSolve() on the block.
1076 
1077       */
1078       PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &x));
1079       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &y));
1080       PetscCall(VecSetType(x, vectype));
1081       PetscCall(VecSetType(y, vectype));
1082 
1083       bjac->x[i]      = x;
1084       bjac->y[i]      = y;
1085       bjac->starts[i] = start;
1086 
1087       PetscCall(ISCreateStride(PETSC_COMM_SELF, m, start, 1, &is));
1088       bjac->is[i] = is;
1089 
1090       start += m;
1091     }
1092   } else {
1093     bjac = (PC_BJacobi_Multiblock *)jac->data;
1094     /*
1095        Destroy the blocks from the previous iteration
1096     */
1097     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1098       PetscCall(MatGetNullSpaces(n_local, bjac->pmat, &nullsp_pmat));
1099       PetscCall(MatDestroyMatrices(n_local, &bjac->pmat));
1100       if (pc->useAmat) {
1101         PetscCall(MatGetNullSpaces(n_local, bjac->mat, &nullsp_mat));
1102         PetscCall(MatDestroyMatrices(n_local, &bjac->mat));
1103       }
1104       scall = MAT_INITIAL_MATRIX;
1105     } else scall = MAT_REUSE_MATRIX;
1106   }
1107 
1108   PetscCall(MatCreateSubMatrices(pmat, n_local, bjac->is, bjac->is, scall, &bjac->pmat));
1109   if (nullsp_pmat) PetscCall(MatRestoreNullSpaces(n_local, bjac->pmat, &nullsp_pmat));
1110   if (pc->useAmat) {
1111     PetscCall(MatCreateSubMatrices(mat, n_local, bjac->is, bjac->is, scall, &bjac->mat));
1112     if (nullsp_mat) PetscCall(MatRestoreNullSpaces(n_local, bjac->mat, &nullsp_mat));
1113   }
1114   /* Return control to the user so that the submatrices can be modified (e.g., to apply
1115      different boundary conditions for the submatrices than for the global problem) */
1116   PetscCall(PCModifySubMatrices(pc, n_local, bjac->is, bjac->is, bjac->pmat, pc->modifysubmatricesP));
1117 
1118   for (i = 0; i < n_local; i++) {
1119     PetscCall(KSPGetOptionsPrefix(jac->ksp[i], &prefix));
1120     if (pc->useAmat) {
1121       PetscCall(KSPSetOperators(jac->ksp[i], bjac->mat[i], bjac->pmat[i]));
1122       PetscCall(MatSetOptionsPrefix(bjac->mat[i], prefix));
1123     } else {
1124       PetscCall(KSPSetOperators(jac->ksp[i], bjac->pmat[i], bjac->pmat[i]));
1125     }
1126     PetscCall(MatSetOptionsPrefix(bjac->pmat[i], prefix));
1127     if (pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[i]));
1128   }
1129   PetscFunctionReturn(PETSC_SUCCESS);
1130 }
1131 
1132 /*
1133       These are for a single block with multiple processes
1134 */
1135 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc)
1136 {
1137   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
1138   KSP                subksp = jac->ksp[0];
1139   KSPConvergedReason reason;
1140 
1141   PetscFunctionBegin;
1142   PetscCall(KSPSetUp(subksp));
1143   PetscCall(KSPGetConvergedReason(subksp, &reason));
1144   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1145   PetscFunctionReturn(PETSC_SUCCESS);
1146 }
1147 
1148 static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
1149 {
1150   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1151   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1152 
1153   PetscFunctionBegin;
1154   PetscCall(VecDestroy(&mpjac->ysub));
1155   PetscCall(VecDestroy(&mpjac->xsub));
1156   PetscCall(MatDestroy(&mpjac->submats));
1157   if (jac->ksp) PetscCall(KSPReset(jac->ksp[0]));
1158   PetscFunctionReturn(PETSC_SUCCESS);
1159 }
1160 
1161 static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
1162 {
1163   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1164   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1165 
1166   PetscFunctionBegin;
1167   PetscCall(PCReset_BJacobi_Multiproc(pc));
1168   PetscCall(KSPDestroy(&jac->ksp[0]));
1169   PetscCall(PetscFree(jac->ksp));
1170   PetscCall(PetscSubcommDestroy(&mpjac->psubcomm));
1171 
1172   PetscCall(PetscFree(mpjac));
1173   PetscCall(PCDestroy_BJacobi(pc));
1174   PetscFunctionReturn(PETSC_SUCCESS);
1175 }
1176 
1177 static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc, Vec x, Vec y)
1178 {
1179   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1180   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1181   PetscScalar          *yarray;
1182   const PetscScalar    *xarray;
1183   KSPConvergedReason    reason;
1184 
1185   PetscFunctionBegin;
1186   /* place x's and y's local arrays into xsub and ysub */
1187   PetscCall(VecGetArrayRead(x, &xarray));
1188   PetscCall(VecGetArray(y, &yarray));
1189   PetscCall(VecPlaceArray(mpjac->xsub, xarray));
1190   PetscCall(VecPlaceArray(mpjac->ysub, yarray));
1191 
1192   /* apply preconditioner on each matrix block */
1193   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1194   PetscCall(KSPSolve(jac->ksp[0], mpjac->xsub, mpjac->ysub));
1195   PetscCall(KSPCheckSolve(jac->ksp[0], pc, mpjac->ysub));
1196   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1197   PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
1198   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1199 
1200   PetscCall(VecResetArray(mpjac->xsub));
1201   PetscCall(VecResetArray(mpjac->ysub));
1202   PetscCall(VecRestoreArrayRead(x, &xarray));
1203   PetscCall(VecRestoreArray(y, &yarray));
1204   PetscFunctionReturn(PETSC_SUCCESS);
1205 }
1206 
1207 static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc, Mat X, Mat Y)
1208 {
1209   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
1210   KSPConvergedReason reason;
1211   Mat                sX, sY;
1212   const PetscScalar *x;
1213   PetscScalar       *y;
1214   PetscInt           m, N, lda, ldb;
1215 
1216   PetscFunctionBegin;
1217   /* apply preconditioner on each matrix block */
1218   PetscCall(MatGetLocalSize(X, &m, NULL));
1219   PetscCall(MatGetSize(X, NULL, &N));
1220   PetscCall(MatDenseGetLDA(X, &lda));
1221   PetscCall(MatDenseGetLDA(Y, &ldb));
1222   PetscCall(MatDenseGetArrayRead(X, &x));
1223   PetscCall(MatDenseGetArrayWrite(Y, &y));
1224   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, (PetscScalar *)x, &sX));
1225   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, y, &sY));
1226   PetscCall(MatDenseSetLDA(sX, lda));
1227   PetscCall(MatDenseSetLDA(sY, ldb));
1228   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
1229   PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
1230   PetscCall(KSPCheckSolve(jac->ksp[0], pc, NULL));
1231   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
1232   PetscCall(MatDestroy(&sY));
1233   PetscCall(MatDestroy(&sX));
1234   PetscCall(MatDenseRestoreArrayWrite(Y, &y));
1235   PetscCall(MatDenseRestoreArrayRead(X, &x));
1236   PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
1237   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1238   PetscFunctionReturn(PETSC_SUCCESS);
1239 }
1240 
/*
   Sets up block Jacobi for the case of a single block spanning multiple MPI
   processes (jac->n < size). On first setup this creates the subcommunicators,
   extracts the multiprocess block of pmat, and creates one parallel inner KSP
   (prefixed "sub_") per subcommunicator; later setups only refresh the block
   matrix and operators.
*/
static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
  PetscInt              m, n;
  MPI_Comm              comm, subcomm = 0;
  const char           *prefix;
  PetscBool             wasSetup = PETSC_TRUE; /* skip KSPSetFromOptions() below unless this is a fresh setup */
  VecType               vectype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
  PetscCheck(jac->n_local <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only a single block in a subcommunicator is supported");
  jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
  if (!pc->setupcalled) {
    PetscInt nestlevel;

    wasSetup = PETSC_FALSE;
    PetscCall(PetscNew(&mpjac));
    jac->data = (void *)mpjac;

    /* initialize datastructure mpjac */
    if (!jac->psubcomm) {
      /* Create default contiguous subcommunicatiors if user does not provide them */
      PetscCall(PetscSubcommCreate(comm, &jac->psubcomm));
      PetscCall(PetscSubcommSetNumber(jac->psubcomm, jac->n));
      PetscCall(PetscSubcommSetType(jac->psubcomm, PETSC_SUBCOMM_CONTIGUOUS));
    }
    mpjac->psubcomm = jac->psubcomm;
    subcomm         = PetscSubcommChild(mpjac->psubcomm);

    /* Get matrix blocks of pmat */
    PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));

    /* create a new PC that processors in each subcomm have copy of */
    PetscCall(PetscMalloc1(1, &jac->ksp));
    PetscCall(KSPCreate(subcomm, &jac->ksp[0]));
    PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
    PetscCall(KSPSetNestLevel(jac->ksp[0], nestlevel + 1));
    PetscCall(KSPSetErrorIfNotConverged(jac->ksp[0], pc->erroriffailure));
    PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0], (PetscObject)pc, 1));
    PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
    PetscCall(KSPGetPC(jac->ksp[0], &mpjac->pc));

    /* inner solver options are selected with -<pc_prefix>sub_... */
    PetscCall(PCGetOptionsPrefix(pc, &prefix));
    PetscCall(KSPSetOptionsPrefix(jac->ksp[0], prefix));
    PetscCall(KSPAppendOptionsPrefix(jac->ksp[0], "sub_"));
    PetscCall(KSPGetOptionsPrefix(jac->ksp[0], &prefix));
    PetscCall(MatSetOptionsPrefix(mpjac->submats, prefix));

    /* create dummy vectors xsub and ysub */
    PetscCall(MatGetLocalSize(mpjac->submats, &m, &n));
    PetscCall(VecCreateMPIWithArray(subcomm, 1, n, PETSC_DECIDE, NULL, &mpjac->xsub));
    PetscCall(VecCreateMPIWithArray(subcomm, 1, m, PETSC_DECIDE, NULL, &mpjac->ysub));
    /* match the vector type to the matrix (e.g. CUDA/Kokkos backends) */
    PetscCall(MatGetVecType(mpjac->submats, &vectype));
    PetscCall(VecSetType(mpjac->xsub, vectype));
    PetscCall(VecSetType(mpjac->ysub, vectype));

    /* switch the PC to the multiproc implementations */
    pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc;
    pc->ops->reset         = PCReset_BJacobi_Multiproc;
    pc->ops->destroy       = PCDestroy_BJacobi_Multiproc;
    pc->ops->apply         = PCApply_BJacobi_Multiproc;
    pc->ops->matapply      = PCMatApply_BJacobi_Multiproc;
  } else { /* pc->setupcalled */
    subcomm = PetscSubcommChild(mpjac->psubcomm);
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      /* destroy old matrix blocks, then get new matrix blocks */
      if (mpjac->submats) PetscCall(MatDestroy(&mpjac->submats));
      PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
    } else {
      PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_REUSE_MATRIX, &mpjac->submats));
    }
    PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
  }

  if (!wasSetup && pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[0]));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1319