xref: /petsc/src/ksp/pc/impls/bjacobi/bjacobi.c (revision e0f5bfbec699682fa3e8b8532b1176849ea4e12a)
1 
2 /*
3    Defines a block Jacobi preconditioner.
4 */
5 
6 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/
7 
8 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC, Mat, Mat);
9 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC, Mat, Mat);
10 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);
11 
/*
  PCSetUp_BJacobi - Determines the block partitioning (how many blocks each rank owns
  and their row counts) from whatever the user supplied -- a total block count, a local
  block count, explicit lengths, or nothing -- then dispatches to the specialized setup:
  multiproc when there are fewer blocks than ranks (blocks shared by several processes),
  otherwise singleblock/multiblock depending on the local block count.
*/
static PetscErrorCode PCSetUp_BJacobi(PC pc) {
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;
  Mat         mat = pc->mat, pmat = pc->pmat;
  PetscBool   hasop;
  PetscInt    N, M, start, i, sum, end;
  PetscInt    bs, i_start = -1, i_end = -1;
  PetscMPIInt rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
  PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
  PetscCall(MatGetBlockSize(pc->pmat, &bs));

  /* fewer blocks than ranks: each block is shared by several processes */
  if (jac->n > 0 && jac->n < size) {
    PetscCall(PCSetUp_BJacobi_Multiproc(pc));
    PetscFunctionReturn(0);
  }

  /*    Determines the number of blocks assigned to each processor */
  /*   local block count given */
  if (jac->n_local > 0 && jac->n < 0) {
    /* total count is the sum of each rank's local count */
    PetscCall(MPIU_Allreduce(&jac->n_local, &jac->n, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
    if (jac->l_lens) { /* check that user set these correctly */
      sum = 0;
      for (i = 0; i < jac->n_local; i++) {
        /* each block length must be a multiple of the matrix block size */
        PetscCheck(jac->l_lens[i] / bs * bs == jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
        sum += jac->l_lens[i];
      }
      PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local lens set incorrectly");
    } else {
      /* no lengths given: split the local rows as evenly as possible in units of bs */
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
    }
  } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
    /* global blocks given: determine which ones are local */
    if (jac->g_lens) {
      /* check that g_lens has valid entries */
      for (i = 0; i < jac->n; i++) {
        PetscCheck(jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Zero block not allowed");
        PetscCheck(jac->g_lens[i] / bs * bs == jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
      }
      if (size == 1) {
        jac->n_local = jac->n;
        PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
        PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens, jac->n_local));
        /* check that user set these correctly */
        sum = 0;
        for (i = 0; i < jac->n_local; i++) sum += jac->l_lens[i];
        PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Global lens set incorrectly");
      } else {
        PetscCall(MatGetOwnershipRange(pc->pmat, &start, &end));
        /* loop over blocks determining first one owned by me; a block boundary
           must coincide exactly with this rank's first owned row */
        sum = 0;
        for (i = 0; i < jac->n + 1; i++) {
          if (sum == start) {
            i_start = i;
            goto start_1;
          }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
      start_1:
        /* continue accumulating until a block boundary lands on end of ownership */
        for (i = i_start; i < jac->n + 1; i++) {
          if (sum == end) {
            i_end = i;
            goto end_1;
          }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
      end_1:
        jac->n_local = i_end - i_start;
        PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
        PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens + i_start, jac->n_local));
      }
    } else { /* no global blocks given, determine them using default layout */
      jac->n_local = jac->n / size + ((jac->n % size) > rank);
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) {
        jac->l_lens[i] = ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)) * bs;
        /* a zero-length block means more blocks were requested than rows available */
        PetscCheck(jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Too many blocks given");
      }
    }
  } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
    /* default: exactly one block per rank, covering all locally owned rows */
    jac->n       = size;
    jac->n_local = 1;
    PetscCall(PetscMalloc1(1, &jac->l_lens));
    jac->l_lens[0] = M;
  } else { /* jac->n > 0 && jac->n_local > 0 */
    if (!jac->l_lens) {
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
    }
  }
  PetscCheck(jac->n_local >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of blocks is less than number of processors");

  /*    Determines mat and pmat */
  PetscCall(MatHasOperation(pc->mat, MATOP_GET_DIAGONAL_BLOCK, &hasop));
  if (!hasop && size == 1) {
    /* sequential and no diagonal-block extraction available: use the matrices directly */
    mat  = pc->mat;
    pmat = pc->pmat;
  } else {
    if (pc->useAmat) {
      /* use block from Amat matrix, not Pmat for local MatMult() */
      PetscCall(MatGetDiagonalBlock(pc->mat, &mat));
    }
    if (pc->pmat != pc->mat || !pc->useAmat) {
      PetscCall(MatGetDiagonalBlock(pc->pmat, &pmat));
    } else pmat = mat;
  }

  /*
     Setup code depends on the number of blocks
  */
  if (jac->n_local == 1) {
    PetscCall(PCSetUp_BJacobi_Singleblock(pc, mat, pmat));
  } else {
    PetscCall(PCSetUp_BJacobi_Multiblock(pc, mat, pmat));
  }
  PetscFunctionReturn(0);
}
134 
135 /* Default destroy, if it has never been setup */
136 static PetscErrorCode PCDestroy_BJacobi(PC pc) {
137   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
138 
139   PetscFunctionBegin;
140   PetscCall(PetscFree(jac->g_lens));
141   PetscCall(PetscFree(jac->l_lens));
142   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", NULL));
143   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", NULL));
144   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", NULL));
145   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", NULL));
146   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", NULL));
147   PetscCall(PetscFree(pc->data));
148   PetscFunctionReturn(0);
149 }
150 
151 static PetscErrorCode PCSetFromOptions_BJacobi(PC pc, PetscOptionItems *PetscOptionsObject) {
152   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
153   PetscInt    blocks, i;
154   PetscBool   flg;
155 
156   PetscFunctionBegin;
157   PetscOptionsHeadBegin(PetscOptionsObject, "Block Jacobi options");
158   PetscCall(PetscOptionsInt("-pc_bjacobi_blocks", "Total number of blocks", "PCBJacobiSetTotalBlocks", jac->n, &blocks, &flg));
159   if (flg) PetscCall(PCBJacobiSetTotalBlocks(pc, blocks, NULL));
160   PetscCall(PetscOptionsInt("-pc_bjacobi_local_blocks", "Local number of blocks", "PCBJacobiSetLocalBlocks", jac->n_local, &blocks, &flg));
161   if (flg) PetscCall(PCBJacobiSetLocalBlocks(pc, blocks, NULL));
162   if (jac->ksp) {
163     /* The sub-KSP has already been set up (e.g., PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions was not called
164      * unless we had already been called. */
165     for (i = 0; i < jac->n_local; i++) PetscCall(KSPSetFromOptions(jac->ksp[i]));
166   }
167   PetscOptionsHeadEnd();
168   PetscFunctionReturn(0);
169 }
170 
171 #include <petscdraw.h>
/*
  PCView_BJacobi - Prints or draws information about the block Jacobi preconditioner,
  including its sub-KSP solver(s), for ASCII, string, and draw viewers.
*/
static PetscErrorCode PCView_BJacobi(PC pc, PetscViewer viewer) {
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
  PetscMPIInt           rank;
  PetscInt              i;
  PetscBool             iascii, isstring, isdraw;
  PetscViewer           sviewer;
  PetscViewerFormat     format;
  const char           *prefix;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    if (pc->useAmat) PetscCall(PetscViewerASCIIPrintf(viewer, "  using Amat local matrix, number of blocks = %" PetscInt_FMT "\n", jac->n));
    PetscCall(PetscViewerASCIIPrintf(viewer, "  number of blocks = %" PetscInt_FMT "\n", jac->n));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* summary mode: only rank 0 shows the solver of its first block */
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for first block is in the following KSP and PC objects on rank 0:\n"));
      PetscCall(PCGetOptionsPrefix(pc, &prefix));
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all blocks\n", prefix ? prefix : ""));
      if (jac->ksp && !jac->psubcomm) {
        PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
        if (rank == 0) {
          PetscCall(PetscViewerASCIIPushTab(viewer));
          PetscCall(KSPView(jac->ksp[0], sviewer));
          PetscCall(PetscViewerASCIIPopTab(viewer));
        }
        PetscCall(PetscViewerFlush(sviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
        PetscCall(PetscViewerFlush(viewer));
        /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else if (mpjac && jac->ksp && mpjac->psubcomm) {
        /* multiproc case: a block spans several ranks, view from the first color of the subcommunicator */
        PetscCall(PetscViewerGetSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
        if (!mpjac->psubcomm->color) {
          PetscCall(PetscViewerASCIIPushTab(viewer));
          PetscCall(KSPView(*(jac->ksp), sviewer));
          PetscCall(PetscViewerASCIIPopTab(viewer));
        }
        PetscCall(PetscViewerFlush(sviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
        PetscCall(PetscViewerFlush(viewer));
        /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else {
        PetscCall(PetscViewerFlush(viewer));
      }
    } else {
      PetscInt n_global;
      /* detail mode: every rank prints each of its local blocks' solvers */
      PetscCall(MPIU_Allreduce(&jac->n_local, &n_global, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for each block is in the following KSP and PC objects:\n"));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n", rank, jac->n_local, jac->first_local));
      PetscCall(PetscViewerASCIIPushTab(viewer));
      PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
      for (i = 0; i < jac->n_local; i++) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] local block number %" PetscInt_FMT "\n", rank, i));
        PetscCall(KSPView(jac->ksp[i], sviewer));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "- - - - - - - - - - - - - - - - - -\n"));
      }
      PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
      PetscCall(PetscViewerASCIIPopTab(viewer));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
  } else if (isstring) {
    PetscCall(PetscViewerStringSPrintf(viewer, " blks=%" PetscInt_FMT, jac->n));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (jac->ksp) PetscCall(KSPView(jac->ksp[0], sviewer));
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
  } else if (isdraw) {
    PetscDraw draw;
    char      str[25];
    PetscReal x, y, bottom, h;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawGetCurrentPoint(draw, &x, &y));
    PetscCall(PetscSNPrintf(str, 25, "Number blocks %" PetscInt_FMT, jac->n));
    PetscCall(PetscDrawStringBoxed(draw, x, y, PETSC_DRAW_RED, PETSC_DRAW_BLACK, str, NULL, &h));
    bottom = y - h;
    PetscCall(PetscDrawPushCurrentPoint(draw, x, bottom));
    /* warning the communicator on viewer is different than on ksp in parallel */
    if (jac->ksp) PetscCall(KSPView(jac->ksp[0], viewer));
    PetscCall(PetscDrawPopCurrentPoint(draw));
  }
  PetscFunctionReturn(0);
}
262 
263 static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc, PetscInt *n_local, PetscInt *first_local, KSP **ksp) {
264   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
265 
266   PetscFunctionBegin;
267   PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Must call KSPSetUp() or PCSetUp() first");
268 
269   if (n_local) *n_local = jac->n_local;
270   if (first_local) *first_local = jac->first_local;
271   if (ksp) *ksp = jac->ksp;
272   PetscFunctionReturn(0);
273 }
274 
275 static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc, PetscInt blocks, PetscInt *lens) {
276   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
277 
278   PetscFunctionBegin;
279   PetscCheck(pc->setupcalled <= 0 || jac->n == blocks, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
280   jac->n = blocks;
281   if (!lens) jac->g_lens = NULL;
282   else {
283     PetscCall(PetscMalloc1(blocks, &jac->g_lens));
284     PetscCall(PetscArraycpy(jac->g_lens, lens, blocks));
285   }
286   PetscFunctionReturn(0);
287 }
288 
/* Type-specific implementation of PCBJacobiGetTotalBlocks(): returns the stored global
   block count and, optionally, a borrowed pointer to the per-block length array
   (may be NULL if the user never supplied lengths). */
static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[]) {
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;

  PetscFunctionBegin;
  *blocks = jac->n;
  if (lens) *lens = jac->g_lens;
  PetscFunctionReturn(0);
}
297 
298 static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt lens[]) {
299   PC_BJacobi *jac;
300 
301   PetscFunctionBegin;
302   jac = (PC_BJacobi *)pc->data;
303 
304   jac->n_local = blocks;
305   if (!lens) jac->l_lens = NULL;
306   else {
307     PetscCall(PetscMalloc1(blocks, &jac->l_lens));
308     PetscCall(PetscArraycpy(jac->l_lens, lens, blocks));
309   }
310   PetscFunctionReturn(0);
311 }
312 
/* Type-specific implementation of PCBJacobiGetLocalBlocks(): returns the stored local
   block count and, optionally, a borrowed pointer to the per-block length array
   (may be NULL if lengths were never supplied or computed yet). */
static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[]) {
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;

  PetscFunctionBegin;
  *blocks = jac->n_local;
  if (lens) *lens = jac->l_lens;
  PetscFunctionReturn(0);
}
321 
322 /*@C
323    PCBJacobiGetSubKSP - Gets the local `KSP` contexts for all blocks on
324    this processor.
325 
326    Not Collective
327 
328    Input Parameter:
329 .  pc - the preconditioner context
330 
331    Output Parameters:
332 +  n_local - the number of blocks on this processor, or NULL
333 .  first_local - the global number of the first block on this processor, or NULL
334 -  ksp - the array of KSP contexts
335 
336    Notes:
337    After `PCBJacobiGetSubKSP()` the array of `KSP` contexts is not to be freed.
338 
339    Currently for some matrix implementations only 1 block per processor
340    is supported.
341 
342    You must call `KSPSetUp()` or `PCSetUp()` before calling `PCBJacobiGetSubKSP()`.
343 
344    Fortran Usage:
345    You must pass in a `KSP` array that is large enough to contain all the local `KSP`s.
346 
347    You can call `PCBJacobiGetSubKSP`(pc,nlocal,firstlocal,`PETSC_NULL_KSP`,ierr) to determine how large the
348    `KSP` array must be.
349 
350    Level: advanced
351 
352 .seealso: `PCBJACOBI`, `PCASM`, `PCASMGetSubKSP()`
353 @*/
PetscErrorCode PCBJacobiGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  /* PetscUseMethod() dispatches to the composed implementation and errors if the PC type does not provide one */
  PetscUseMethod(pc, "PCBJacobiGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp));
  PetscFunctionReturn(0);
}
360 
361 /*@
362    PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
363    Jacobi preconditioner.
364 
365    Collective on pc
366 
367    Input Parameters:
368 +  pc - the preconditioner context
369 .  blocks - the number of blocks
370 -  lens - [optional] integer array containing the size of each block
371 
372    Options Database Key:
373 .  -pc_bjacobi_blocks <blocks> - Sets the number of global blocks
374 
375    Note:
376    Currently only a limited number of blocking configurations are supported.
377    All processors sharing the `PC` must call this routine with the same data.
378 
379    Level: intermediate
380 
381 .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetLocalBlocks()`
382 @*/
PetscErrorCode PCBJacobiSetTotalBlocks(PC pc, PetscInt blocks, const PetscInt lens[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscCheck(blocks > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Must have positive blocks");
  /* PetscTryMethod(): silently a no-op if the PC type is not PCBJACOBI */
  PetscTryMethod(pc, "PCBJacobiSetTotalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
  PetscFunctionReturn(0);
}
390 
391 /*@C
392    PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
393    Jacobi, `PCBJACOBI`, preconditioner.
394 
395    Not Collective
396 
397    Input Parameter:
398 .  pc - the preconditioner context
399 
400    Output parameters:
401 +  blocks - the number of blocks
402 -  lens - integer array containing the size of each block
403 
404    Level: intermediate
405 
406 .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetLocalBlocks()`
407 @*/
PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidIntPointer(blocks, 2);
  /* PetscUseMethod() errors if the PC type does not compose this query */
  PetscUseMethod(pc, "PCBJacobiGetTotalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
  PetscFunctionReturn(0);
}
415 
416 /*@
417    PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
418    Jacobi, `PCBJACOBI`,  preconditioner.
419 
420    Not Collective
421 
422    Input Parameters:
423 +  pc - the preconditioner context
424 .  blocks - the number of blocks
425 -  lens - [optional] integer array containing size of each block
426 
427    Options Database Key:
428 .  -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks
429 
430    Note:
431    Currently only a limited number of blocking configurations are supported.
432 
433    Level: intermediate
434 
435 .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetTotalBlocks()`
436 @*/
437 PetscErrorCode PCBJacobiSetLocalBlocks(PC pc, PetscInt blocks, const PetscInt lens[]) {
438   PetscFunctionBegin;
439   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
440   PetscCheck(blocks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must have nonegative blocks");
441   PetscTryMethod(pc, "PCBJacobiSetLocalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
442   PetscFunctionReturn(0);
443 }
444 
445 /*@C
446    PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
447    Jacobi, `PCBJACOBI`, preconditioner.
448 
449    Not Collective
450 
451    Input Parameters:
452 +  pc - the preconditioner context
453 .  blocks - the number of blocks
454 -  lens - [optional] integer array containing size of each block
455 
456    Note:
457    Currently only a limited number of blocking configurations are supported.
458 
459    Level: intermediate
460 
461 .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetTotalBlocks()`
462 @*/
PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidIntPointer(blocks, 2);
  /* PetscUseMethod() errors if the PC type does not compose this query */
  PetscUseMethod(pc, "PCBJacobiGetLocalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
  PetscFunctionReturn(0);
}
470 
471 /*MC
472    PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with
473            its own `KSP` object.
474 
475    Options Database Keys:
476 +  -pc_use_amat - use Amat to apply block of operator in inner Krylov method
477 -  -pc_bjacobi_blocks <n> - use n total blocks
478 
479    Notes:
480     See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable point block, and `PCPBJACOBI` for fixed size point block
481 
482     Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.
483 
484      To set options on the solvers for each block append -sub_ to all the `KSP` and `PC`
485         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
486 
     To set the options on the solvers separately for each block call `PCBJacobiGetSubKSP()`
         and set the options directly on the resulting `KSP` object (you can access its `PC`
         with `KSPGetPC()`)
490 
491      For GPU-based vectors (`VECCUDA`, `VECViennaCL`) it is recommended to use exactly one block per MPI process for best
492          performance.  Different block partitioning may lead to additional data transfers
493          between host and GPU that lead to degraded performance.
494 
     When multiple processes share a single block, each block encompasses exactly all the unknowns owned by its set of processes.
496 
497    Level: beginner
498 
499 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCType`,
500           `PCASM`, `PCSetUseAmat()`, `PCGetUseAmat()`, `PCBJacobiGetSubKSP()`, `PCBJacobiSetTotalBlocks()`,
501           `PCBJacobiSetLocalBlocks()`, `PCSetModifySubMatrices()`, `PCJACOBI`, `PCVPBJACOBI`, `PCPBJACOBI`
502 M*/
503 
/*
  PCCreate_BJacobi - Creates the PCBJACOBI context: allocates the data structure,
  installs the generic function table (the apply routines are filled in later by
  the type-specific setup), and composes the public set/get methods.
*/
PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc) {
  PetscMPIInt rank;
  PC_BJacobi *jac;

  PetscFunctionBegin;
  PetscCall(PetscNew(&jac));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));

  /* apply/matapply/applytranspose remain NULL until PCSetUp_BJacobi chooses a variant */
  pc->ops->apply           = NULL;
  pc->ops->matapply        = NULL;
  pc->ops->applytranspose  = NULL;
  pc->ops->setup           = PCSetUp_BJacobi;
  pc->ops->destroy         = PCDestroy_BJacobi;
  pc->ops->setfromoptions  = PCSetFromOptions_BJacobi;
  pc->ops->view            = PCView_BJacobi;
  pc->ops->applyrichardson = NULL;

  /* -1 marks "not yet set"; actual block counts are decided in PCSetUp_BJacobi */
  pc->data         = (void *)jac;
  jac->n           = -1;
  jac->n_local     = -1;
  jac->first_local = rank;
  jac->ksp         = NULL;
  jac->g_lens      = NULL;
  jac->l_lens      = NULL;
  jac->psubcomm    = NULL;

  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", PCBJacobiGetSubKSP_BJacobi));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", PCBJacobiSetTotalBlocks_BJacobi));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", PCBJacobiGetTotalBlocks_BJacobi));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", PCBJacobiSetLocalBlocks_BJacobi));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", PCBJacobiGetLocalBlocks_BJacobi));
  PetscFunctionReturn(0);
}
537 
538 /*
539         These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI
540 */
/* Resets the single-block variant: resets (but keeps) the sub-KSP and destroys the
   sequential work vectors so they are rebuilt on the next setup. */
static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc) {
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

  PetscFunctionBegin;
  PetscCall(KSPReset(jac->ksp[0]));
  PetscCall(VecDestroy(&bjac->x));
  PetscCall(VecDestroy(&bjac->y));
  PetscFunctionReturn(0);
}
551 
/* Destroys the single-block variant: releases the sub-KSP and the per-variant data,
   then chains to PCDestroy_BJacobi() for the shared block Jacobi state. */
static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc) {
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

  PetscFunctionBegin;
  PetscCall(PCReset_BJacobi_Singleblock(pc));
  PetscCall(KSPDestroy(&jac->ksp[0]));
  PetscCall(PetscFree(jac->ksp));
  PetscCall(PetscFree(bjac));
  PetscCall(PCDestroy_BJacobi(pc));
  PetscFunctionReturn(0);
}
564 
565 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc) {
566   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
567   KSP                subksp = jac->ksp[0];
568   KSPConvergedReason reason;
569 
570   PetscFunctionBegin;
571   PetscCall(KSPSetUp(subksp));
572   PetscCall(KSPGetConvergedReason(subksp, &reason));
573   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
574   PetscFunctionReturn(0);
575 }
576 
/* Applies the single-block preconditioner: solves the local block system with the
   sub-KSP, viewing the (possibly parallel) x and y through sequential work vectors. */
static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc, Vec x, Vec y) {
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

  PetscFunctionBegin;
  /* borrow the local representations of x and y; no data is copied */
  PetscCall(VecGetLocalVectorRead(x, bjac->x));
  PetscCall(VecGetLocalVector(y, bjac->y));
  /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
     matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
     of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
  PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
  PetscCall(KSPSolve(jac->ksp[0], bjac->x, bjac->y));
  /* propagate a sub-solver failure to the outer PC rather than erroring */
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
  PetscCall(VecRestoreLocalVectorRead(x, bjac->x));
  PetscCall(VecRestoreLocalVector(y, bjac->y));
  PetscFunctionReturn(0);
}
594 
/* Applies the single-block preconditioner to a block of vectors stored as dense
   matrices X (input) and Y (output), using KSPMatSolve() on the local parts. */
static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc, Mat X, Mat Y) {
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;
  Mat         sX, sY;

  PetscFunctionBegin;
  /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
     matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
     of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
  PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
  PetscCall(MatDenseGetLocalMatrix(X, &sX));
  PetscCall(MatDenseGetLocalMatrix(Y, &sY));
  PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
  PetscFunctionReturn(0);
}
609 
/* Applies only the symmetric-left factor of the inner PC to x, writing into y.
   Bypasses the inner KSP entirely and calls PCApplySymmetricLeft() on its PC. */
static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc, Vec x, Vec y) {
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
  PetscScalar            *y_array;
  const PetscScalar      *x_array;
  PC                      subpc;

  PetscFunctionBegin;
  /*
      The VecPlaceArray() is to avoid having to copy the
    y vector into the bjac->x vector. The reason for
    the bjac->x vector is that we need a sequential vector
    for the sequential solve.
  */
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  PetscCall(VecPlaceArray(bjac->x, x_array));
  PetscCall(VecPlaceArray(bjac->y, y_array));
  /* apply the symmetric left portion of the inner PC operator */
  /* note this by-passes the inner KSP and its options completely */
  PetscCall(KSPGetPC(jac->ksp[0], &subpc));
  PetscCall(PCApplySymmetricLeft(subpc, bjac->x, bjac->y));
  /* detach the borrowed arrays before restoring them to x and y */
  PetscCall(VecResetArray(bjac->x));
  PetscCall(VecResetArray(bjac->y));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscCall(VecRestoreArray(y, &y_array));
  PetscFunctionReturn(0);
}
638 
639 static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc, Vec x, Vec y) {
640   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
641   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
642   PetscScalar            *y_array;
643   const PetscScalar      *x_array;
644   PC                      subpc;
645 
646   PetscFunctionBegin;
647   /*
648       The VecPlaceArray() is to avoid having to copy the
649     y vector into the bjac->x vector. The reason for
650     the bjac->x vector is that we need a sequential vector
651     for the sequential solve.
652   */
653   PetscCall(VecGetArrayRead(x, &x_array));
654   PetscCall(VecGetArray(y, &y_array));
655   PetscCall(VecPlaceArray(bjac->x, x_array));
656   PetscCall(VecPlaceArray(bjac->y, y_array));
657 
658   /* apply the symmetric right portion of the inner PC operator */
659   /* note this by-passes the inner KSP and its options completely */
660 
661   PetscCall(KSPGetPC(jac->ksp[0], &subpc));
662   PetscCall(PCApplySymmetricRight(subpc, bjac->x, bjac->y));
663 
664   PetscCall(VecRestoreArrayRead(x, &x_array));
665   PetscCall(VecRestoreArray(y, &y_array));
666   PetscFunctionReturn(0);
667 }
668 
/* Applies the transpose of the single-block preconditioner by running
   KSPSolveTranspose() on the local block through the sequential work vectors. */
static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc, Vec x, Vec y) {
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
  PetscScalar            *y_array;
  const PetscScalar      *x_array;

  PetscFunctionBegin;
  /*
      The VecPlaceArray() is to avoid having to copy the
    y vector into the bjac->x vector. The reason for
    the bjac->x vector is that we need a sequential vector
    for the sequential solve.
  */
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  PetscCall(VecPlaceArray(bjac->x, x_array));
  PetscCall(VecPlaceArray(bjac->y, y_array));
  PetscCall(KSPSolveTranspose(jac->ksp[0], bjac->x, bjac->y));
  /* propagate a sub-solver failure to the outer PC rather than erroring */
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
  /* detach the borrowed arrays before restoring them to x and y */
  PetscCall(VecResetArray(bjac->x));
  PetscCall(VecResetArray(bjac->y));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscCall(VecRestoreArray(y, &y_array));
  PetscFunctionReturn(0);
}
694 
/*
   Sets up block Jacobi for the case of exactly one block per process:
   creates (once) a single sequential KSP with options prefix
   "<pc_prefix>sub_" plus two array-less work vectors whose storage is
   supplied via VecPlaceArray() at apply time.
*/
static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc, Mat mat, Mat pmat) {
  PC_BJacobi             *jac = (PC_BJacobi *)pc->data;
  PetscInt                m; /* size of the local block (rows of pmat) */
  KSP                     ksp;
  PC_BJacobi_Singleblock *bjac;
  PetscBool               wasSetup = PETSC_TRUE; /* flipped to PETSC_FALSE only when the subsolver is created here */
  VecType                 vectype;
  const char             *prefix;

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    if (!jac->ksp) {
      /* very first setup: create the sequential subsolver and its context */
      wasSetup = PETSC_FALSE;

      PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
      PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
      PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
      PetscCall(KSPSetType(ksp, KSPPREONLY));
      PetscCall(PCGetOptionsPrefix(pc, &prefix));
      PetscCall(KSPSetOptionsPrefix(ksp, prefix));
      PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));

      /* install the single-block implementations of the PC operations */
      pc->ops->reset               = PCReset_BJacobi_Singleblock;
      pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
      pc->ops->apply               = PCApply_BJacobi_Singleblock;
      pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
      pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
      pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
      pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
      pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;

      PetscCall(PetscMalloc1(1, &jac->ksp));
      jac->ksp[0] = ksp;

      PetscCall(PetscNew(&bjac));
      jac->data = (void *)bjac;
    } else {
      /* subsolver already exists (e.g. user pre-set it); reuse it */
      ksp  = jac->ksp[0];
      bjac = (PC_BJacobi_Singleblock *)jac->data;
    }

    /*
      The reason we need to generate these vectors is to serve
      as the right-hand side and solution vector for the solve on the
      block. We do not need to allocate space for the vectors since
      that is provided via VecPlaceArray() just before the call to
      KSPSolve() on the block.
    */
    PetscCall(MatGetSize(pmat, &m, &m));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->x));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->y));
    PetscCall(MatGetVecType(pmat, &vectype));
    PetscCall(VecSetType(bjac->x, vectype));
    PetscCall(VecSetType(bjac->y, vectype));
  } else {
    ksp  = jac->ksp[0];
    bjac = (PC_BJacobi_Singleblock *)jac->data;
  }
  /* (re)set the operators; propagate the subsolver's prefix to the matrices */
  PetscCall(KSPGetOptionsPrefix(ksp, &prefix));
  if (pc->useAmat) {
    PetscCall(KSPSetOperators(ksp, mat, pmat));
    PetscCall(MatSetOptionsPrefix(mat, prefix));
  } else {
    PetscCall(KSPSetOperators(ksp, pmat, pmat));
  }
  PetscCall(MatSetOptionsPrefix(pmat, prefix));
  if (!wasSetup && pc->setfromoptionscalled) {
    /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
    PetscCall(KSPSetFromOptions(ksp));
  }
  PetscFunctionReturn(0);
}
767 
768 static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc) {
769   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
770   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
771   PetscInt               i;
772 
773   PetscFunctionBegin;
774   if (bjac && bjac->pmat) {
775     PetscCall(MatDestroyMatrices(jac->n_local, &bjac->pmat));
776     if (pc->useAmat) PetscCall(MatDestroyMatrices(jac->n_local, &bjac->mat));
777   }
778 
779   for (i = 0; i < jac->n_local; i++) {
780     PetscCall(KSPReset(jac->ksp[i]));
781     if (bjac && bjac->x) {
782       PetscCall(VecDestroy(&bjac->x[i]));
783       PetscCall(VecDestroy(&bjac->y[i]));
784       PetscCall(ISDestroy(&bjac->is[i]));
785     }
786   }
787   PetscCall(PetscFree(jac->l_lens));
788   PetscCall(PetscFree(jac->g_lens));
789   PetscFunctionReturn(0);
790 }
791 
792 static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc) {
793   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
794   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
795   PetscInt               i;
796 
797   PetscFunctionBegin;
798   PetscCall(PCReset_BJacobi_Multiblock(pc));
799   if (bjac) {
800     PetscCall(PetscFree2(bjac->x, bjac->y));
801     PetscCall(PetscFree(bjac->starts));
802     PetscCall(PetscFree(bjac->is));
803   }
804   PetscCall(PetscFree(jac->data));
805   for (i = 0; i < jac->n_local; i++) PetscCall(KSPDestroy(&jac->ksp[i]));
806   PetscCall(PetscFree(jac->ksp));
807   PetscCall(PCDestroy_BJacobi(pc));
808   PetscFunctionReturn(0);
809 }
810 
811 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc) {
812   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
813   PetscInt           i, n_local = jac->n_local;
814   KSPConvergedReason reason;
815 
816   PetscFunctionBegin;
817   for (i = 0; i < n_local; i++) {
818     PetscCall(KSPSetUp(jac->ksp[i]));
819     PetscCall(KSPGetConvergedReason(jac->ksp[i], &reason));
820     if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
821   }
822   PetscFunctionReturn(0);
823 }
824 
825 /*
826       Preconditioner for block Jacobi
827 */
828 static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc, Vec x, Vec y) {
829   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
830   PetscInt               i, n_local = jac->n_local;
831   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
832   PetscScalar           *yin;
833   const PetscScalar     *xin;
834 
835   PetscFunctionBegin;
836   PetscCall(VecGetArrayRead(x, &xin));
837   PetscCall(VecGetArray(y, &yin));
838   for (i = 0; i < n_local; i++) {
839     /*
840        To avoid copying the subvector from x into a workspace we instead
841        make the workspace vector array point to the subpart of the array of
842        the global vector.
843     */
844     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
845     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
846 
847     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
848     PetscCall(KSPSolve(jac->ksp[i], bjac->x[i], bjac->y[i]));
849     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
850     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
851 
852     PetscCall(VecResetArray(bjac->x[i]));
853     PetscCall(VecResetArray(bjac->y[i]));
854   }
855   PetscCall(VecRestoreArrayRead(x, &xin));
856   PetscCall(VecRestoreArray(y, &yin));
857   PetscFunctionReturn(0);
858 }
859 
860 /*
861       Preconditioner for block Jacobi
862 */
863 static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc, Vec x, Vec y) {
864   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
865   PetscInt               i, n_local = jac->n_local;
866   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
867   PetscScalar           *yin;
868   const PetscScalar     *xin;
869 
870   PetscFunctionBegin;
871   PetscCall(VecGetArrayRead(x, &xin));
872   PetscCall(VecGetArray(y, &yin));
873   for (i = 0; i < n_local; i++) {
874     /*
875        To avoid copying the subvector from x into a workspace we instead
876        make the workspace vector array point to the subpart of the array of
877        the global vector.
878     */
879     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
880     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
881 
882     PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
883     PetscCall(KSPSolveTranspose(jac->ksp[i], bjac->x[i], bjac->y[i]));
884     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
885     PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
886 
887     PetscCall(VecResetArray(bjac->x[i]));
888     PetscCall(VecResetArray(bjac->y[i]));
889   }
890   PetscCall(VecRestoreArrayRead(x, &xin));
891   PetscCall(VecRestoreArray(y, &yin));
892   PetscFunctionReturn(0);
893 }
894 
895 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc, Mat mat, Mat pmat) {
896   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
897   PetscInt               m, n_local, N, M, start, i;
898   const char            *prefix;
899   KSP                    ksp;
900   Vec                    x, y;
901   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
902   PC                     subpc;
903   IS                     is;
904   MatReuse               scall;
905   VecType                vectype;
906 
907   PetscFunctionBegin;
908   PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
909 
910   n_local = jac->n_local;
911 
912   if (pc->useAmat) {
913     PetscBool same;
914     PetscCall(PetscObjectTypeCompare((PetscObject)mat, ((PetscObject)pmat)->type_name, &same));
915     PetscCheck(same, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "Matrices not of same type");
916   }
917 
918   if (!pc->setupcalled) {
919     scall = MAT_INITIAL_MATRIX;
920 
921     if (!jac->ksp) {
922       pc->ops->reset          = PCReset_BJacobi_Multiblock;
923       pc->ops->destroy        = PCDestroy_BJacobi_Multiblock;
924       pc->ops->apply          = PCApply_BJacobi_Multiblock;
925       pc->ops->matapply       = NULL;
926       pc->ops->applytranspose = PCApplyTranspose_BJacobi_Multiblock;
927       pc->ops->setuponblocks  = PCSetUpOnBlocks_BJacobi_Multiblock;
928 
929       PetscCall(PetscNew(&bjac));
930       PetscCall(PetscMalloc1(n_local, &jac->ksp));
931       PetscCall(PetscMalloc2(n_local, &bjac->x, n_local, &bjac->y));
932       PetscCall(PetscMalloc1(n_local, &bjac->starts));
933 
934       jac->data = (void *)bjac;
935       PetscCall(PetscMalloc1(n_local, &bjac->is));
936 
937       for (i = 0; i < n_local; i++) {
938         PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
939         PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
940         PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
941         PetscCall(KSPSetType(ksp, KSPPREONLY));
942         PetscCall(KSPGetPC(ksp, &subpc));
943         PetscCall(PCGetOptionsPrefix(pc, &prefix));
944         PetscCall(KSPSetOptionsPrefix(ksp, prefix));
945         PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));
946 
947         jac->ksp[i] = ksp;
948       }
949     } else {
950       bjac = (PC_BJacobi_Multiblock *)jac->data;
951     }
952 
953     start = 0;
954     PetscCall(MatGetVecType(pmat, &vectype));
955     for (i = 0; i < n_local; i++) {
956       m = jac->l_lens[i];
957       /*
958       The reason we need to generate these vectors is to serve
959       as the right-hand side and solution vector for the solve on the
960       block. We do not need to allocate space for the vectors since
961       that is provided via VecPlaceArray() just before the call to
962       KSPSolve() on the block.
963 
964       */
965       PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &x));
966       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &y));
967       PetscCall(VecSetType(x, vectype));
968       PetscCall(VecSetType(y, vectype));
969 
970       bjac->x[i]      = x;
971       bjac->y[i]      = y;
972       bjac->starts[i] = start;
973 
974       PetscCall(ISCreateStride(PETSC_COMM_SELF, m, start, 1, &is));
975       bjac->is[i] = is;
976 
977       start += m;
978     }
979   } else {
980     bjac = (PC_BJacobi_Multiblock *)jac->data;
981     /*
982        Destroy the blocks from the previous iteration
983     */
984     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
985       PetscCall(MatDestroyMatrices(n_local, &bjac->pmat));
986       if (pc->useAmat) PetscCall(MatDestroyMatrices(n_local, &bjac->mat));
987       scall = MAT_INITIAL_MATRIX;
988     } else scall = MAT_REUSE_MATRIX;
989   }
990 
991   PetscCall(MatCreateSubMatrices(pmat, n_local, bjac->is, bjac->is, scall, &bjac->pmat));
992   if (pc->useAmat) PetscCall(MatCreateSubMatrices(mat, n_local, bjac->is, bjac->is, scall, &bjac->mat));
993   /* Return control to the user so that the submatrices can be modified (e.g., to apply
994      different boundary conditions for the submatrices than for the global problem) */
995   PetscCall(PCModifySubMatrices(pc, n_local, bjac->is, bjac->is, bjac->pmat, pc->modifysubmatricesP));
996 
997   for (i = 0; i < n_local; i++) {
998     PetscCall(KSPGetOptionsPrefix(jac->ksp[i], &prefix));
999     if (pc->useAmat) {
1000       PetscCall(KSPSetOperators(jac->ksp[i], bjac->mat[i], bjac->pmat[i]));
1001       PetscCall(MatSetOptionsPrefix(bjac->mat[i], prefix));
1002     } else {
1003       PetscCall(KSPSetOperators(jac->ksp[i], bjac->pmat[i], bjac->pmat[i]));
1004     }
1005     PetscCall(MatSetOptionsPrefix(bjac->pmat[i], prefix));
1006     if (pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[i]));
1007   }
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 /*
1012       These are for a single block with multiple processes
1013 */
1014 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc) {
1015   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
1016   KSP                subksp = jac->ksp[0];
1017   KSPConvergedReason reason;
1018 
1019   PetscFunctionBegin;
1020   PetscCall(KSPSetUp(subksp));
1021   PetscCall(KSPGetConvergedReason(subksp, &reason));
1022   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1023   PetscFunctionReturn(0);
1024 }
1025 
1026 static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc) {
1027   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1028   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1029 
1030   PetscFunctionBegin;
1031   PetscCall(VecDestroy(&mpjac->ysub));
1032   PetscCall(VecDestroy(&mpjac->xsub));
1033   PetscCall(MatDestroy(&mpjac->submats));
1034   if (jac->ksp) PetscCall(KSPReset(jac->ksp[0]));
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc) {
1039   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1040   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1041 
1042   PetscFunctionBegin;
1043   PetscCall(PCReset_BJacobi_Multiproc(pc));
1044   PetscCall(KSPDestroy(&jac->ksp[0]));
1045   PetscCall(PetscFree(jac->ksp));
1046   PetscCall(PetscSubcommDestroy(&mpjac->psubcomm));
1047 
1048   PetscCall(PetscFree(mpjac));
1049   PetscCall(PCDestroy_BJacobi(pc));
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc, Vec x, Vec y) {
1054   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1055   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1056   PetscScalar          *yarray;
1057   const PetscScalar    *xarray;
1058   KSPConvergedReason    reason;
1059 
1060   PetscFunctionBegin;
1061   /* place x's and y's local arrays into xsub and ysub */
1062   PetscCall(VecGetArrayRead(x, &xarray));
1063   PetscCall(VecGetArray(y, &yarray));
1064   PetscCall(VecPlaceArray(mpjac->xsub, xarray));
1065   PetscCall(VecPlaceArray(mpjac->ysub, yarray));
1066 
1067   /* apply preconditioner on each matrix block */
1068   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1069   PetscCall(KSPSolve(jac->ksp[0], mpjac->xsub, mpjac->ysub));
1070   PetscCall(KSPCheckSolve(jac->ksp[0], pc, mpjac->ysub));
1071   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1072   PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
1073   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1074 
1075   PetscCall(VecResetArray(mpjac->xsub));
1076   PetscCall(VecResetArray(mpjac->ysub));
1077   PetscCall(VecRestoreArrayRead(x, &xarray));
1078   PetscCall(VecRestoreArray(y, &yarray));
1079   PetscFunctionReturn(0);
1080 }
1081 
/*
   Applies the multiproc block Jacobi preconditioner to a block of vectors
   stored as a dense matrix: wraps the raw arrays of X and Y in dense
   matrices on the subsolver's subcommunicator and performs one KSPMatSolve().
*/
static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc, Mat X, Mat Y) {
  PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
  KSPConvergedReason reason;
  Mat                sX, sY; /* subcommunicator views of X and Y (share storage, no copies) */
  const PetscScalar *x;
  PetscScalar       *y;
  PetscInt           m, N, lda, ldb;

  PetscFunctionBegin;
  /* apply preconditioner on each matrix block */
  PetscCall(MatGetLocalSize(X, &m, NULL));
  PetscCall(MatGetSize(X, NULL, &N));
  /* leading dimensions of X and Y must be carried over to the wrappers */
  PetscCall(MatDenseGetLDA(X, &lda));
  PetscCall(MatDenseGetLDA(Y, &ldb));
  PetscCall(MatDenseGetArrayRead(X, &x));
  PetscCall(MatDenseGetArrayWrite(Y, &y));
  /* wrap the arrays in dense matrices on the subsolver's communicator;
     the cast discards const because MatCreateDense() takes a mutable array */
  PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, (PetscScalar *)x, &sX));
  PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, y, &sY));
  PetscCall(MatDenseSetLDA(sX, lda));
  PetscCall(MatDenseSetLDA(sY, ldb));
  PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
  PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, NULL));
  PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
  /* destroy the wrappers before restoring the arrays they alias */
  PetscCall(MatDestroy(&sY));
  PetscCall(MatDestroy(&sX));
  PetscCall(MatDenseRestoreArrayWrite(Y, &y));
  PetscCall(MatDenseRestoreArrayRead(X, &x));
  PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
  if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
  PetscFunctionReturn(0);
}
1114 
/*
   Sets up block Jacobi for the case of a single block spanning a
   subcommunicator (used when jac->n is smaller than the number of ranks).
*/
static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc) {
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
  PetscInt              m, n; /* local sizes of the subcommunicator matrix block */
  MPI_Comm              comm, subcomm = 0;
  const char           *prefix;
  PetscBool             wasSetup = PETSC_TRUE; /* flipped to PETSC_FALSE only on the first setup */
  VecType               vectype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
  PetscCheck(jac->n_local <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only a single block in a subcommunicator is supported");
  jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
  if (!pc->setupcalled) {
    wasSetup = PETSC_FALSE;
    PetscCall(PetscNew(&mpjac));
    jac->data = (void *)mpjac;

    /* initialize datastructure mpjac */
    if (!jac->psubcomm) {
      /* Create default contiguous subcommunicators if user does not provide them */
      PetscCall(PetscSubcommCreate(comm, &jac->psubcomm));
      PetscCall(PetscSubcommSetNumber(jac->psubcomm, jac->n));
      PetscCall(PetscSubcommSetType(jac->psubcomm, PETSC_SUBCOMM_CONTIGUOUS));
    }
    mpjac->psubcomm = jac->psubcomm;
    subcomm         = PetscSubcommChild(mpjac->psubcomm);

    /* Get matrix blocks of pmat */
    PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));

    /* create a new PC that processors in each subcomm have copy of */
    PetscCall(PetscMalloc1(1, &jac->ksp));
    PetscCall(KSPCreate(subcomm, &jac->ksp[0]));
    PetscCall(KSPSetErrorIfNotConverged(jac->ksp[0], pc->erroriffailure));
    PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0], (PetscObject)pc, 1));
    PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
    PetscCall(KSPGetPC(jac->ksp[0], &mpjac->pc));

    /* subsolver inherits this PC's options prefix with "sub_" appended */
    PetscCall(PCGetOptionsPrefix(pc, &prefix));
    PetscCall(KSPSetOptionsPrefix(jac->ksp[0], prefix));
    PetscCall(KSPAppendOptionsPrefix(jac->ksp[0], "sub_"));
    PetscCall(KSPGetOptionsPrefix(jac->ksp[0], &prefix));
    PetscCall(MatSetOptionsPrefix(mpjac->submats, prefix));

    /* create dummy vectors xsub and ysub; their arrays are supplied via
       VecPlaceArray() in PCApply_BJacobi_Multiproc() */
    PetscCall(MatGetLocalSize(mpjac->submats, &m, &n));
    PetscCall(VecCreateMPIWithArray(subcomm, 1, n, PETSC_DECIDE, NULL, &mpjac->xsub));
    PetscCall(VecCreateMPIWithArray(subcomm, 1, m, PETSC_DECIDE, NULL, &mpjac->ysub));
    PetscCall(MatGetVecType(mpjac->submats, &vectype));
    PetscCall(VecSetType(mpjac->xsub, vectype));
    PetscCall(VecSetType(mpjac->ysub, vectype));

    /* install the multiproc implementations of the PC operations */
    pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc;
    pc->ops->reset         = PCReset_BJacobi_Multiproc;
    pc->ops->destroy       = PCDestroy_BJacobi_Multiproc;
    pc->ops->apply         = PCApply_BJacobi_Multiproc;
    pc->ops->matapply      = PCMatApply_BJacobi_Multiproc;
  } else { /* pc->setupcalled */
    subcomm = PetscSubcommChild(mpjac->psubcomm);
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      /* destroy old matrix blocks, then get new matrix blocks */
      if (mpjac->submats) PetscCall(MatDestroy(&mpjac->submats));
      PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
    } else {
      PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_REUSE_MATRIX, &mpjac->submats));
    }
    PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
  }

  if (!wasSetup && pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[0]));
  PetscFunctionReturn(0);
}
1188