xref: /petsc/src/ksp/pc/impls/bjacobi/bjacobi.c (revision d2522c19e8fa9bca20aaca277941d9a63e71db6a)
1 
2 /*
3    Defines a block Jacobi preconditioner.
4 */
5 
6 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/
7 
8 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC, Mat, Mat);
9 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC, Mat, Mat);
10 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);
11 
/*
   PCSetUp_BJacobi - Determines the block decomposition (how many blocks on
   this rank and their row lengths), extracts the local diagonal blocks of
   mat/pmat, and dispatches to the single-block, multi-block, or
   multi-process setup routine.

   On exit jac->n, jac->n_local, and jac->l_lens[] describe the local layout.
*/
static PetscErrorCode PCSetUp_BJacobi(PC pc) {
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;
  Mat         mat = pc->mat, pmat = pc->pmat;
  PetscBool   hasop;
  PetscInt    N, M, start, i, sum, end;
  PetscInt    bs, i_start = -1, i_end = -1;
  PetscMPIInt rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
  PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
  PetscCall(MatGetBlockSize(pc->pmat, &bs));

  /* fewer blocks than ranks: each block is shared by several processes */
  if (jac->n > 0 && jac->n < size) {
    PetscCall(PCSetUp_BJacobi_Multiproc(pc));
    PetscFunctionReturn(0);
  }

  /* --------------------------------------------------------------------------
      Determines the number of blocks assigned to each processor
  -----------------------------------------------------------------------------*/

  /*   local block count  given */
  if (jac->n_local > 0 && jac->n < 0) {
    PetscCall(MPIU_Allreduce(&jac->n_local, &jac->n, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
    if (jac->l_lens) { /* check that user set these correctly */
      sum = 0;
      for (i = 0; i < jac->n_local; i++) {
        /* each block length must be a multiple of the matrix block size */
        PetscCheck(jac->l_lens[i] / bs * bs == jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
        sum += jac->l_lens[i];
      }
      PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local lens set incorrectly");
    } else {
      /* no lengths given: spread the M/bs matrix blocks as evenly as possible */
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
    }
  } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
    /* global blocks given: determine which ones are local */
    if (jac->g_lens) {
      /* check that the g_lens entries are valid */
      for (i = 0; i < jac->n; i++) {
        PetscCheck(jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Zero block not allowed");
        PetscCheck(jac->g_lens[i] / bs * bs == jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
      }
      if (size == 1) {
        jac->n_local = jac->n;
        PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
        PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens, jac->n_local));
        /* check that user set these correctly */
        sum = 0;
        for (i = 0; i < jac->n_local; i++) sum += jac->l_lens[i];
        PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Global lens set incorrectly");
      } else {
        PetscCall(MatGetOwnershipRange(pc->pmat, &start, &end));
        /* loop over blocks determining the first one owned by me; the block
           boundaries must line up exactly with the matrix ownership range */
        sum = 0;
        for (i = 0; i < jac->n + 1; i++) {
          if (sum == start) {
            i_start = i;
            goto start_1;
          }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
      start_1: /* continue accumulating lengths to find one past my last block */
        for (i = i_start; i < jac->n + 1; i++) {
          if (sum == end) {
            i_end = i;
            goto end_1;
          }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
      end_1:
        jac->n_local = i_end - i_start;
        PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
        PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens + i_start, jac->n_local));
      }
    } else { /* no global blocks given, determine then using default layout */
      jac->n_local = jac->n / size + ((jac->n % size) > rank);
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) {
        jac->l_lens[i] = ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)) * bs;
        /* a zero length means more blocks were requested than local rows allow */
        PetscCheck(jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Too many blocks given");
      }
    }
  } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
    jac->n       = size;
    jac->n_local = 1;
    PetscCall(PetscMalloc1(1, &jac->l_lens));
    jac->l_lens[0] = M;
  } else { /* jac->n > 0 && jac->n_local > 0 */
    if (!jac->l_lens) {
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
    }
  }
  PetscCheck(jac->n_local >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of blocks is less than number of processors");

  /* -------------------------
      Determines mat and pmat
  ---------------------------*/
  PetscCall(MatHasOperation(pc->mat, MATOP_GET_DIAGONAL_BLOCK, &hasop));
  if (!hasop && size == 1) {
    /* sequential and no diagonal-block extraction available: use the matrices directly */
    mat  = pc->mat;
    pmat = pc->pmat;
  } else {
    if (pc->useAmat) {
      /* use block from Amat matrix, not Pmat for local MatMult() */
      PetscCall(MatGetDiagonalBlock(pc->mat, &mat));
    }
    if (pc->pmat != pc->mat || !pc->useAmat) {
      PetscCall(MatGetDiagonalBlock(pc->pmat, &pmat));
    } else pmat = mat;
  }

  /* ------
     Setup code depends on the number of blocks
  */
  if (jac->n_local == 1) {
    PetscCall(PCSetUp_BJacobi_Singleblock(pc, mat, pmat));
  } else {
    PetscCall(PCSetUp_BJacobi_Multiblock(pc, mat, pmat));
  }
  PetscFunctionReturn(0);
}
139 
140 /* Default destroy, if it has never been setup */
141 static PetscErrorCode PCDestroy_BJacobi(PC pc) {
142   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
143 
144   PetscFunctionBegin;
145   PetscCall(PetscFree(jac->g_lens));
146   PetscCall(PetscFree(jac->l_lens));
147   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", NULL));
148   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", NULL));
149   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", NULL));
150   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", NULL));
151   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", NULL));
152   PetscCall(PetscFree(pc->data));
153   PetscFunctionReturn(0);
154 }
155 
156 static PetscErrorCode PCSetFromOptions_BJacobi(PC pc, PetscOptionItems *PetscOptionsObject) {
157   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
158   PetscInt    blocks, i;
159   PetscBool   flg;
160 
161   PetscFunctionBegin;
162   PetscOptionsHeadBegin(PetscOptionsObject, "Block Jacobi options");
163   PetscCall(PetscOptionsInt("-pc_bjacobi_blocks", "Total number of blocks", "PCBJacobiSetTotalBlocks", jac->n, &blocks, &flg));
164   if (flg) PetscCall(PCBJacobiSetTotalBlocks(pc, blocks, NULL));
165   PetscCall(PetscOptionsInt("-pc_bjacobi_local_blocks", "Local number of blocks", "PCBJacobiSetLocalBlocks", jac->n_local, &blocks, &flg));
166   if (flg) PetscCall(PCBJacobiSetLocalBlocks(pc, blocks, NULL));
167   if (jac->ksp) {
168     /* The sub-KSP has already been set up (e.g., PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions was not called
169      * unless we had already been called. */
170     for (i = 0; i < jac->n_local; i++) { PetscCall(KSPSetFromOptions(jac->ksp[i])); }
171   }
172   PetscOptionsHeadEnd();
173   PetscFunctionReturn(0);
174 }
175 
176 #include <petscdraw.h>
/*
   PCView_BJacobi - Prints the block layout and (depending on the viewer type
   and format) the inner KSP/PC configuration for the local blocks.

   Supports ASCII (summary or INFO_DETAIL per-block listing), string, and draw viewers.
*/
static PetscErrorCode PCView_BJacobi(PC pc, PetscViewer viewer) {
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data; /* only meaningful for the multiproc variant */
  PetscMPIInt           rank;
  PetscInt              i;
  PetscBool             iascii, isstring, isdraw;
  PetscViewer           sviewer;
  PetscViewerFormat     format;
  const char           *prefix;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    if (pc->useAmat) { PetscCall(PetscViewerASCIIPrintf(viewer, "  using Amat local matrix, number of blocks = %" PetscInt_FMT "\n", jac->n)); }
    PetscCall(PetscViewerASCIIPrintf(viewer, "  number of blocks = %" PetscInt_FMT "\n", jac->n));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* summary format: show only the first block, on rank 0 */
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for first block is in the following KSP and PC objects on rank 0:\n"));
      PetscCall(PCGetOptionsPrefix(pc, &prefix));
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all blocks\n", prefix ? prefix : ""));
      if (jac->ksp && !jac->psubcomm) {
        /* one or more blocks per rank: view rank 0's first sub-KSP through a self sub-viewer */
        PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
        if (rank == 0) {
          PetscCall(PetscViewerASCIIPushTab(viewer));
          PetscCall(KSPView(jac->ksp[0], sviewer));
          PetscCall(PetscViewerASCIIPopTab(viewer));
        }
        PetscCall(PetscViewerFlush(sviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
        PetscCall(PetscViewerFlush(viewer));
        /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else if (mpjac && jac->ksp && mpjac->psubcomm) {
        /* multiproc variant: each block lives on a subcommunicator; view from its color-0 ranks */
        PetscCall(PetscViewerGetSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
        if (!mpjac->psubcomm->color) {
          PetscCall(PetscViewerASCIIPushTab(viewer));
          PetscCall(KSPView(*(jac->ksp), sviewer));
          PetscCall(PetscViewerASCIIPopTab(viewer));
        }
        PetscCall(PetscViewerFlush(sviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
        PetscCall(PetscViewerFlush(viewer));
        /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else {
        PetscCall(PetscViewerFlush(viewer));
      }
    } else {
      /* detailed format: every rank lists each of its local blocks */
      PetscInt n_global;
      PetscCall(MPIU_Allreduce(&jac->n_local, &n_global, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for each block is in the following KSP and PC objects:\n"));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n", rank, jac->n_local, jac->first_local));
      PetscCall(PetscViewerASCIIPushTab(viewer));
      PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
      for (i = 0; i < jac->n_local; i++) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] local block number %" PetscInt_FMT "\n", rank, i));
        PetscCall(KSPView(jac->ksp[i], sviewer));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "- - - - - - - - - - - - - - - - - -\n"));
      }
      PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
      PetscCall(PetscViewerASCIIPopTab(viewer));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
  } else if (isstring) {
    PetscCall(PetscViewerStringSPrintf(viewer, " blks=%" PetscInt_FMT, jac->n));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (jac->ksp) PetscCall(KSPView(jac->ksp[0], sviewer));
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
  } else if (isdraw) {
    PetscDraw draw;
    char      str[25];
    PetscReal x, y, bottom, h;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawGetCurrentPoint(draw, &x, &y));
    PetscCall(PetscSNPrintf(str, 25, "Number blocks %" PetscInt_FMT, jac->n));
    PetscCall(PetscDrawStringBoxed(draw, x, y, PETSC_DRAW_RED, PETSC_DRAW_BLACK, str, NULL, &h));
    bottom = y - h;
    PetscCall(PetscDrawPushCurrentPoint(draw, x, bottom));
    /* warning the communicator on viewer is different from the one on ksp in parallel */
    if (jac->ksp) PetscCall(KSPView(jac->ksp[0], viewer));
    PetscCall(PetscDrawPopCurrentPoint(draw));
  }
  PetscFunctionReturn(0);
}
267 
268 /* -------------------------------------------------------------------------------------*/
269 
270 static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc, PetscInt *n_local, PetscInt *first_local, KSP **ksp) {
271   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
272 
273   PetscFunctionBegin;
274   PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Must call KSPSetUp() or PCSetUp() first");
275 
276   if (n_local) *n_local = jac->n_local;
277   if (first_local) *first_local = jac->first_local;
278   if (ksp) *ksp = jac->ksp;
279   PetscFunctionReturn(0);
280 }
281 
282 static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc, PetscInt blocks, PetscInt *lens) {
283   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
284 
285   PetscFunctionBegin;
286   PetscCheck(pc->setupcalled <= 0 || jac->n == blocks, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
287   jac->n = blocks;
288   if (!lens) jac->g_lens = NULL;
289   else {
290     PetscCall(PetscMalloc1(blocks, &jac->g_lens));
291     PetscCall(PetscLogObjectMemory((PetscObject)pc, blocks * sizeof(PetscInt)));
292     PetscCall(PetscArraycpy(jac->g_lens, lens, blocks));
293   }
294   PetscFunctionReturn(0);
295 }
296 
297 static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[]) {
298   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
299 
300   PetscFunctionBegin;
301   *blocks = jac->n;
302   if (lens) *lens = jac->g_lens;
303   PetscFunctionReturn(0);
304 }
305 
306 static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt lens[]) {
307   PC_BJacobi *jac;
308 
309   PetscFunctionBegin;
310   jac = (PC_BJacobi *)pc->data;
311 
312   jac->n_local = blocks;
313   if (!lens) jac->l_lens = NULL;
314   else {
315     PetscCall(PetscMalloc1(blocks, &jac->l_lens));
316     PetscCall(PetscLogObjectMemory((PetscObject)pc, blocks * sizeof(PetscInt)));
317     PetscCall(PetscArraycpy(jac->l_lens, lens, blocks));
318   }
319   PetscFunctionReturn(0);
320 }
321 
322 static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[]) {
323   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
324 
325   PetscFunctionBegin;
326   *blocks = jac->n_local;
327   if (lens) *lens = jac->l_lens;
328   PetscFunctionReturn(0);
329 }
330 
331 /* -------------------------------------------------------------------------------------*/
332 
333 /*@C
334    PCBJacobiGetSubKSP - Gets the local KSP contexts for all blocks on
335    this processor.
336 
337    Not Collective
338 
339    Input Parameter:
340 .  pc - the preconditioner context
341 
342    Output Parameters:
343 +  n_local - the number of blocks on this processor, or NULL
344 .  first_local - the global number of the first block on this processor, or NULL
345 -  ksp - the array of KSP contexts
346 
347    Notes:
348    After PCBJacobiGetSubKSP() the array of KSP contexts is not to be freed.
349 
350    Currently for some matrix implementations only 1 block per processor
351    is supported.
352 
353    You must call KSPSetUp() or PCSetUp() before calling PCBJacobiGetSubKSP().
354 
355    Fortran Usage: You must pass in a KSP array that is large enough to contain all the local KSPs.
356       You can call PCBJacobiGetSubKSP(pc,nlocal,firstlocal,PETSC_NULL_KSP,ierr) to determine how large the
357       KSP array must be.
358 
359    Level: advanced
360 
361 .seealso: `PCASMGetSubKSP()`
362 @*/
PetscErrorCode PCBJacobiGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  /* PetscUseMethod: dispatches to PCBJacobiGetSubKSP_BJacobi and errors if pc is not of type PCBJACOBI */
  PetscUseMethod(pc, "PCBJacobiGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp));
  PetscFunctionReturn(0);
}
369 
370 /*@
371    PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
372    Jacobi preconditioner.
373 
374    Collective on PC
375 
376    Input Parameters:
377 +  pc - the preconditioner context
378 .  blocks - the number of blocks
379 -  lens - [optional] integer array containing the size of each block
380 
381    Options Database Key:
382 .  -pc_bjacobi_blocks <blocks> - Sets the number of global blocks
383 
384    Notes:
385    Currently only a limited number of blocking configurations are supported.
386    All processors sharing the PC must call this routine with the same data.
387 
388    Level: intermediate
389 
390 .seealso: `PCSetUseAmat()`, `PCBJacobiSetLocalBlocks()`
391 @*/
PetscErrorCode PCBJacobiSetTotalBlocks(PC pc, PetscInt blocks, const PetscInt lens[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscCheck(blocks > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Must have positive blocks");
  /* PetscTryMethod: silently a no-op when the PC type is not PCBJACOBI */
  PetscTryMethod(pc, "PCBJacobiSetTotalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
  PetscFunctionReturn(0);
}
399 
400 /*@C
401    PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
402    Jacobi preconditioner.
403 
404    Not Collective
405 
406    Input Parameter:
407 .  pc - the preconditioner context
408 
409    Output parameters:
410 +  blocks - the number of blocks
411 -  lens - integer array containing the size of each block
412 
413    Level: intermediate
414 
415 .seealso: `PCSetUseAmat()`, `PCBJacobiGetLocalBlocks()`
416 @*/
PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidIntPointer(blocks, 2); /* blocks is mandatory; lens may be NULL */
  /* PetscUseMethod: dispatches to PCBJacobiGetTotalBlocks_BJacobi and errors if pc is not of type PCBJACOBI */
  PetscUseMethod(pc, "PCBJacobiGetTotalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
  PetscFunctionReturn(0);
}
424 
425 /*@
426    PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
427    Jacobi preconditioner.
428 
429    Not Collective
430 
431    Input Parameters:
432 +  pc - the preconditioner context
433 .  blocks - the number of blocks
434 -  lens - [optional] integer array containing size of each block
435 
436    Options Database Key:
437 .  -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks
438 
439    Note:
440    Currently only a limited number of blocking configurations are supported.
441 
442    Level: intermediate
443 
444 .seealso: `PCSetUseAmat()`, `PCBJacobiSetTotalBlocks()`
445 @*/
446 PetscErrorCode PCBJacobiSetLocalBlocks(PC pc, PetscInt blocks, const PetscInt lens[]) {
447   PetscFunctionBegin;
448   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
449   PetscCheck(blocks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must have nonegative blocks");
450   PetscTryMethod(pc, "PCBJacobiSetLocalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
451   PetscFunctionReturn(0);
452 }
453 
454 /*@C
455    PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
456    Jacobi preconditioner.
457 
458    Not Collective
459 
460    Input Parameters:
461 +  pc - the preconditioner context
462 .  blocks - the number of blocks
463 -  lens - [optional] integer array containing size of each block
464 
465    Note:
466    Currently only a limited number of blocking configurations are supported.
467 
468    Level: intermediate
469 
470 .seealso: `PCSetUseAmat()`, `PCBJacobiGetTotalBlocks()`
471 @*/
PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidIntPointer(blocks, 2); /* blocks is mandatory; lens may be NULL */
  /* PetscUseMethod: dispatches to PCBJacobiGetLocalBlocks_BJacobi and errors if pc is not of type PCBJACOBI */
  PetscUseMethod(pc, "PCBJacobiGetLocalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
  PetscFunctionReturn(0);
}
479 
480 /* -----------------------------------------------------------------------------------*/
481 
482 /*MC
483    PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with
484            its own KSP object.
485 
486    Options Database Keys:
487 +  -pc_use_amat - use Amat to apply block of operator in inner Krylov method
488 -  -pc_bjacobi_blocks <n> - use n total blocks
489 
490    Notes:
491     See PCJACOBI for diagonal Jacobi, PCVPBJACOBI for variable point block, and PCPBJACOBI for fixed size point block
492 
493     Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.
494 
     To set options on the solvers for each block append -sub_ to all the KSP and PC
        options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
497 
498      To set the options on the solvers separate for each block call PCBJacobiGetSubKSP()
499          and set the options directly on the resulting KSP object (you can access its PC
500          KSPGetPC())
501 
502      For GPU-based vectors (CUDA, ViennaCL) it is recommended to use exactly one block per MPI process for best
503          performance.  Different block partitioning may lead to additional data transfers
504          between host and GPU that lead to degraded performance.
505 
506      The options prefix for each block is sub_, for example -sub_pc_type lu.
507 
     When multiple processes share a single block, each block encompasses exactly all the unknowns owned by its set of processes.
509 
510      See PCJACOBI for point Jacobi preconditioning, PCVPBJACOBI for variable size point block Jacobi and PCPBJACOBI for large blocks
511 
512    Level: beginner
513 
514 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
515           `PCASM`, `PCSetUseAmat()`, `PCGetUseAmat()`, `PCBJacobiGetSubKSP()`, `PCBJacobiSetTotalBlocks()`,
516           `PCBJacobiSetLocalBlocks()`, `PCSetModifySubMatrices()`, `PCJACOBI`, `PCVPBJACOBI`, `PCPBJACOBI`
517 M*/
518 
519 PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc) {
520   PetscMPIInt rank;
521   PC_BJacobi *jac;
522 
523   PetscFunctionBegin;
524   PetscCall(PetscNewLog(pc, &jac));
525   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
526 
527   pc->ops->apply           = NULL;
528   pc->ops->matapply        = NULL;
529   pc->ops->applytranspose  = NULL;
530   pc->ops->setup           = PCSetUp_BJacobi;
531   pc->ops->destroy         = PCDestroy_BJacobi;
532   pc->ops->setfromoptions  = PCSetFromOptions_BJacobi;
533   pc->ops->view            = PCView_BJacobi;
534   pc->ops->applyrichardson = NULL;
535 
536   pc->data         = (void *)jac;
537   jac->n           = -1;
538   jac->n_local     = -1;
539   jac->first_local = rank;
540   jac->ksp         = NULL;
541   jac->g_lens      = NULL;
542   jac->l_lens      = NULL;
543   jac->psubcomm    = NULL;
544 
545   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", PCBJacobiGetSubKSP_BJacobi));
546   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", PCBJacobiSetTotalBlocks_BJacobi));
547   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", PCBJacobiGetTotalBlocks_BJacobi));
548   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", PCBJacobiSetLocalBlocks_BJacobi));
549   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", PCBJacobiGetLocalBlocks_BJacobi));
550   PetscFunctionReturn(0);
551 }
552 
553 /* --------------------------------------------------------------------------------------------*/
554 /*
555         These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI
556 */
/* Resets the single-block data: resets the sub-KSP and destroys the work vectors (recreated on next setup). */
static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc) {
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

  PetscFunctionBegin;
  PetscCall(KSPReset(jac->ksp[0]));
  PetscCall(VecDestroy(&bjac->x));
  PetscCall(VecDestroy(&bjac->y));
  PetscFunctionReturn(0);
}
567 
/* Destroys the single-block data, then chains to the base PCDestroy_BJacobi() for the shared fields. */
static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc) {
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

  PetscFunctionBegin;
  /* reset first so the work vectors are gone before the KSP itself is destroyed */
  PetscCall(PCReset_BJacobi_Singleblock(pc));
  PetscCall(KSPDestroy(&jac->ksp[0]));
  PetscCall(PetscFree(jac->ksp));
  PetscCall(PetscFree(bjac));
  PetscCall(PCDestroy_BJacobi(pc));
  PetscFunctionReturn(0);
}
580 
581 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc) {
582   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
583   KSP                subksp = jac->ksp[0];
584   KSPConvergedReason reason;
585 
586   PetscFunctionBegin;
587   PetscCall(KSPSetUp(subksp));
588   PetscCall(KSPGetConvergedReason(subksp, &reason));
589   if (reason == KSP_DIVERGED_PC_FAILED) { pc->failedreason = PC_SUBPC_ERROR; }
590   PetscFunctionReturn(0);
591 }
592 
/*
   PCApply_BJacobi_Singleblock - Applies the preconditioner y = B^{-1} x by
   solving with the single sequential sub-KSP on local views of x and y.
*/
static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc, Vec x, Vec y) {
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

  PetscFunctionBegin;
  /* borrow local (sequential) views of the possibly-parallel x and y — no copies */
  PetscCall(VecGetLocalVectorRead(x, bjac->x));
  PetscCall(VecGetLocalVector(y, bjac->y));
  /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
     matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
     of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
  PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
  PetscCall(KSPSolve(jac->ksp[0], bjac->x, bjac->y));
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
  /* return the borrowed views before callers touch x/y again */
  PetscCall(VecRestoreLocalVectorRead(x, bjac->x));
  PetscCall(VecRestoreLocalVector(y, bjac->y));
  PetscFunctionReturn(0);
}
610 
611 static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc, Mat X, Mat Y) {
612   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
613   Mat         sX, sY;
614 
615   PetscFunctionBegin;
616   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
617      matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
618      of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
619   PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
620   PetscCall(MatDenseGetLocalMatrix(X, &sX));
621   PetscCall(MatDenseGetLocalMatrix(Y, &sY));
622   PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
623   PetscFunctionReturn(0);
624 }
625 
/*
   PCApplySymmetricLeft_BJacobi_Singleblock - Applies only the symmetric left
   factor of the inner PC (e.g. L^{-1} of a Cholesky factorization),
   bypassing the inner KSP entirely.
*/
static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc, Vec x, Vec y) {
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
  PetscScalar            *y_array;
  const PetscScalar      *x_array;
  PC                      subpc;

  PetscFunctionBegin;
  /*
      The VecPlaceArray() is to avoid having to copy the
    y vector into the bjac->x vector. The reason for
    the bjac->x vector is that we need a sequential vector
    for the sequential solve.
  */
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  PetscCall(VecPlaceArray(bjac->x, x_array));
  PetscCall(VecPlaceArray(bjac->y, y_array));
  /* apply the symmetric left portion of the inner PC operator */
  /* note this by-passes the inner KSP and its options completely */
  PetscCall(KSPGetPC(jac->ksp[0], &subpc));
  PetscCall(PCApplySymmetricLeft(subpc, bjac->x, bjac->y));
  /* detach the borrowed arrays before restoring them to x and y */
  PetscCall(VecResetArray(bjac->x));
  PetscCall(VecResetArray(bjac->y));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscCall(VecRestoreArray(y, &y_array));
  PetscFunctionReturn(0);
}
654 
655 static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc, Vec x, Vec y) {
656   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
657   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
658   PetscScalar            *y_array;
659   const PetscScalar      *x_array;
660   PC                      subpc;
661 
662   PetscFunctionBegin;
663   /*
664       The VecPlaceArray() is to avoid having to copy the
665     y vector into the bjac->x vector. The reason for
666     the bjac->x vector is that we need a sequential vector
667     for the sequential solve.
668   */
669   PetscCall(VecGetArrayRead(x, &x_array));
670   PetscCall(VecGetArray(y, &y_array));
671   PetscCall(VecPlaceArray(bjac->x, x_array));
672   PetscCall(VecPlaceArray(bjac->y, y_array));
673 
674   /* apply the symmetric right portion of the inner PC operator */
675   /* note this by-passes the inner KSP and its options completely */
676 
677   PetscCall(KSPGetPC(jac->ksp[0], &subpc));
678   PetscCall(PCApplySymmetricRight(subpc, bjac->x, bjac->y));
679 
680   PetscCall(VecRestoreArrayRead(x, &x_array));
681   PetscCall(VecRestoreArray(y, &y_array));
682   PetscFunctionReturn(0);
683 }
684 
/*
   PCApplyTranspose_BJacobi_Singleblock - Applies the transpose of the
   preconditioner, y = B^{-T} x, via KSPSolveTranspose() on the sub-KSP.
*/
static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc, Vec x, Vec y) {
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
  PetscScalar            *y_array;
  const PetscScalar      *x_array;

  PetscFunctionBegin;
  /*
      The VecPlaceArray() is to avoid having to copy the
    y vector into the bjac->x vector. The reason for
    the bjac->x vector is that we need a sequential vector
    for the sequential solve.
  */
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  PetscCall(VecPlaceArray(bjac->x, x_array));
  PetscCall(VecPlaceArray(bjac->y, y_array));
  PetscCall(KSPSolveTranspose(jac->ksp[0], bjac->x, bjac->y));
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
  /* detach the borrowed arrays before restoring them to x and y */
  PetscCall(VecResetArray(bjac->x));
  PetscCall(VecResetArray(bjac->y));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscCall(VecRestoreArray(y, &y_array));
  PetscFunctionReturn(0);
}
710 
/*
   PCSetUp_BJacobi_Singleblock - Sets up block Jacobi when there is exactly one
   block per MPI rank: creates one sequential sub-KSP (prefixed "sub_"), work
   vectors that alias the caller's arrays at apply time, and installs the
   Singleblock implementations of the PC operations.
*/
static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc, Mat mat, Mat pmat) {
  PC_BJacobi             *jac = (PC_BJacobi *)pc->data;
  PetscInt                m;
  KSP                     ksp;
  PC_BJacobi_Singleblock *bjac;
  PetscBool               wasSetup = PETSC_TRUE; /* skip KSPSetFromOptions below when the sub-KSP already existed */
  VecType                 vectype;
  const char             *prefix;

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    if (!jac->ksp) {
      /* first setup: create the single sequential solver and install the ops table */
      wasSetup = PETSC_FALSE;

      PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
      PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
      PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
      PetscCall(PetscLogObjectParent((PetscObject)pc, (PetscObject)ksp));
      /* default sub-solver only applies its PC; users override via -sub_ options */
      PetscCall(KSPSetType(ksp, KSPPREONLY));
      PetscCall(PCGetOptionsPrefix(pc, &prefix));
      PetscCall(KSPSetOptionsPrefix(ksp, prefix));
      PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));

      pc->ops->reset               = PCReset_BJacobi_Singleblock;
      pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
      pc->ops->apply               = PCApply_BJacobi_Singleblock;
      pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
      pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
      pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
      pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
      pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;

      PetscCall(PetscMalloc1(1, &jac->ksp));
      jac->ksp[0] = ksp;

      PetscCall(PetscNewLog(pc, &bjac));
      jac->data = (void *)bjac;
    } else {
      /* sub-KSP was created earlier (e.g. by PCBJacobiGetSubKSP before setup) */
      ksp  = jac->ksp[0];
      bjac = (PC_BJacobi_Singleblock *)jac->data;
    }

    /*
      The reason we need to generate these vectors is to serve
      as the right-hand side and solution vector for the solve on the
      block. We do not need to allocate space for the vectors since
      that is provided via VecPlaceArray() just before the call to
      KSPSolve() on the block.
    */
    PetscCall(MatGetSize(pmat, &m, &m));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->x));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->y));
    /* match the matrix's vector type so e.g. GPU matrices get GPU work vectors */
    PetscCall(MatGetVecType(pmat, &vectype));
    PetscCall(VecSetType(bjac->x, vectype));
    PetscCall(VecSetType(bjac->y, vectype));
    PetscCall(PetscLogObjectParent((PetscObject)pc, (PetscObject)bjac->x));
    PetscCall(PetscLogObjectParent((PetscObject)pc, (PetscObject)bjac->y));
  } else {
    ksp  = jac->ksp[0];
    bjac = (PC_BJacobi_Singleblock *)jac->data;
  }
  /* (re)attach the operators; with useAmat the true operator is used for the solve */
  PetscCall(KSPGetOptionsPrefix(ksp, &prefix));
  if (pc->useAmat) {
    PetscCall(KSPSetOperators(ksp, mat, pmat));
    PetscCall(MatSetOptionsPrefix(mat, prefix));
  } else {
    PetscCall(KSPSetOperators(ksp, pmat, pmat));
  }
  PetscCall(MatSetOptionsPrefix(pmat, prefix));
  if (!wasSetup && pc->setfromoptionscalled) {
    /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
    PetscCall(KSPSetFromOptions(ksp));
  }
  PetscFunctionReturn(0);
}
786 
787 /* ---------------------------------------------------------------------------------------------*/
788 static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc) {
789   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
790   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
791   PetscInt               i;
792 
793   PetscFunctionBegin;
794   if (bjac && bjac->pmat) {
795     PetscCall(MatDestroyMatrices(jac->n_local, &bjac->pmat));
796     if (pc->useAmat) { PetscCall(MatDestroyMatrices(jac->n_local, &bjac->mat)); }
797   }
798 
799   for (i = 0; i < jac->n_local; i++) {
800     PetscCall(KSPReset(jac->ksp[i]));
801     if (bjac && bjac->x) {
802       PetscCall(VecDestroy(&bjac->x[i]));
803       PetscCall(VecDestroy(&bjac->y[i]));
804       PetscCall(ISDestroy(&bjac->is[i]));
805     }
806   }
807   PetscCall(PetscFree(jac->l_lens));
808   PetscCall(PetscFree(jac->g_lens));
809   PetscFunctionReturn(0);
810 }
811 
812 static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc) {
813   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
814   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
815   PetscInt               i;
816 
817   PetscFunctionBegin;
818   PetscCall(PCReset_BJacobi_Multiblock(pc));
819   if (bjac) {
820     PetscCall(PetscFree2(bjac->x, bjac->y));
821     PetscCall(PetscFree(bjac->starts));
822     PetscCall(PetscFree(bjac->is));
823   }
824   PetscCall(PetscFree(jac->data));
825   for (i = 0; i < jac->n_local; i++) { PetscCall(KSPDestroy(&jac->ksp[i])); }
826   PetscCall(PetscFree(jac->ksp));
827   PetscCall(PCDestroy_BJacobi(pc));
828   PetscFunctionReturn(0);
829 }
830 
831 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc) {
832   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
833   PetscInt           i, n_local = jac->n_local;
834   KSPConvergedReason reason;
835 
836   PetscFunctionBegin;
837   for (i = 0; i < n_local; i++) {
838     PetscCall(KSPSetUp(jac->ksp[i]));
839     PetscCall(KSPGetConvergedReason(jac->ksp[i], &reason));
840     if (reason == KSP_DIVERGED_PC_FAILED) { pc->failedreason = PC_SUBPC_ERROR; }
841   }
842   PetscFunctionReturn(0);
843 }
844 
845 /*
846       Preconditioner for block Jacobi
847 */
848 static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc, Vec x, Vec y) {
849   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
850   PetscInt               i, n_local = jac->n_local;
851   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
852   PetscScalar           *yin;
853   const PetscScalar     *xin;
854 
855   PetscFunctionBegin;
856   PetscCall(VecGetArrayRead(x, &xin));
857   PetscCall(VecGetArray(y, &yin));
858   for (i = 0; i < n_local; i++) {
859     /*
860        To avoid copying the subvector from x into a workspace we instead
861        make the workspace vector array point to the subpart of the array of
862        the global vector.
863     */
864     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
865     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
866 
867     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
868     PetscCall(KSPSolve(jac->ksp[i], bjac->x[i], bjac->y[i]));
869     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
870     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
871 
872     PetscCall(VecResetArray(bjac->x[i]));
873     PetscCall(VecResetArray(bjac->y[i]));
874   }
875   PetscCall(VecRestoreArrayRead(x, &xin));
876   PetscCall(VecRestoreArray(y, &yin));
877   PetscFunctionReturn(0);
878 }
879 
880 /*
881       Preconditioner for block Jacobi
882 */
883 static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc, Vec x, Vec y) {
884   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
885   PetscInt               i, n_local = jac->n_local;
886   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
887   PetscScalar           *yin;
888   const PetscScalar     *xin;
889 
890   PetscFunctionBegin;
891   PetscCall(VecGetArrayRead(x, &xin));
892   PetscCall(VecGetArray(y, &yin));
893   for (i = 0; i < n_local; i++) {
894     /*
895        To avoid copying the subvector from x into a workspace we instead
896        make the workspace vector array point to the subpart of the array of
897        the global vector.
898     */
899     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
900     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
901 
902     PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
903     PetscCall(KSPSolveTranspose(jac->ksp[i], bjac->x[i], bjac->y[i]));
904     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
905     PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
906 
907     PetscCall(VecResetArray(bjac->x[i]));
908     PetscCall(VecResetArray(bjac->y[i]));
909   }
910   PetscCall(VecRestoreArrayRead(x, &xin));
911   PetscCall(VecRestoreArray(y, &yin));
912   PetscFunctionReturn(0);
913 }
914 
915 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc, Mat mat, Mat pmat) {
916   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
917   PetscInt               m, n_local, N, M, start, i;
918   const char            *prefix;
919   KSP                    ksp;
920   Vec                    x, y;
921   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
922   PC                     subpc;
923   IS                     is;
924   MatReuse               scall;
925   VecType                vectype;
926 
927   PetscFunctionBegin;
928   PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
929 
930   n_local = jac->n_local;
931 
932   if (pc->useAmat) {
933     PetscBool same;
934     PetscCall(PetscObjectTypeCompare((PetscObject)mat, ((PetscObject)pmat)->type_name, &same));
935     PetscCheck(same, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "Matrices not of same type");
936   }
937 
938   if (!pc->setupcalled) {
939     scall = MAT_INITIAL_MATRIX;
940 
941     if (!jac->ksp) {
942       pc->ops->reset          = PCReset_BJacobi_Multiblock;
943       pc->ops->destroy        = PCDestroy_BJacobi_Multiblock;
944       pc->ops->apply          = PCApply_BJacobi_Multiblock;
945       pc->ops->matapply       = NULL;
946       pc->ops->applytranspose = PCApplyTranspose_BJacobi_Multiblock;
947       pc->ops->setuponblocks  = PCSetUpOnBlocks_BJacobi_Multiblock;
948 
949       PetscCall(PetscNewLog(pc, &bjac));
950       PetscCall(PetscMalloc1(n_local, &jac->ksp));
951       PetscCall(PetscLogObjectMemory((PetscObject)pc, sizeof(n_local * sizeof(KSP))));
952       PetscCall(PetscMalloc2(n_local, &bjac->x, n_local, &bjac->y));
953       PetscCall(PetscMalloc1(n_local, &bjac->starts));
954       PetscCall(PetscLogObjectMemory((PetscObject)pc, sizeof(n_local * sizeof(PetscScalar))));
955 
956       jac->data = (void *)bjac;
957       PetscCall(PetscMalloc1(n_local, &bjac->is));
958       PetscCall(PetscLogObjectMemory((PetscObject)pc, sizeof(n_local * sizeof(IS))));
959 
960       for (i = 0; i < n_local; i++) {
961         PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
962         PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
963         PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
964         PetscCall(PetscLogObjectParent((PetscObject)pc, (PetscObject)ksp));
965         PetscCall(KSPSetType(ksp, KSPPREONLY));
966         PetscCall(KSPGetPC(ksp, &subpc));
967         PetscCall(PCGetOptionsPrefix(pc, &prefix));
968         PetscCall(KSPSetOptionsPrefix(ksp, prefix));
969         PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));
970 
971         jac->ksp[i] = ksp;
972       }
973     } else {
974       bjac = (PC_BJacobi_Multiblock *)jac->data;
975     }
976 
977     start = 0;
978     PetscCall(MatGetVecType(pmat, &vectype));
979     for (i = 0; i < n_local; i++) {
980       m = jac->l_lens[i];
981       /*
982       The reason we need to generate these vectors is to serve
983       as the right-hand side and solution vector for the solve on the
984       block. We do not need to allocate space for the vectors since
985       that is provided via VecPlaceArray() just before the call to
986       KSPSolve() on the block.
987 
988       */
989       PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &x));
990       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &y));
991       PetscCall(VecSetType(x, vectype));
992       PetscCall(VecSetType(y, vectype));
993       PetscCall(PetscLogObjectParent((PetscObject)pc, (PetscObject)x));
994       PetscCall(PetscLogObjectParent((PetscObject)pc, (PetscObject)y));
995 
996       bjac->x[i]      = x;
997       bjac->y[i]      = y;
998       bjac->starts[i] = start;
999 
1000       PetscCall(ISCreateStride(PETSC_COMM_SELF, m, start, 1, &is));
1001       bjac->is[i] = is;
1002       PetscCall(PetscLogObjectParent((PetscObject)pc, (PetscObject)is));
1003 
1004       start += m;
1005     }
1006   } else {
1007     bjac = (PC_BJacobi_Multiblock *)jac->data;
1008     /*
1009        Destroy the blocks from the previous iteration
1010     */
1011     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1012       PetscCall(MatDestroyMatrices(n_local, &bjac->pmat));
1013       if (pc->useAmat) { PetscCall(MatDestroyMatrices(n_local, &bjac->mat)); }
1014       scall = MAT_INITIAL_MATRIX;
1015     } else scall = MAT_REUSE_MATRIX;
1016   }
1017 
1018   PetscCall(MatCreateSubMatrices(pmat, n_local, bjac->is, bjac->is, scall, &bjac->pmat));
1019   if (pc->useAmat) { PetscCall(MatCreateSubMatrices(mat, n_local, bjac->is, bjac->is, scall, &bjac->mat)); }
1020   /* Return control to the user so that the submatrices can be modified (e.g., to apply
1021      different boundary conditions for the submatrices than for the global problem) */
1022   PetscCall(PCModifySubMatrices(pc, n_local, bjac->is, bjac->is, bjac->pmat, pc->modifysubmatricesP));
1023 
1024   for (i = 0; i < n_local; i++) {
1025     PetscCall(PetscLogObjectParent((PetscObject)pc, (PetscObject)bjac->pmat[i]));
1026     PetscCall(KSPGetOptionsPrefix(jac->ksp[i], &prefix));
1027     if (pc->useAmat) {
1028       PetscCall(PetscLogObjectParent((PetscObject)pc, (PetscObject)bjac->mat[i]));
1029       PetscCall(KSPSetOperators(jac->ksp[i], bjac->mat[i], bjac->pmat[i]));
1030       PetscCall(MatSetOptionsPrefix(bjac->mat[i], prefix));
1031     } else {
1032       PetscCall(KSPSetOperators(jac->ksp[i], bjac->pmat[i], bjac->pmat[i]));
1033     }
1034     PetscCall(MatSetOptionsPrefix(bjac->pmat[i], prefix));
1035     if (pc->setfromoptionscalled) { PetscCall(KSPSetFromOptions(jac->ksp[i])); }
1036   }
1037   PetscFunctionReturn(0);
1038 }
1039 
1040 /* ---------------------------------------------------------------------------------------------*/
1041 /*
1042       These are for a single block with multiple processes
1043 */
1044 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc) {
1045   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
1046   KSP                subksp = jac->ksp[0];
1047   KSPConvergedReason reason;
1048 
1049   PetscFunctionBegin;
1050   PetscCall(KSPSetUp(subksp));
1051   PetscCall(KSPGetConvergedReason(subksp, &reason));
1052   if (reason == KSP_DIVERGED_PC_FAILED) { pc->failedreason = PC_SUBPC_ERROR; }
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc) {
1057   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1058   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1059 
1060   PetscFunctionBegin;
1061   PetscCall(VecDestroy(&mpjac->ysub));
1062   PetscCall(VecDestroy(&mpjac->xsub));
1063   PetscCall(MatDestroy(&mpjac->submats));
1064   if (jac->ksp) PetscCall(KSPReset(jac->ksp[0]));
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc) {
1069   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1070   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1071 
1072   PetscFunctionBegin;
1073   PetscCall(PCReset_BJacobi_Multiproc(pc));
1074   PetscCall(KSPDestroy(&jac->ksp[0]));
1075   PetscCall(PetscFree(jac->ksp));
1076   PetscCall(PetscSubcommDestroy(&mpjac->psubcomm));
1077 
1078   PetscCall(PetscFree(mpjac));
1079   PetscCall(PCDestroy_BJacobi(pc));
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc, Vec x, Vec y) {
1084   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1085   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1086   PetscScalar          *yarray;
1087   const PetscScalar    *xarray;
1088   KSPConvergedReason    reason;
1089 
1090   PetscFunctionBegin;
1091   /* place x's and y's local arrays into xsub and ysub */
1092   PetscCall(VecGetArrayRead(x, &xarray));
1093   PetscCall(VecGetArray(y, &yarray));
1094   PetscCall(VecPlaceArray(mpjac->xsub, xarray));
1095   PetscCall(VecPlaceArray(mpjac->ysub, yarray));
1096 
1097   /* apply preconditioner on each matrix block */
1098   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1099   PetscCall(KSPSolve(jac->ksp[0], mpjac->xsub, mpjac->ysub));
1100   PetscCall(KSPCheckSolve(jac->ksp[0], pc, mpjac->ysub));
1101   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1102   PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
1103   if (reason == KSP_DIVERGED_PC_FAILED) { pc->failedreason = PC_SUBPC_ERROR; }
1104 
1105   PetscCall(VecResetArray(mpjac->xsub));
1106   PetscCall(VecResetArray(mpjac->ysub));
1107   PetscCall(VecRestoreArrayRead(x, &xarray));
1108   PetscCall(VecRestoreArray(y, &yarray));
1109   PetscFunctionReturn(0);
1110 }
1111 
/*
   PCMatApply_BJacobi_Multiproc - Applies the preconditioner to all N columns
   of the dense matrix X at once by wrapping X's and Y's arrays in temporary
   dense matrices on the subcommunicator and calling KSPMatSolve().
*/
static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc, Mat X, Mat Y) {
  PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
  KSPConvergedReason reason;
  Mat                sX, sY; /* zero-copy dense wrappers over X's and Y's arrays */
  const PetscScalar *x;
  PetscScalar       *y;
  PetscInt           m, N, lda, ldb;

  PetscFunctionBegin;
  /* apply preconditioner on each matrix block */
  PetscCall(MatGetLocalSize(X, &m, NULL));
  PetscCall(MatGetSize(X, NULL, &N));
  /* keep the leading dimensions so the wrappers address the arrays exactly as X and Y do */
  PetscCall(MatDenseGetLDA(X, &lda));
  PetscCall(MatDenseGetLDA(Y, &ldb));
  PetscCall(MatDenseGetArrayRead(X, &x));
  PetscCall(MatDenseGetArrayWrite(Y, &y));
  /* cast away const on x: sX serves only as the right-hand side of KSPMatSolve,
     which is assumed not to write it -- NOTE(review): relies on that contract */
  PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, (PetscScalar *)x, &sX));
  PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, y, &sY));
  PetscCall(MatDenseSetLDA(sX, lda));
  PetscCall(MatDenseSetLDA(sY, ldb));
  PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
  PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, NULL));
  PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
  /* destroy the wrappers before restoring the arrays they borrowed */
  PetscCall(MatDestroy(&sY));
  PetscCall(MatDestroy(&sX));
  PetscCall(MatDenseRestoreArrayWrite(Y, &y));
  PetscCall(MatDenseRestoreArrayRead(X, &x));
  /* propagate a sub-PC failure to the outer PC */
  PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
  if (reason == KSP_DIVERGED_PC_FAILED) { pc->failedreason = PC_SUBPC_ERROR; }
  PetscFunctionReturn(0);
}
1144 
/*
   PCSetUp_BJacobi_Multiproc - Sets up block Jacobi when there are fewer blocks
   than MPI ranks, so each block spans a subcommunicator: splits the communicator,
   extracts the per-subcommunicator block of pmat, and creates one parallel
   sub-KSP (prefixed "sub_") shared by the ranks of each subcommunicator.
*/
static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc) {
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
  PetscInt              m, n;
  MPI_Comm              comm, subcomm = 0;
  const char           *prefix;
  PetscBool             wasSetup = PETSC_TRUE; /* skip KSPSetFromOptions below when setup already ran */
  VecType               vectype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
  PetscCheck(jac->n_local <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only a single block in a subcommunicator is supported");
  jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
  if (!pc->setupcalled) {
    wasSetup = PETSC_FALSE;
    PetscCall(PetscNewLog(pc, &mpjac));
    jac->data = (void *)mpjac;

    /* initialize datastructure mpjac */
    if (!jac->psubcomm) {
      /* Create default contiguous subcommunicatiors if user does not provide them */
      PetscCall(PetscSubcommCreate(comm, &jac->psubcomm));
      PetscCall(PetscSubcommSetNumber(jac->psubcomm, jac->n));
      PetscCall(PetscSubcommSetType(jac->psubcomm, PETSC_SUBCOMM_CONTIGUOUS));
      PetscCall(PetscLogObjectMemory((PetscObject)pc, sizeof(PetscSubcomm)));
    }
    mpjac->psubcomm = jac->psubcomm;
    subcomm         = PetscSubcommChild(mpjac->psubcomm);

    /* Get matrix blocks of pmat */
    PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));

    /* create a new PC that processors in each subcomm have copy of */
    PetscCall(PetscMalloc1(1, &jac->ksp));
    PetscCall(KSPCreate(subcomm, &jac->ksp[0]));
    PetscCall(KSPSetErrorIfNotConverged(jac->ksp[0], pc->erroriffailure));
    PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0], (PetscObject)pc, 1));
    PetscCall(PetscLogObjectParent((PetscObject)pc, (PetscObject)jac->ksp[0]));
    PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
    PetscCall(KSPGetPC(jac->ksp[0], &mpjac->pc));

    /* inherit the outer prefix and append "sub_" so users can control the block solver */
    PetscCall(PCGetOptionsPrefix(pc, &prefix));
    PetscCall(KSPSetOptionsPrefix(jac->ksp[0], prefix));
    PetscCall(KSPAppendOptionsPrefix(jac->ksp[0], "sub_"));
    PetscCall(KSPGetOptionsPrefix(jac->ksp[0], &prefix));
    PetscCall(MatSetOptionsPrefix(mpjac->submats, prefix));

    /* create dummy vectors xsub and ysub; their arrays are supplied at apply
       time via VecPlaceArray(), hence the NULL array here */
    PetscCall(MatGetLocalSize(mpjac->submats, &m, &n));
    PetscCall(VecCreateMPIWithArray(subcomm, 1, n, PETSC_DECIDE, NULL, &mpjac->xsub));
    PetscCall(VecCreateMPIWithArray(subcomm, 1, m, PETSC_DECIDE, NULL, &mpjac->ysub));
    /* match the matrix's vector type so e.g. GPU matrices get GPU work vectors */
    PetscCall(MatGetVecType(mpjac->submats, &vectype));
    PetscCall(VecSetType(mpjac->xsub, vectype));
    PetscCall(VecSetType(mpjac->ysub, vectype));
    PetscCall(PetscLogObjectParent((PetscObject)pc, (PetscObject)mpjac->xsub));
    PetscCall(PetscLogObjectParent((PetscObject)pc, (PetscObject)mpjac->ysub));

    pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc;
    pc->ops->reset         = PCReset_BJacobi_Multiproc;
    pc->ops->destroy       = PCDestroy_BJacobi_Multiproc;
    pc->ops->apply         = PCApply_BJacobi_Multiproc;
    pc->ops->matapply      = PCMatApply_BJacobi_Multiproc;
  } else { /* pc->setupcalled */
    subcomm = PetscSubcommChild(mpjac->psubcomm);
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      /* destroy old matrix blocks, then get new matrix blocks */
      if (mpjac->submats) PetscCall(MatDestroy(&mpjac->submats));
      PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
    } else {
      PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_REUSE_MATRIX, &mpjac->submats));
    }
    PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
  }

  if (!wasSetup && pc->setfromoptionscalled) { PetscCall(KSPSetFromOptions(jac->ksp[0])); }
  PetscFunctionReturn(0);
}
1222