xref: /petsc/src/ksp/pc/impls/bjacobi/bjacobi.c (revision 51ece73c09ceeb024bb01a3716d2d4b0ee62fef2)
1 /*
2    Defines a block Jacobi preconditioner.
3 */
4 
5 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/
6 
7 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC, Mat, Mat);
8 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC, Mat, Mat);
9 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);
10 
/*
   PCSetUp_BJacobi - Determines the block layout across MPI processes (from a
   user-supplied global block count, local block count, explicit block lengths,
   or the default of one block per process), extracts the local diagonal
   block(s) of the operator, and dispatches to the single-block, multi-block,
   or multi-process setup routine.
*/
static PetscErrorCode PCSetUp_BJacobi(PC pc)
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;
  Mat         mat = pc->mat, pmat = pc->pmat;
  PetscBool   hasop;
  PetscInt    N, M, start, i, sum, end;
  PetscInt    bs, i_start = -1, i_end = -1;
  PetscMPIInt rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
  PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
  PetscCall(MatGetBlockSize(pc->pmat, &bs));

  /* fewer blocks than processes: each block is shared by several processes */
  if (jac->n > 0 && jac->n < size) {
    PetscCall(PCSetUp_BJacobi_Multiproc(pc));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /*    Determines the number of blocks assigned to each processor */
  /*   local block count  given */
  if (jac->n_local > 0 && jac->n < 0) {
    /* global count is the sum of the per-process local counts */
    PetscCallMPI(MPIU_Allreduce(&jac->n_local, &jac->n, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
    if (jac->l_lens) { /* check that user set these correctly */
      sum = 0;
      for (i = 0; i < jac->n_local; i++) {
        /* each block length must be a multiple of the matrix block size */
        PetscCheck(jac->l_lens[i] / bs * bs == jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
        sum += jac->l_lens[i];
      }
      PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local lens set incorrectly");
    } else {
      /* no lengths given: split the local rows as evenly as possible, in units of bs */
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
    }
  } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
    /* global blocks given: determine which ones are local */
    if (jac->g_lens) {
      /* check that the g_lens entries are valid */
      for (i = 0; i < jac->n; i++) {
        PetscCheck(jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Zero block not allowed");
        PetscCheck(jac->g_lens[i] / bs * bs == jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
      }
      if (size == 1) {
        jac->n_local = jac->n;
        PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
        PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens, jac->n_local));
        /* check that user set these correctly */
        sum = 0;
        for (i = 0; i < jac->n_local; i++) sum += jac->l_lens[i];
        PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Global lens set incorrectly");
      } else {
        PetscCall(MatGetOwnershipRange(pc->pmat, &start, &end));
        /* loop over blocks determining first one owned by me; the running
           prefix sum of g_lens must land exactly on the ownership-range
           start, otherwise the block layout does not align with the
           parallel row distribution */
        sum = 0;
        for (i = 0; i < jac->n + 1; i++) {
          if (sum == start) {
            i_start = i;
            goto start_1;
          }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
      start_1:
        /* continue the prefix sum until it lands exactly on the ownership-range end */
        for (i = i_start; i < jac->n + 1; i++) {
          if (sum == end) {
            i_end = i;
            goto end_1;
          }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
      end_1:
        jac->n_local = i_end - i_start;
        PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
        PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens + i_start, jac->n_local));
      }
    } else { /* no global blocks given, determine them using default layout */
      /* spread the jac->n blocks round-robin over the processes */
      jac->n_local = jac->n / size + ((jac->n % size) > rank);
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) {
        jac->l_lens[i] = ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)) * bs;
        PetscCheck(jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Too many blocks given");
      }
    }
  } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
    /* default: one block per process covering all locally owned rows */
    jac->n       = size;
    jac->n_local = 1;
    PetscCall(PetscMalloc1(1, &jac->l_lens));
    jac->l_lens[0] = M;
  } else { /* jac->n > 0 && jac->n_local > 0 */
    if (!jac->l_lens) {
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
    }
  }
  PetscCheck(jac->n_local >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of blocks is less than number of processors");

  /*    Determines mat and pmat */
  PetscCall(MatHasOperation(pc->mat, MATOP_GET_DIAGONAL_BLOCK, &hasop));
  if (!hasop && size == 1) {
    /* sequential with no diagonal-block routine: use the matrices directly */
    mat  = pc->mat;
    pmat = pc->pmat;
  } else {
    if (pc->useAmat) {
      /* use block from Amat matrix, not Pmat for local MatMult() */
      PetscCall(MatGetDiagonalBlock(pc->mat, &mat));
    }
    if (pc->pmat != pc->mat || !pc->useAmat) {
      PetscCall(MatGetDiagonalBlock(pc->pmat, &pmat));
    } else pmat = mat;
  }

  /*
     Setup code depends on the number of blocks
  */
  if (jac->n_local == 1) {
    PetscCall(PCSetUp_BJacobi_Singleblock(pc, mat, pmat));
  } else {
    PetscCall(PCSetUp_BJacobi_Multiblock(pc, mat, pmat));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
134 
135 /* Default destroy, if it has never been setup */
136 static PetscErrorCode PCDestroy_BJacobi(PC pc)
137 {
138   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
139 
140   PetscFunctionBegin;
141   PetscCall(PetscFree(jac->g_lens));
142   PetscCall(PetscFree(jac->l_lens));
143   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", NULL));
144   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", NULL));
145   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", NULL));
146   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", NULL));
147   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", NULL));
148   PetscCall(PetscFree(pc->data));
149   PetscFunctionReturn(PETSC_SUCCESS);
150 }
151 
152 static PetscErrorCode PCSetFromOptions_BJacobi(PC pc, PetscOptionItems PetscOptionsObject)
153 {
154   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
155   PetscInt    blocks, i;
156   PetscBool   flg;
157 
158   PetscFunctionBegin;
159   PetscOptionsHeadBegin(PetscOptionsObject, "Block Jacobi options");
160   PetscCall(PetscOptionsInt("-pc_bjacobi_blocks", "Total number of blocks", "PCBJacobiSetTotalBlocks", jac->n, &blocks, &flg));
161   if (flg) PetscCall(PCBJacobiSetTotalBlocks(pc, blocks, NULL));
162   PetscCall(PetscOptionsInt("-pc_bjacobi_local_blocks", "Local number of blocks", "PCBJacobiSetLocalBlocks", jac->n_local, &blocks, &flg));
163   if (flg) PetscCall(PCBJacobiSetLocalBlocks(pc, blocks, NULL));
164   if (jac->ksp) {
165     /* The sub-KSP has already been set up (e.g., PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions was not called
166      * unless we had already been called. */
167     for (i = 0; i < jac->n_local; i++) PetscCall(KSPSetFromOptions(jac->ksp[i]));
168   }
169   PetscOptionsHeadEnd();
170   PetscFunctionReturn(PETSC_SUCCESS);
171 }
172 
173 #include <petscdraw.h>
/*
   PCView_BJacobi - Prints information about the block Jacobi preconditioner
   to an ASCII, string, or draw viewer. In ASCII detail mode every rank's
   sub-KSPs are shown through a synchronized sub-viewer; otherwise only the
   first block on rank 0 is shown.
*/
static PetscErrorCode PCView_BJacobi(PC pc, PetscViewer viewer)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
  PetscMPIInt           rank;
  PetscInt              i;
  PetscBool             iascii, isstring, isdraw;
  PetscViewer           sviewer;
  PetscViewerFormat     format;
  const char           *prefix;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    if (pc->useAmat) PetscCall(PetscViewerASCIIPrintf(viewer, "  using Amat local matrix, number of blocks = %" PetscInt_FMT "\n", jac->n));
    PetscCall(PetscViewerASCIIPrintf(viewer, "  number of blocks = %" PetscInt_FMT "\n", jac->n));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* summary mode: show only the first block's solver, on rank 0 */
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for first block is in the following KSP and PC objects on rank 0:\n"));
      PetscCall(PCGetOptionsPrefix(pc, &prefix));
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all blocks\n", prefix ? prefix : ""));
      if (jac->ksp && !jac->psubcomm) {
        /* one-or-more blocks per process: view the first sub-KSP from rank 0 only */
        PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
        if (rank == 0) {
          PetscCall(PetscViewerASCIIPushTab(sviewer));
          PetscCall(KSPView(jac->ksp[0], sviewer));
          PetscCall(PetscViewerASCIIPopTab(sviewer));
        }
        PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
        /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else if (mpjac && jac->ksp && mpjac->psubcomm) {
        /* multi-process blocks: view from the first subcommunicator (color 0) */
        PetscCall(PetscViewerGetSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
        if (!mpjac->psubcomm->color) {
          PetscCall(PetscViewerASCIIPushTab(sviewer));
          PetscCall(KSPView(*jac->ksp, sviewer));
          PetscCall(PetscViewerASCIIPopTab(sviewer));
        }
        PetscCall(PetscViewerRestoreSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
        /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      }
    } else {
      /* detail mode: every rank prints all of its local blocks in rank order */
      PetscInt n_global;
      PetscCallMPI(MPIU_Allreduce(&jac->n_local, &n_global, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for each block is in the following KSP and PC objects:\n"));
      PetscCall(PetscViewerASCIIPushTab(viewer));
      PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
      PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n", rank, jac->n_local, jac->first_local));
      for (i = 0; i < jac->n_local; i++) {
        PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] local block number %" PetscInt_FMT "\n", rank, i));
        PetscCall(KSPView(jac->ksp[i], sviewer));
        PetscCall(PetscViewerASCIIPrintf(sviewer, "- - - - - - - - - - - - - - - - - -\n"));
      }
      PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
      PetscCall(PetscViewerASCIIPopTab(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
  } else if (isstring) {
    PetscCall(PetscViewerStringSPrintf(viewer, " blks=%" PetscInt_FMT, jac->n));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (jac->ksp) PetscCall(KSPView(jac->ksp[0], sviewer));
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
  } else if (isdraw) {
    PetscDraw draw;
    char      str[25];
    PetscReal x, y, bottom, h;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawGetCurrentPoint(draw, &x, &y));
    PetscCall(PetscSNPrintf(str, 25, "Number blocks %" PetscInt_FMT, jac->n));
    PetscCall(PetscDrawStringBoxed(draw, x, y, PETSC_DRAW_RED, PETSC_DRAW_BLACK, str, NULL, &h));
    bottom = y - h;
    PetscCall(PetscDrawPushCurrentPoint(draw, x, bottom));
    /* warning the communicator on viewer is different than on ksp in parallel */
    if (jac->ksp) PetscCall(KSPView(jac->ksp[0], viewer));
    PetscCall(PetscDrawPopCurrentPoint(draw));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
258 
259 static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc, PetscInt *n_local, PetscInt *first_local, KSP **ksp)
260 {
261   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
262 
263   PetscFunctionBegin;
264   PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Must call KSPSetUp() or PCSetUp() first");
265 
266   if (n_local) *n_local = jac->n_local;
267   if (first_local) *first_local = jac->first_local;
268   if (ksp) *ksp = jac->ksp;
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt *lens)
273 {
274   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
275 
276   PetscFunctionBegin;
277   PetscCheck(pc->setupcalled <= 0 || jac->n == blocks, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
278   jac->n = blocks;
279   if (!lens) jac->g_lens = NULL;
280   else {
281     PetscCall(PetscMalloc1(blocks, &jac->g_lens));
282     PetscCall(PetscArraycpy(jac->g_lens, lens, blocks));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
288 {
289   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
290 
291   PetscFunctionBegin;
292   *blocks = jac->n;
293   if (lens) *lens = jac->g_lens;
294   PetscFunctionReturn(PETSC_SUCCESS);
295 }
296 
297 static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt lens[])
298 {
299   PC_BJacobi *jac;
300 
301   PetscFunctionBegin;
302   jac = (PC_BJacobi *)pc->data;
303 
304   jac->n_local = blocks;
305   if (!lens) jac->l_lens = NULL;
306   else {
307     PetscCall(PetscMalloc1(blocks, &jac->l_lens));
308     PetscCall(PetscArraycpy(jac->l_lens, lens, blocks));
309   }
310   PetscFunctionReturn(PETSC_SUCCESS);
311 }
312 
313 static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
314 {
315   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
316 
317   PetscFunctionBegin;
318   *blocks = jac->n_local;
319   if (lens) *lens = jac->l_lens;
320   PetscFunctionReturn(PETSC_SUCCESS);
321 }
322 
323 /*@C
324   PCBJacobiGetSubKSP - Gets the local `KSP` contexts for all blocks on
325   this processor.
326 
327   Not Collective
328 
329   Input Parameter:
330 . pc - the preconditioner context
331 
332   Output Parameters:
333 + n_local     - the number of blocks on this processor, or NULL
334 . first_local - the global number of the first block on this processor, or NULL
335 - ksp         - the array of KSP contexts
336 
337   Level: advanced
338 
339   Notes:
340   After `PCBJacobiGetSubKSP()` the array of `KSP` contexts is not to be freed.
341 
342   Currently for some matrix implementations only 1 block per processor
343   is supported.
344 
345   You must call `KSPSetUp()` or `PCSetUp()` before calling `PCBJacobiGetSubKSP()`.
346 
347 .seealso: [](ch_ksp), `PCBJACOBI`, `PCASM`, `PCASMGetSubKSP()`
348 @*/
PetscErrorCode PCBJacobiGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  /* dispatch to the method composed by PCCreate_BJacobi(); PetscUseMethod()
     errors if the PC type does not provide it */
  PetscUseMethod(pc, "PCBJacobiGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp));
  PetscFunctionReturn(PETSC_SUCCESS);
}
356 
357 /*@
358   PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
359   Jacobi preconditioner.
360 
361   Collective
362 
363   Input Parameters:
364 + pc     - the preconditioner context
365 . blocks - the number of blocks
366 - lens   - [optional] integer array containing the size of each block
367 
368   Options Database Key:
369 . -pc_bjacobi_blocks <blocks> - Sets the number of global blocks
370 
371   Level: intermediate
372 
373   Note:
374   Currently only a limited number of blocking configurations are supported.
375   All processors sharing the `PC` must call this routine with the same data.
376 
377 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetLocalBlocks()`
378 @*/
PetscErrorCode PCBJacobiSetTotalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscCheck(blocks > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Must have positive blocks");
  /* PetscTryMethod(): silently a no-op if the PC type is not PCBJACOBI */
  PetscTryMethod(pc, "PCBJacobiSetTotalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}
387 
388 /*@C
389   PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
390   Jacobi, `PCBJACOBI`, preconditioner.
391 
392   Not Collective
393 
394   Input Parameter:
395 . pc - the preconditioner context
396 
397   Output Parameters:
398 + blocks - the number of blocks
399 - lens   - integer array containing the size of each block
400 
401   Level: intermediate
402 
403 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetLocalBlocks()`
404 @*/
PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscAssertPointer(blocks, 2);
  /* PetscUseMethod() errors if the PC type does not provide the query */
  PetscUseMethod(pc, "PCBJacobiGetTotalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}
413 
414 /*@
415   PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
416   Jacobi, `PCBJACOBI`,  preconditioner.
417 
418   Not Collective
419 
420   Input Parameters:
421 + pc     - the preconditioner context
422 . blocks - the number of blocks
423 - lens   - [optional] integer array containing size of each block
424 
425   Options Database Key:
426 . -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks
427 
428   Level: intermediate
429 
430   Note:
431   Currently only a limited number of blocking configurations are supported.
432 
433 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetTotalBlocks()`
434 @*/
435 PetscErrorCode PCBJacobiSetLocalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
436 {
437   PetscFunctionBegin;
438   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
439   PetscCheck(blocks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must have nonegative blocks");
440   PetscTryMethod(pc, "PCBJacobiSetLocalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
441   PetscFunctionReturn(PETSC_SUCCESS);
442 }
443 
444 /*@C
445   PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
446   Jacobi, `PCBJACOBI`, preconditioner.
447 
448   Not Collective
449 
450   Input Parameters:
451 + pc     - the preconditioner context
452 . blocks - the number of blocks
453 - lens   - [optional] integer array containing size of each block
454 
455   Level: intermediate
456 
457   Note:
458   Currently only a limited number of blocking configurations are supported.
459 
460 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetTotalBlocks()`
461 @*/
PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscAssertPointer(blocks, 2);
  /* PetscUseMethod() errors if the PC type does not provide the query */
  PetscUseMethod(pc, "PCBJacobiGetLocalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}
470 
471 /*MC
472    PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with
473            its own `KSP` object.
474 
475    Options Database Keys:
476 +  -pc_use_amat - use Amat to apply block of operator in inner Krylov method
477 -  -pc_bjacobi_blocks <n> - use n total blocks
478 
479    Level: beginner
480 
481    Notes:
482     See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable point block, and `PCPBJACOBI` for fixed size point block
483 
484     Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.
485 
486      To set options on the solvers for each block append -sub_ to all the `KSP` and `PC`
487         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
488 
489      To set the options on the solvers separate for each block call `PCBJacobiGetSubKSP()`
490          and set the options directly on the resulting `KSP` object (you can access its `PC`
491          `KSPGetPC()`)
492 
493      For GPU-based vectors (`VECCUDA`, `VECViennaCL`) it is recommended to use exactly one block per MPI process for best
494          performance.  Different block partitioning may lead to additional data transfers
495          between host and GPU that lead to degraded performance.
496 
     When multiple processes share a single block, each block encompasses exactly all the unknowns owned by its set of processes.
498 
499 .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCType`,
500           `PCASM`, `PCSetUseAmat()`, `PCGetUseAmat()`, `PCBJacobiGetSubKSP()`, `PCBJacobiSetTotalBlocks()`,
501           `PCBJacobiSetLocalBlocks()`, `PCSetModifySubMatrices()`, `PCJACOBI`, `PCVPBJACOBI`, `PCPBJACOBI`
502 M*/
503 
504 PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
505 {
506   PetscMPIInt rank;
507   PC_BJacobi *jac;
508 
509   PetscFunctionBegin;
510   PetscCall(PetscNew(&jac));
511   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
512 
513   pc->ops->apply           = NULL;
514   pc->ops->matapply        = NULL;
515   pc->ops->applytranspose  = NULL;
516   pc->ops->setup           = PCSetUp_BJacobi;
517   pc->ops->destroy         = PCDestroy_BJacobi;
518   pc->ops->setfromoptions  = PCSetFromOptions_BJacobi;
519   pc->ops->view            = PCView_BJacobi;
520   pc->ops->applyrichardson = NULL;
521 
522   pc->data         = (void *)jac;
523   jac->n           = -1;
524   jac->n_local     = -1;
525   jac->first_local = rank;
526   jac->ksp         = NULL;
527   jac->g_lens      = NULL;
528   jac->l_lens      = NULL;
529   jac->psubcomm    = NULL;
530 
531   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", PCBJacobiGetSubKSP_BJacobi));
532   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", PCBJacobiSetTotalBlocks_BJacobi));
533   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", PCBJacobiGetTotalBlocks_BJacobi));
534   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", PCBJacobiSetLocalBlocks_BJacobi));
535   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", PCBJacobiGetLocalBlocks_BJacobi));
536   PetscFunctionReturn(PETSC_SUCCESS);
537 }
538 
539 /*
540         These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI
541 */
542 static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
543 {
544   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
545   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
546 
547   PetscFunctionBegin;
548   PetscCall(KSPReset(jac->ksp[0]));
549   PetscCall(VecDestroy(&bjac->x));
550   PetscCall(VecDestroy(&bjac->y));
551   PetscFunctionReturn(PETSC_SUCCESS);
552 }
553 
554 static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
555 {
556   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
557   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
558 
559   PetscFunctionBegin;
560   PetscCall(PCReset_BJacobi_Singleblock(pc));
561   PetscCall(KSPDestroy(&jac->ksp[0]));
562   PetscCall(PetscFree(jac->ksp));
563   PetscCall(PetscFree(bjac));
564   PetscCall(PCDestroy_BJacobi(pc));
565   PetscFunctionReturn(PETSC_SUCCESS);
566 }
567 
568 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
569 {
570   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
571   KSP                subksp = jac->ksp[0];
572   KSPConvergedReason reason;
573 
574   PetscFunctionBegin;
575   PetscCall(KSPSetUp(subksp));
576   PetscCall(KSPGetConvergedReason(subksp, &reason));
577   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
578   PetscFunctionReturn(PETSC_SUCCESS);
579 }
580 
/*
   PCApply_BJacobi_Singleblock - Applies the preconditioner by solving the
   single local block system with the sequential sub-KSP, using the local
   parts of x and y as the right-hand side and solution.
*/
static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

  PetscFunctionBegin;
  /* alias the local parts of x/y into the sequential work vectors (no copy) */
  PetscCall(VecGetLocalVectorRead(x, bjac->x));
  PetscCall(VecGetLocalVector(y, bjac->y));
  /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
     matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
     of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
  PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
  PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], bjac->x, bjac->y, 0));
  PetscCall(KSPSolve(jac->ksp[0], bjac->x, bjac->y));
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
  PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], bjac->x, bjac->y, 0));
  PetscCall(VecRestoreLocalVectorRead(x, bjac->x));
  PetscCall(VecRestoreLocalVector(y, bjac->y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
601 
602 static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc, Mat X, Mat Y)
603 {
604   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
605   Mat         sX, sY;
606 
607   PetscFunctionBegin;
608   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
609      matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
610      of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
611   PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
612   PetscCall(MatDenseGetLocalMatrix(X, &sX));
613   PetscCall(MatDenseGetLocalMatrix(Y, &sY));
614   PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
615   PetscFunctionReturn(PETSC_SUCCESS);
616 }
617 
/*
   PCApplySymmetricLeft_BJacobi_Singleblock - Applies only the symmetric left
   factor of the inner block's PC, bypassing the inner KSP entirely.
*/
static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
  PetscScalar            *y_array;
  const PetscScalar      *x_array;
  PC                      subpc;

  PetscFunctionBegin;
  /*
      The VecPlaceArray() is to avoid having to copy the
    y vector into the bjac->x vector. The reason for
    the bjac->x vector is that we need a sequential vector
    for the sequential solve.
  */
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  PetscCall(VecPlaceArray(bjac->x, x_array));
  PetscCall(VecPlaceArray(bjac->y, y_array));
  /* apply the symmetric left portion of the inner PC operator */
  /* note this bypasses the inner KSP and its options completely */
  PetscCall(KSPGetPC(jac->ksp[0], &subpc));
  PetscCall(PCApplySymmetricLeft(subpc, bjac->x, bjac->y));
  /* reset/restore in reverse order of place/get so the outer vectors are consistent */
  PetscCall(VecResetArray(bjac->x));
  PetscCall(VecResetArray(bjac->y));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscCall(VecRestoreArray(y, &y_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
647 
/*
   PCApplySymmetricRight_BJacobi_Singleblock - Applies only the symmetric right
   factor of the inner block's PC, bypassing the inner KSP entirely.
*/
static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
  PetscScalar            *y_array;
  const PetscScalar      *x_array;
  PC                      subpc;

  PetscFunctionBegin;
  /*
      The VecPlaceArray() is to avoid having to copy the
    y vector into the bjac->x vector. The reason for
    the bjac->x vector is that we need a sequential vector
    for the sequential solve.
  */
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  PetscCall(VecPlaceArray(bjac->x, x_array));
  PetscCall(VecPlaceArray(bjac->y, y_array));

  /* apply the symmetric right portion of the inner PC operator */
  /* note this bypasses the inner KSP and its options completely */

  PetscCall(KSPGetPC(jac->ksp[0], &subpc));
  PetscCall(PCApplySymmetricRight(subpc, bjac->x, bjac->y));

  /* reset/restore in reverse order of place/get so the outer vectors are consistent */
  PetscCall(VecResetArray(bjac->x));
  PetscCall(VecResetArray(bjac->y));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscCall(VecRestoreArray(y, &y_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
680 
/*
   PCApplyTranspose_BJacobi_Singleblock - Applies the transpose of the
   preconditioner by solving the transposed local block system with the
   sequential sub-KSP.
*/
static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
  PetscScalar            *y_array;
  const PetscScalar      *x_array;

  PetscFunctionBegin;
  /*
      The VecPlaceArray() is to avoid having to copy the
    y vector into the bjac->x vector. The reason for
    the bjac->x vector is that we need a sequential vector
    for the sequential solve.
  */
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  PetscCall(VecPlaceArray(bjac->x, x_array));
  PetscCall(VecPlaceArray(bjac->y, y_array));
  PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[0], bjac->x, bjac->y, 0));
  PetscCall(KSPSolveTranspose(jac->ksp[0], bjac->x, bjac->y));
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
  PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[0], bjac->x, bjac->y, 0));
  /* reset/restore in reverse order of place/get so the outer vectors are consistent */
  PetscCall(VecResetArray(bjac->x));
  PetscCall(VecResetArray(bjac->y));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscCall(VecRestoreArray(y, &y_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
709 
/*
   PCSetUp_BJacobi_Singleblock - Sets up block Jacobi for the case of exactly
   one block per MPI process: creates (on first setup) the sequential sub-KSP
   with prefix "sub_", the single-block data structure, and the array-less
   work vectors, then (re)sets the sub-KSP operators.
*/
static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc, Mat mat, Mat pmat)
{
  PC_BJacobi             *jac = (PC_BJacobi *)pc->data;
  PetscInt                m;
  KSP                     ksp;
  PC_BJacobi_Singleblock *bjac;
  PetscBool               wasSetup = PETSC_TRUE; /* set false only when the sub-KSP is created here */
  VecType                 vectype;
  const char             *prefix;

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    if (!jac->ksp) {
      PetscInt nestlevel;

      wasSetup = PETSC_FALSE;

      /* First time: create the sequential solver for the local block */
      PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
      PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
      PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
      PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
      PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
      PetscCall(KSPSetType(ksp, KSPPREONLY));
      /* Sub-solver options live under <pc prefix>sub_ */
      PetscCall(PCGetOptionsPrefix(pc, &prefix));
      PetscCall(KSPSetOptionsPrefix(ksp, prefix));
      PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));

      /* Install the single-block implementations of the PC operations */
      pc->ops->reset               = PCReset_BJacobi_Singleblock;
      pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
      pc->ops->apply               = PCApply_BJacobi_Singleblock;
      pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
      pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
      pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
      pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
      pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;

      PetscCall(PetscMalloc1(1, &jac->ksp));
      jac->ksp[0] = ksp;

      PetscCall(PetscNew(&bjac));
      jac->data = (void *)bjac;
    } else {
      /* Sub-KSP already exists (e.g. set via PCBJacobiGetSubKSP) */
      ksp  = jac->ksp[0];
      bjac = (PC_BJacobi_Singleblock *)jac->data;
    }

    /*
      The reason we need to generate these vectors is to serve
      as the right-hand side and solution vector for the solve on the
      block. We do not need to allocate space for the vectors since
      that is provided via VecPlaceArray() just before the call to
      KSPSolve() on the block.
    */
    PetscCall(MatGetSize(pmat, &m, &m));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->x));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->y));
    /* Match the vector type (e.g. CUDA) of the preconditioning matrix */
    PetscCall(MatGetVecType(pmat, &vectype));
    PetscCall(VecSetType(bjac->x, vectype));
    PetscCall(VecSetType(bjac->y, vectype));
  } else {
    ksp  = jac->ksp[0];
    bjac = (PC_BJacobi_Singleblock *)jac->data;
  }
  /* (Re)set operators: use Amat for the solve only when requested */
  PetscCall(KSPGetOptionsPrefix(ksp, &prefix));
  if (pc->useAmat) {
    PetscCall(KSPSetOperators(ksp, mat, pmat));
    PetscCall(MatSetOptionsPrefix(mat, prefix));
  } else {
    PetscCall(KSPSetOperators(ksp, pmat, pmat));
  }
  PetscCall(MatSetOptionsPrefix(pmat, prefix));
  if (!wasSetup && pc->setfromoptionscalled) {
    /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
    PetscCall(KSPSetFromOptions(ksp));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
787 
788 static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
789 {
790   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
791   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
792   PetscInt               i;
793 
794   PetscFunctionBegin;
795   if (bjac && bjac->pmat) {
796     PetscCall(MatDestroyMatrices(jac->n_local, &bjac->pmat));
797     if (pc->useAmat) PetscCall(MatDestroyMatrices(jac->n_local, &bjac->mat));
798   }
799 
800   for (i = 0; i < jac->n_local; i++) {
801     PetscCall(KSPReset(jac->ksp[i]));
802     if (bjac && bjac->x) {
803       PetscCall(VecDestroy(&bjac->x[i]));
804       PetscCall(VecDestroy(&bjac->y[i]));
805       PetscCall(ISDestroy(&bjac->is[i]));
806     }
807   }
808   PetscCall(PetscFree(jac->l_lens));
809   PetscCall(PetscFree(jac->g_lens));
810   PetscFunctionReturn(PETSC_SUCCESS);
811 }
812 
/*
   PCDestroy_BJacobi_Multiblock - Destroys all state of the multi-block
   variant: first resets (freeing submatrices, work vectors, index sets),
   then frees the arrays themselves, the per-block KSPs, and finally the
   base PC_BJacobi data.
*/
static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
{
  PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscInt               i;

  PetscFunctionBegin;
  /* Reset first so the per-block objects are destroyed before their arrays */
  PetscCall(PCReset_BJacobi_Multiblock(pc));
  if (bjac) {
    PetscCall(PetscFree2(bjac->x, bjac->y));
    PetscCall(PetscFree(bjac->starts));
    PetscCall(PetscFree(bjac->is));
  }
  PetscCall(PetscFree(jac->data));
  for (i = 0; i < jac->n_local; i++) PetscCall(KSPDestroy(&jac->ksp[i]));
  PetscCall(PetscFree(jac->ksp));
  /* Delegate destruction of the shared PC_BJacobi fields to the base class */
  PetscCall(PCDestroy_BJacobi(pc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
832 
833 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
834 {
835   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
836   PetscInt           i, n_local = jac->n_local;
837   KSPConvergedReason reason;
838 
839   PetscFunctionBegin;
840   for (i = 0; i < n_local; i++) {
841     PetscCall(KSPSetUp(jac->ksp[i]));
842     PetscCall(KSPGetConvergedReason(jac->ksp[i], &reason));
843     if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
844   }
845   PetscFunctionReturn(PETSC_SUCCESS);
846 }
847 
848 static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc, Vec x, Vec y)
849 {
850   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
851   PetscInt               i, n_local = jac->n_local;
852   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
853   PetscScalar           *yin;
854   const PetscScalar     *xin;
855 
856   PetscFunctionBegin;
857   PetscCall(VecGetArrayRead(x, &xin));
858   PetscCall(VecGetArray(y, &yin));
859   for (i = 0; i < n_local; i++) {
860     /*
861        To avoid copying the subvector from x into a workspace we instead
862        make the workspace vector array point to the subpart of the array of
863        the global vector.
864     */
865     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
866     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
867 
868     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
869     PetscCall(KSPSolve(jac->ksp[i], bjac->x[i], bjac->y[i]));
870     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
871     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
872 
873     PetscCall(VecResetArray(bjac->x[i]));
874     PetscCall(VecResetArray(bjac->y[i]));
875   }
876   PetscCall(VecRestoreArrayRead(x, &xin));
877   PetscCall(VecRestoreArray(y, &yin));
878   PetscFunctionReturn(PETSC_SUCCESS);
879 }
880 
881 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Multiblock(PC pc, Vec x, Vec y)
882 {
883   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
884   PetscInt               i, n_local = jac->n_local;
885   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
886   PetscScalar           *yin;
887   const PetscScalar     *xin;
888   PC                     subpc;
889 
890   PetscFunctionBegin;
891   PetscCall(VecGetArrayRead(x, &xin));
892   PetscCall(VecGetArray(y, &yin));
893   for (i = 0; i < n_local; i++) {
894     /*
895        To avoid copying the subvector from x into a workspace we instead
896        make the workspace vector array point to the subpart of the array of
897        the global vector.
898     */
899     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
900     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
901 
902     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
903     /* apply the symmetric left portion of the inner PC operator */
904     /* note this bypasses the inner KSP and its options completely */
905     PetscCall(KSPGetPC(jac->ksp[i], &subpc));
906     PetscCall(PCApplySymmetricLeft(subpc, bjac->x[i], bjac->y[i]));
907     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
908 
909     PetscCall(VecResetArray(bjac->x[i]));
910     PetscCall(VecResetArray(bjac->y[i]));
911   }
912   PetscCall(VecRestoreArrayRead(x, &xin));
913   PetscCall(VecRestoreArray(y, &yin));
914   PetscFunctionReturn(PETSC_SUCCESS);
915 }
916 
/*
   PCApplySymmetricRight_BJacobi_Multiblock - Applies the symmetric-right
   factor of each block's inner preconditioner, aliasing the block work
   vectors onto slices of x and y to avoid copies.
*/
static PetscErrorCode PCApplySymmetricRight_BJacobi_Multiblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
  PetscInt               i, n_local = jac->n_local;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscScalar           *yin;
  const PetscScalar     *xin;
  PC                     subpc;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xin));
  PetscCall(VecGetArray(y, &yin));
  for (i = 0; i < n_local; i++) {
    /*
       To avoid copying the subvector from x into a workspace we instead
       make the workspace vector array point to the subpart of the array of
       the global vector.
    */
    PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
    PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));

    PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
    /* apply the symmetric right portion of the inner PC operator */
    /* note this bypasses the inner KSP and its options completely */
    PetscCall(KSPGetPC(jac->ksp[i], &subpc));
    PetscCall(PCApplySymmetricRight(subpc, bjac->x[i], bjac->y[i]));
    PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));

    PetscCall(VecResetArray(bjac->x[i]));
    PetscCall(VecResetArray(bjac->y[i]));
  }
  PetscCall(VecRestoreArrayRead(x, &xin));
  PetscCall(VecRestoreArray(y, &yin));
  PetscFunctionReturn(PETSC_SUCCESS);
}
952 
953 static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc, Vec x, Vec y)
954 {
955   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
956   PetscInt               i, n_local = jac->n_local;
957   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
958   PetscScalar           *yin;
959   const PetscScalar     *xin;
960 
961   PetscFunctionBegin;
962   PetscCall(VecGetArrayRead(x, &xin));
963   PetscCall(VecGetArray(y, &yin));
964   for (i = 0; i < n_local; i++) {
965     /*
966        To avoid copying the subvector from x into a workspace we instead
967        make the workspace vector array point to the subpart of the array of
968        the global vector.
969     */
970     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
971     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
972 
973     PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
974     PetscCall(KSPSolveTranspose(jac->ksp[i], bjac->x[i], bjac->y[i]));
975     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
976     PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
977 
978     PetscCall(VecResetArray(bjac->x[i]));
979     PetscCall(VecResetArray(bjac->y[i]));
980   }
981   PetscCall(VecRestoreArrayRead(x, &xin));
982   PetscCall(VecRestoreArray(y, &yin));
983   PetscFunctionReturn(PETSC_SUCCESS);
984 }
985 
/*
   PCSetUp_BJacobi_Multiblock - Sets up block Jacobi when a process owns
   more than one block: creates per-block KSPs, work vectors and index sets
   on first setup, extracts the block submatrices (reusing them when the
   nonzero pattern is unchanged), and wires each block KSP to its submatrix.
*/
static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc, Mat mat, Mat pmat)
{
  PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
  PetscInt               m, n_local, N, M, start, i;
  const char            *prefix;
  KSP                    ksp;
  Vec                    x, y;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PC                     subpc;
  IS                     is;
  MatReuse               scall; /* MAT_INITIAL_MATRIX vs MAT_REUSE_MATRIX for submatrix extraction */
  VecType                vectype;
  MatNullSpace          *nullsp_mat = NULL, *nullsp_pmat = NULL; /* preserved across submatrix re-creation */

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(pc->pmat, &M, &N));

  n_local = jac->n_local;

  if (pc->useAmat) {
    /* Submatrix extraction below requires Amat and Pmat be the same Mat type */
    PetscBool same;
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, ((PetscObject)pmat)->type_name, &same));
    PetscCheck(same, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "Matrices not of same type");
  }

  if (!pc->setupcalled) {
    PetscInt nestlevel;

    scall = MAT_INITIAL_MATRIX;

    if (!jac->ksp) {
      /* First setup: install the multi-block PC operations ... */
      pc->ops->reset               = PCReset_BJacobi_Multiblock;
      pc->ops->destroy             = PCDestroy_BJacobi_Multiblock;
      pc->ops->apply               = PCApply_BJacobi_Multiblock;
      pc->ops->matapply            = NULL;
      pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Multiblock;
      pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Multiblock;
      pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Multiblock;
      pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Multiblock;

      /* ... and allocate per-block storage */
      PetscCall(PetscNew(&bjac));
      PetscCall(PetscMalloc1(n_local, &jac->ksp));
      PetscCall(PetscMalloc2(n_local, &bjac->x, n_local, &bjac->y));
      PetscCall(PetscMalloc1(n_local, &bjac->starts));

      jac->data = (void *)bjac;
      PetscCall(PetscMalloc1(n_local, &bjac->is));

      /* One sequential KSP per local block, with options prefix <prefix>sub_ */
      for (i = 0; i < n_local; i++) {
        PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
        PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
        PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
        PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
        PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
        PetscCall(KSPSetType(ksp, KSPPREONLY));
        PetscCall(KSPGetPC(ksp, &subpc));
        PetscCall(PCGetOptionsPrefix(pc, &prefix));
        PetscCall(KSPSetOptionsPrefix(ksp, prefix));
        PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));

        jac->ksp[i] = ksp;
      }
    } else {
      bjac = (PC_BJacobi_Multiblock *)jac->data;
    }

    start = 0;
    PetscCall(MatGetVecType(pmat, &vectype));
    for (i = 0; i < n_local; i++) {
      m = jac->l_lens[i]; /* local length of block i */
      /*
      The reason we need to generate these vectors is to serve
      as the right-hand side and solution vector for the solve on the
      block. We do not need to allocate space for the vectors since
      that is provided via VecPlaceArray() just before the call to
      KSPSolve() on the block.

      */
      PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &x));
      PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &y));
      PetscCall(VecSetType(x, vectype));
      PetscCall(VecSetType(y, vectype));

      bjac->x[i]      = x;
      bjac->y[i]      = y;
      bjac->starts[i] = start; /* offset of block i within the local arrays */

      /* Index set selecting the rows/columns of block i */
      PetscCall(ISCreateStride(PETSC_COMM_SELF, m, start, 1, &is));
      bjac->is[i] = is;

      start += m;
    }
  } else {
    bjac = (PC_BJacobi_Multiblock *)jac->data;
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      /* Save the null spaces so they survive the re-extraction below */
      PetscCall(MatGetNullSpaces(n_local, bjac->pmat, &nullsp_pmat));
      PetscCall(MatDestroyMatrices(n_local, &bjac->pmat));
      if (pc->useAmat) {
        PetscCall(MatGetNullSpaces(n_local, bjac->mat, &nullsp_mat));
        PetscCall(MatDestroyMatrices(n_local, &bjac->mat));
      }
      scall = MAT_INITIAL_MATRIX;
    } else scall = MAT_REUSE_MATRIX;
  }

  /* Extract (or refresh) the diagonal block submatrices */
  PetscCall(MatCreateSubMatrices(pmat, n_local, bjac->is, bjac->is, scall, &bjac->pmat));
  if (nullsp_pmat) PetscCall(MatRestoreNullSpaces(n_local, bjac->pmat, &nullsp_pmat));
  if (pc->useAmat) {
    PetscCall(MatCreateSubMatrices(mat, n_local, bjac->is, bjac->is, scall, &bjac->mat));
    if (nullsp_mat) PetscCall(MatRestoreNullSpaces(n_local, bjac->mat, &nullsp_mat));
  }
  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  PetscCall(PCModifySubMatrices(pc, n_local, bjac->is, bjac->is, bjac->pmat, pc->modifysubmatricesP));

  /* Hook each block KSP up to its submatrix and apply command-line options */
  for (i = 0; i < n_local; i++) {
    PetscCall(KSPGetOptionsPrefix(jac->ksp[i], &prefix));
    if (pc->useAmat) {
      PetscCall(KSPSetOperators(jac->ksp[i], bjac->mat[i], bjac->pmat[i]));
      PetscCall(MatSetOptionsPrefix(bjac->mat[i], prefix));
    } else {
      PetscCall(KSPSetOperators(jac->ksp[i], bjac->pmat[i], bjac->pmat[i]));
    }
    PetscCall(MatSetOptionsPrefix(bjac->pmat[i], prefix));
    if (pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[i]));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1117 
1118 /*
1119       These are for a single block with multiple processes
1120 */
1121 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc)
1122 {
1123   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
1124   KSP                subksp = jac->ksp[0];
1125   KSPConvergedReason reason;
1126 
1127   PetscFunctionBegin;
1128   PetscCall(KSPSetUp(subksp));
1129   PetscCall(KSPGetConvergedReason(subksp, &reason));
1130   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1131   PetscFunctionReturn(PETSC_SUCCESS);
1132 }
1133 
1134 static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
1135 {
1136   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1137   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1138 
1139   PetscFunctionBegin;
1140   PetscCall(VecDestroy(&mpjac->ysub));
1141   PetscCall(VecDestroy(&mpjac->xsub));
1142   PetscCall(MatDestroy(&mpjac->submats));
1143   if (jac->ksp) PetscCall(KSPReset(jac->ksp[0]));
1144   PetscFunctionReturn(PETSC_SUCCESS);
1145 }
1146 
/*
   PCDestroy_BJacobi_Multiproc - Destroys the multi-process-per-block state:
   resets first (vectors, submatrix), then destroys the sub-KSP, the
   subcommunicator, the multiproc context, and the base PC_BJacobi data.
*/
static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;

  PetscFunctionBegin;
  PetscCall(PCReset_BJacobi_Multiproc(pc));
  PetscCall(KSPDestroy(&jac->ksp[0]));
  PetscCall(PetscFree(jac->ksp));
  PetscCall(PetscSubcommDestroy(&mpjac->psubcomm));

  PetscCall(PetscFree(mpjac));
  /* Delegate destruction of the shared PC_BJacobi fields to the base class */
  PetscCall(PCDestroy_BJacobi(pc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1162 
1163 static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc, Vec x, Vec y)
1164 {
1165   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1166   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1167   PetscScalar          *yarray;
1168   const PetscScalar    *xarray;
1169   KSPConvergedReason    reason;
1170 
1171   PetscFunctionBegin;
1172   /* place x's and y's local arrays into xsub and ysub */
1173   PetscCall(VecGetArrayRead(x, &xarray));
1174   PetscCall(VecGetArray(y, &yarray));
1175   PetscCall(VecPlaceArray(mpjac->xsub, xarray));
1176   PetscCall(VecPlaceArray(mpjac->ysub, yarray));
1177 
1178   /* apply preconditioner on each matrix block */
1179   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1180   PetscCall(KSPSolve(jac->ksp[0], mpjac->xsub, mpjac->ysub));
1181   PetscCall(KSPCheckSolve(jac->ksp[0], pc, mpjac->ysub));
1182   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1183   PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
1184   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1185 
1186   PetscCall(VecResetArray(mpjac->xsub));
1187   PetscCall(VecResetArray(mpjac->ysub));
1188   PetscCall(VecRestoreArrayRead(x, &xarray));
1189   PetscCall(VecRestoreArray(y, &yarray));
1190   PetscFunctionReturn(PETSC_SUCCESS);
1191 }
1192 
/*
   PCMatApply_BJacobi_Multiproc - Applies the preconditioner to all columns
   of the dense matrix X at once, wrapping X's and Y's storage in temporary
   dense matrices on the sub-KSP's communicator and calling KSPMatSolve().
*/
static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc, Mat X, Mat Y)
{
  PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
  KSPConvergedReason reason;
  Mat                sX, sY; /* views of X and Y on the subcommunicator */
  const PetscScalar *x;
  PetscScalar       *y;
  PetscInt           m, N, lda, ldb;

  PetscFunctionBegin;
  /* apply preconditioner on each matrix block */
  PetscCall(MatGetLocalSize(X, &m, NULL));
  PetscCall(MatGetSize(X, NULL, &N));
  /* Preserve the leading dimensions so the wrappers address X/Y storage correctly */
  PetscCall(MatDenseGetLDA(X, &lda));
  PetscCall(MatDenseGetLDA(Y, &ldb));
  PetscCall(MatDenseGetArrayRead(X, &x));
  PetscCall(MatDenseGetArrayWrite(Y, &y));
  /* Wrap the raw arrays in dense matrices living on the sub-KSP's communicator */
  PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, (PetscScalar *)x, &sX));
  PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, y, &sY));
  PetscCall(MatDenseSetLDA(sX, lda));
  PetscCall(MatDenseSetLDA(sY, ldb));
  PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
  PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, NULL));
  PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
  /* Destroy the wrappers before restoring the arrays they borrowed */
  PetscCall(MatDestroy(&sY));
  PetscCall(MatDestroy(&sX));
  PetscCall(MatDenseRestoreArrayWrite(Y, &y));
  PetscCall(MatDenseRestoreArrayRead(X, &x));
  PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
  if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1226 
/*
   PCSetUp_BJacobi_Multiproc - Sets up block Jacobi for fewer blocks than
   processes, i.e. one block shared by a subcommunicator of processes:
   creates the subcommunicators, extracts the multi-process matrix block,
   and builds a single parallel sub-KSP (prefix "sub_") on each subcomm.
*/
static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
  PetscInt              m, n;
  MPI_Comm              comm, subcomm = 0;
  const char           *prefix;
  PetscBool             wasSetup = PETSC_TRUE; /* false only on first setup */
  VecType               vectype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
  PetscCheck(jac->n_local <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only a single block in a subcommunicator is supported");
  jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
  if (!pc->setupcalled) {
    PetscInt nestlevel;

    wasSetup = PETSC_FALSE;
    PetscCall(PetscNew(&mpjac));
    jac->data = (void *)mpjac;

    /* initialize datastructure mpjac */
    if (!jac->psubcomm) {
      /* Create default contiguous subcommunicatiors if user does not provide them */
      PetscCall(PetscSubcommCreate(comm, &jac->psubcomm));
      PetscCall(PetscSubcommSetNumber(jac->psubcomm, jac->n));
      PetscCall(PetscSubcommSetType(jac->psubcomm, PETSC_SUBCOMM_CONTIGUOUS));
    }
    mpjac->psubcomm = jac->psubcomm;
    subcomm         = PetscSubcommChild(mpjac->psubcomm);

    /* Get matrix blocks of pmat */
    PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));

    /* create a new PC that processors in each subcomm have copy of */
    PetscCall(PetscMalloc1(1, &jac->ksp));
    PetscCall(KSPCreate(subcomm, &jac->ksp[0]));
    PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
    PetscCall(KSPSetNestLevel(jac->ksp[0], nestlevel + 1));
    PetscCall(KSPSetErrorIfNotConverged(jac->ksp[0], pc->erroriffailure));
    PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0], (PetscObject)pc, 1));
    PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
    PetscCall(KSPGetPC(jac->ksp[0], &mpjac->pc));

    /* Sub-solver options live under <pc prefix>sub_ */
    PetscCall(PCGetOptionsPrefix(pc, &prefix));
    PetscCall(KSPSetOptionsPrefix(jac->ksp[0], prefix));
    PetscCall(KSPAppendOptionsPrefix(jac->ksp[0], "sub_"));
    PetscCall(KSPGetOptionsPrefix(jac->ksp[0], &prefix));
    PetscCall(MatSetOptionsPrefix(mpjac->submats, prefix));

    /* create dummy vectors xsub and ysub; their arrays are supplied at apply
       time via VecPlaceArray(), so no storage is allocated here */
    PetscCall(MatGetLocalSize(mpjac->submats, &m, &n));
    PetscCall(VecCreateMPIWithArray(subcomm, 1, n, PETSC_DECIDE, NULL, &mpjac->xsub));
    PetscCall(VecCreateMPIWithArray(subcomm, 1, m, PETSC_DECIDE, NULL, &mpjac->ysub));
    PetscCall(MatGetVecType(mpjac->submats, &vectype));
    PetscCall(VecSetType(mpjac->xsub, vectype));
    PetscCall(VecSetType(mpjac->ysub, vectype));

    /* Install the multiproc implementations of the PC operations */
    pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc;
    pc->ops->reset         = PCReset_BJacobi_Multiproc;
    pc->ops->destroy       = PCDestroy_BJacobi_Multiproc;
    pc->ops->apply         = PCApply_BJacobi_Multiproc;
    pc->ops->matapply      = PCMatApply_BJacobi_Multiproc;
  } else { /* pc->setupcalled */
    subcomm = PetscSubcommChild(mpjac->psubcomm);
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      /* destroy old matrix blocks, then get new matrix blocks */
      if (mpjac->submats) PetscCall(MatDestroy(&mpjac->submats));
      PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
    } else {
      PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_REUSE_MATRIX, &mpjac->submats));
    }
    PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
  }

  if (!wasSetup && pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[0]));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1305