xref: /petsc/src/ksp/pc/impls/bjacobi/bjacobi.c (revision a4e35b1925eceef64945ea472b84f2bf06a67b5e)
1 
2 /*
3    Defines a block Jacobi preconditioner.
4 */
5 
6 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/
7 
8 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC, Mat, Mat);
9 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC, Mat, Mat);
10 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);
11 
/*
  PCSetUp_BJacobi - Determines the block layout (how many blocks, their sizes,
  and which blocks are local to this rank), extracts the diagonal block(s) of
  Amat/Pmat, and dispatches to the single-block, multi-block, or multi-process
  setup routine.
*/
static PetscErrorCode PCSetUp_BJacobi(PC pc)
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;
  Mat         mat = pc->mat, pmat = pc->pmat;
  PetscBool   hasop;
  PetscInt    N, M, start, i, sum, end;
  PetscInt    bs, i_start = -1, i_end = -1;
  PetscMPIInt rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
  PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
  PetscCall(MatGetBlockSize(pc->pmat, &bs));

  /* fewer blocks than processes: each block is shared by several ranks */
  if (jac->n > 0 && jac->n < size) {
    PetscCall(PCSetUp_BJacobi_Multiproc(pc));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /*    Determines the number of blocks assigned to each processor */
  /*   local block count  given */
  if (jac->n_local > 0 && jac->n < 0) {
    PetscCall(MPIU_Allreduce(&jac->n_local, &jac->n, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
    if (jac->l_lens) { /* check that user set these correctly */
      sum = 0;
      for (i = 0; i < jac->n_local; i++) {
        /* each block length must be a multiple of the matrix block size */
        PetscCheck(jac->l_lens[i] / bs * bs == jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
        sum += jac->l_lens[i];
      }
      PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local lens set incorrectly");
    } else {
      /* no sizes given: split the local rows as evenly as possible, block-size aligned */
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
    }
  } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
    /* global blocks given: determine which ones are local */
    if (jac->g_lens) {
      /* check that g_lens has valid entries */
      for (i = 0; i < jac->n; i++) {
        PetscCheck(jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Zero block not allowed");
        PetscCheck(jac->g_lens[i] / bs * bs == jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
      }
      if (size == 1) {
        /* sequential: all blocks are local */
        jac->n_local = jac->n;
        PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
        PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens, jac->n_local));
        /* check that user set these correctly */
        sum = 0;
        for (i = 0; i < jac->n_local; i++) sum += jac->l_lens[i];
        PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Global lens set incorrectly");
      } else {
        PetscCall(MatGetOwnershipRange(pc->pmat, &start, &end));
        /* loop over blocks determining first one owned by me; the block
           boundaries must line up exactly with [start,end) or we error out */
        sum = 0;
        for (i = 0; i < jac->n + 1; i++) {
          if (sum == start) {
            i_start = i;
            goto start_1;
          }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
      start_1:
        /* continue the scan for the first block past my ownership range */
        for (i = i_start; i < jac->n + 1; i++) {
          if (sum == end) {
            i_end = i;
            goto end_1;
          }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
      end_1:
        jac->n_local = i_end - i_start;
        PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
        PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens + i_start, jac->n_local));
      }
    } else { /* no global block sizes given, determine them using default layout */
      jac->n_local = jac->n / size + ((jac->n % size) > rank);
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) {
        jac->l_lens[i] = ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)) * bs;
        /* a zero-length block means more blocks were requested than rows available */
        PetscCheck(jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Too many blocks given");
      }
    }
  } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given: default to one block per process */
    jac->n       = size;
    jac->n_local = 1;
    PetscCall(PetscMalloc1(1, &jac->l_lens));
    jac->l_lens[0] = M;
  } else { /* jac->n > 0 && jac->n_local > 0 */
    if (!jac->l_lens) {
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
    }
  }
  PetscCheck(jac->n_local >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of blocks is less than number of processors");

  /*    Determines mat and pmat */
  PetscCall(MatHasOperation(pc->mat, MATOP_GET_DIAGONAL_BLOCK, &hasop));
  if (!hasop && size == 1) {
    /* sequential and no diagonal-block support: use the matrices directly */
    mat  = pc->mat;
    pmat = pc->pmat;
  } else {
    if (pc->useAmat) {
      /* use block from Amat matrix, not Pmat for local MatMult() */
      PetscCall(MatGetDiagonalBlock(pc->mat, &mat));
    }
    if (pc->pmat != pc->mat || !pc->useAmat) {
      PetscCall(MatGetDiagonalBlock(pc->pmat, &pmat));
    } else pmat = mat;
  }

  /*
     Setup code depends on the number of blocks
  */
  if (jac->n_local == 1) {
    PetscCall(PCSetUp_BJacobi_Singleblock(pc, mat, pmat));
  } else {
    PetscCall(PCSetUp_BJacobi_Multiblock(pc, mat, pmat));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
135 
136 /* Default destroy, if it has never been setup */
137 static PetscErrorCode PCDestroy_BJacobi(PC pc)
138 {
139   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
140 
141   PetscFunctionBegin;
142   PetscCall(PetscFree(jac->g_lens));
143   PetscCall(PetscFree(jac->l_lens));
144   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", NULL));
145   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", NULL));
146   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", NULL));
147   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", NULL));
148   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", NULL));
149   PetscCall(PetscFree(pc->data));
150   PetscFunctionReturn(PETSC_SUCCESS);
151 }
152 
153 static PetscErrorCode PCSetFromOptions_BJacobi(PC pc, PetscOptionItems *PetscOptionsObject)
154 {
155   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
156   PetscInt    blocks, i;
157   PetscBool   flg;
158 
159   PetscFunctionBegin;
160   PetscOptionsHeadBegin(PetscOptionsObject, "Block Jacobi options");
161   PetscCall(PetscOptionsInt("-pc_bjacobi_blocks", "Total number of blocks", "PCBJacobiSetTotalBlocks", jac->n, &blocks, &flg));
162   if (flg) PetscCall(PCBJacobiSetTotalBlocks(pc, blocks, NULL));
163   PetscCall(PetscOptionsInt("-pc_bjacobi_local_blocks", "Local number of blocks", "PCBJacobiSetLocalBlocks", jac->n_local, &blocks, &flg));
164   if (flg) PetscCall(PCBJacobiSetLocalBlocks(pc, blocks, NULL));
165   if (jac->ksp) {
166     /* The sub-KSP has already been set up (e.g., PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions was not called
167      * unless we had already been called. */
168     for (i = 0; i < jac->n_local; i++) PetscCall(KSPSetFromOptions(jac->ksp[i]));
169   }
170   PetscOptionsHeadEnd();
171   PetscFunctionReturn(PETSC_SUCCESS);
172 }
173 
174 #include <petscdraw.h>
#include <petscdraw.h>
/*
  PCView_BJacobi - Prints/draws the block Jacobi configuration. For ASCII
  viewers it shows the first block's solver (default format) or every local
  block's solver (ascii_info_detail); also supports string and draw viewers.
*/
static PetscErrorCode PCView_BJacobi(PC pc, PetscViewer viewer)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
  PetscMPIInt           rank;
  PetscInt              i;
  PetscBool             iascii, isstring, isdraw;
  PetscViewer           sviewer;
  PetscViewerFormat     format;
  const char           *prefix;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    if (pc->useAmat) PetscCall(PetscViewerASCIIPrintf(viewer, "  using Amat local matrix, number of blocks = %" PetscInt_FMT "\n", jac->n));
    PetscCall(PetscViewerASCIIPrintf(viewer, "  number of blocks = %" PetscInt_FMT "\n", jac->n));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* default format: only rank 0 shows its first block's solver */
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for first block is in the following KSP and PC objects on rank 0:\n"));
      PetscCall(PCGetOptionsPrefix(pc, &prefix));
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all blocks\n", prefix ? prefix : ""));
      if (jac->ksp && !jac->psubcomm) {
        /* one or more blocks per process */
        PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
        if (rank == 0) {
          PetscCall(PetscViewerASCIIPushTab(viewer));
          PetscCall(KSPView(jac->ksp[0], sviewer));
          PetscCall(PetscViewerASCIIPopTab(viewer));
        }
        PetscCall(PetscViewerFlush(sviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
        PetscCall(PetscViewerFlush(viewer));
        /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else if (mpjac && jac->ksp && mpjac->psubcomm) {
        /* multi-process blocks: view from the first subcommunicator */
        PetscCall(PetscViewerGetSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
        if (!mpjac->psubcomm->color) {
          PetscCall(PetscViewerASCIIPushTab(viewer));
          PetscCall(KSPView(*(jac->ksp), sviewer));
          PetscCall(PetscViewerASCIIPopTab(viewer));
        }
        PetscCall(PetscViewerFlush(sviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
        PetscCall(PetscViewerFlush(viewer));
        /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else {
        /* no sub-KSPs yet (PC not set up): nothing more to show */
        PetscCall(PetscViewerFlush(viewer));
      }
    } else {
      PetscInt n_global;
      PetscCall(MPIU_Allreduce(&jac->n_local, &n_global, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for each block is in the following KSP and PC objects:\n"));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n", rank, jac->n_local, jac->first_local));
      PetscCall(PetscViewerASCIIPushTab(viewer));
      PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
      for (i = 0; i < jac->n_local; i++) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] local block number %" PetscInt_FMT "\n", rank, i));
        PetscCall(KSPView(jac->ksp[i], sviewer));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "- - - - - - - - - - - - - - - - - -\n"));
      }
      PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
      PetscCall(PetscViewerASCIIPopTab(viewer));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
  } else if (isstring) {
    PetscCall(PetscViewerStringSPrintf(viewer, " blks=%" PetscInt_FMT, jac->n));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (jac->ksp) PetscCall(KSPView(jac->ksp[0], sviewer));
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
  } else if (isdraw) {
    PetscDraw draw;
    char      str[25];
    PetscReal x, y, bottom, h;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawGetCurrentPoint(draw, &x, &y));
    PetscCall(PetscSNPrintf(str, 25, "Number blocks %" PetscInt_FMT, jac->n));
    PetscCall(PetscDrawStringBoxed(draw, x, y, PETSC_DRAW_RED, PETSC_DRAW_BLACK, str, NULL, &h));
    bottom = y - h;
    PetscCall(PetscDrawPushCurrentPoint(draw, x, bottom));
    /* note: the communicator on the viewer is different than the one on the ksp in parallel */
    if (jac->ksp) PetscCall(KSPView(jac->ksp[0], viewer));
    PetscCall(PetscDrawPopCurrentPoint(draw));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
266 
/* Implementation of PCBJacobiGetSubKSP(): hands out the local block count,
   the global index of the first local block, and the sub-KSP array (not a
   copy; callers must not free it). */
static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc, PetscInt *n_local, PetscInt *first_local, KSP **ksp)
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;

  PetscFunctionBegin;
  /* the sub-KSP array is only created during setup */
  PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Must call KSPSetUp() or PCSetUp() first");

  if (n_local) *n_local = jac->n_local;
  if (first_local) *first_local = jac->first_local;
  if (ksp) *ksp = jac->ksp;
  PetscFunctionReturn(PETSC_SUCCESS);
}
279 
280 static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc, PetscInt blocks, PetscInt *lens)
281 {
282   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
283 
284   PetscFunctionBegin;
285   PetscCheck(pc->setupcalled <= 0 || jac->n == blocks, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
286   jac->n = blocks;
287   if (!lens) jac->g_lens = NULL;
288   else {
289     PetscCall(PetscMalloc1(blocks, &jac->g_lens));
290     PetscCall(PetscArraycpy(jac->g_lens, lens, blocks));
291   }
292   PetscFunctionReturn(PETSC_SUCCESS);
293 }
294 
/* Implementation of PCBJacobiGetTotalBlocks(): returns the global block count
   and, optionally, the stored per-block sizes array (not a copy; NULL if no
   explicit sizes were ever set). */
static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;

  PetscFunctionBegin;
  *blocks = jac->n;
  if (lens) *lens = jac->g_lens;
  PetscFunctionReturn(PETSC_SUCCESS);
}
304 
305 static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt lens[])
306 {
307   PC_BJacobi *jac;
308 
309   PetscFunctionBegin;
310   jac = (PC_BJacobi *)pc->data;
311 
312   jac->n_local = blocks;
313   if (!lens) jac->l_lens = NULL;
314   else {
315     PetscCall(PetscMalloc1(blocks, &jac->l_lens));
316     PetscCall(PetscArraycpy(jac->l_lens, lens, blocks));
317   }
318   PetscFunctionReturn(PETSC_SUCCESS);
319 }
320 
/* Implementation of PCBJacobiGetLocalBlocks(): returns the local block count
   and, optionally, the stored per-block sizes array (not a copy; NULL if no
   explicit sizes were ever set). */
static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;

  PetscFunctionBegin;
  *blocks = jac->n_local;
  if (lens) *lens = jac->l_lens;
  PetscFunctionReturn(PETSC_SUCCESS);
}
330 
331 /*@C
332   PCBJacobiGetSubKSP - Gets the local `KSP` contexts for all blocks on
333   this processor.
334 
335   Not Collective
336 
337   Input Parameter:
338 . pc - the preconditioner context
339 
340   Output Parameters:
341 + n_local     - the number of blocks on this processor, or NULL
342 . first_local - the global number of the first block on this processor, or NULL
343 - ksp         - the array of KSP contexts
344 
345   Notes:
346   After `PCBJacobiGetSubKSP()` the array of `KSP` contexts is not to be freed.
347 
348   Currently for some matrix implementations only 1 block per processor
349   is supported.
350 
351   You must call `KSPSetUp()` or `PCSetUp()` before calling `PCBJacobiGetSubKSP()`.
352 
353   Fortran Notes:
354   You must pass in a `KSP` array that is large enough to contain all the local `KSP`s.
355 
356   You can call `PCBJacobiGetSubKSP`(pc,nlocal,firstlocal,`PETSC_NULL_KSP`,ierr) to determine how large the
357   `KSP` array must be.
358 
359   Level: advanced
360 
361 .seealso: `PCBJACOBI`, `PCASM`, `PCASMGetSubKSP()`
362 @*/
PetscErrorCode PCBJacobiGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  /* dispatch to the composed method (e.g. PCBJacobiGetSubKSP_BJacobi);
     PetscUseMethod errors if the PC type does not provide it */
  PetscUseMethod(pc, "PCBJacobiGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp));
  PetscFunctionReturn(PETSC_SUCCESS);
}
370 
371 /*@
372   PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
373   Jacobi preconditioner.
374 
375   Collective
376 
377   Input Parameters:
378 + pc     - the preconditioner context
379 . blocks - the number of blocks
380 - lens   - [optional] integer array containing the size of each block
381 
382   Options Database Key:
383 . -pc_bjacobi_blocks <blocks> - Sets the number of global blocks
384 
385   Note:
386   Currently only a limited number of blocking configurations are supported.
387   All processors sharing the `PC` must call this routine with the same data.
388 
389   Level: intermediate
390 
391 .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetLocalBlocks()`
392 @*/
PetscErrorCode PCBJacobiSetTotalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscCheck(blocks > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Must have positive blocks");
  /* PetscTryMethod: silently a no-op if pc is not a block Jacobi PC */
  PetscTryMethod(pc, "PCBJacobiSetTotalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}
401 
402 /*@C
403   PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
404   Jacobi, `PCBJACOBI`, preconditioner.
405 
406   Not Collective
407 
408   Input Parameter:
409 . pc - the preconditioner context
410 
411   Output Parameters:
412 + blocks - the number of blocks
413 - lens   - integer array containing the size of each block
414 
415   Level: intermediate
416 
417 .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetLocalBlocks()`
418 @*/
PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscAssertPointer(blocks, 2);
  /* dispatch to the composed method; PetscUseMethod errors if the PC type
     does not provide it */
  PetscUseMethod(pc, "PCBJacobiGetTotalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}
427 
428 /*@
429   PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
430   Jacobi, `PCBJACOBI`,  preconditioner.
431 
432   Not Collective
433 
434   Input Parameters:
435 + pc     - the preconditioner context
436 . blocks - the number of blocks
437 - lens   - [optional] integer array containing size of each block
438 
439   Options Database Key:
440 . -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks
441 
442   Note:
443   Currently only a limited number of blocking configurations are supported.
444 
445   Level: intermediate
446 
447 .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetTotalBlocks()`
448 @*/
449 PetscErrorCode PCBJacobiSetLocalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
450 {
451   PetscFunctionBegin;
452   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
453   PetscCheck(blocks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must have nonegative blocks");
454   PetscTryMethod(pc, "PCBJacobiSetLocalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
455   PetscFunctionReturn(PETSC_SUCCESS);
456 }
457 
458 /*@C
459   PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
460   Jacobi, `PCBJACOBI`, preconditioner.
461 
462   Not Collective
463 
464   Input Parameters:
465 + pc     - the preconditioner context
466 . blocks - the number of blocks
467 - lens   - [optional] integer array containing size of each block
468 
469   Note:
470   Currently only a limited number of blocking configurations are supported.
471 
472   Level: intermediate
473 
474 .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetTotalBlocks()`
475 @*/
PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscAssertPointer(blocks, 2);
  /* dispatch to the composed method; PetscUseMethod errors if the PC type
     does not provide it */
  PetscUseMethod(pc, "PCBJacobiGetLocalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}
484 
485 /*MC
486    PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with
487            its own `KSP` object.
488 
489    Options Database Keys:
490 +  -pc_use_amat - use Amat to apply block of operator in inner Krylov method
491 -  -pc_bjacobi_blocks <n> - use n total blocks
492 
493    Notes:
494     See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable point block, and `PCPBJACOBI` for fixed size point block
495 
496     Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.
497 
498      To set options on the solvers for each block append -sub_ to all the `KSP` and `PC`
499         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
500 
501      To set the options on the solvers separate for each block call `PCBJacobiGetSubKSP()`
502          and set the options directly on the resulting `KSP` object (you can access its `PC`
503          `KSPGetPC()`)
504 
505      For GPU-based vectors (`VECCUDA`, `VECViennaCL`) it is recommended to use exactly one block per MPI process for best
506          performance.  Different block partitioning may lead to additional data transfers
507          between host and GPU that lead to degraded performance.
508 
     When multiple processes share a single block, each block encompasses exactly all the unknowns owned by its set of processes.
510 
511    Level: beginner
512 
513 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCType`,
514           `PCASM`, `PCSetUseAmat()`, `PCGetUseAmat()`, `PCBJacobiGetSubKSP()`, `PCBJacobiSetTotalBlocks()`,
515           `PCBJacobiSetLocalBlocks()`, `PCSetModifySubMatrices()`, `PCJACOBI`, `PCVPBJACOBI`, `PCPBJACOBI`
516 M*/
517 
518 PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
519 {
520   PetscMPIInt rank;
521   PC_BJacobi *jac;
522 
523   PetscFunctionBegin;
524   PetscCall(PetscNew(&jac));
525   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
526 
527   pc->ops->apply           = NULL;
528   pc->ops->matapply        = NULL;
529   pc->ops->applytranspose  = NULL;
530   pc->ops->setup           = PCSetUp_BJacobi;
531   pc->ops->destroy         = PCDestroy_BJacobi;
532   pc->ops->setfromoptions  = PCSetFromOptions_BJacobi;
533   pc->ops->view            = PCView_BJacobi;
534   pc->ops->applyrichardson = NULL;
535 
536   pc->data         = (void *)jac;
537   jac->n           = -1;
538   jac->n_local     = -1;
539   jac->first_local = rank;
540   jac->ksp         = NULL;
541   jac->g_lens      = NULL;
542   jac->l_lens      = NULL;
543   jac->psubcomm    = NULL;
544 
545   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", PCBJacobiGetSubKSP_BJacobi));
546   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", PCBJacobiSetTotalBlocks_BJacobi));
547   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", PCBJacobiGetTotalBlocks_BJacobi));
548   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", PCBJacobiSetLocalBlocks_BJacobi));
549   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", PCBJacobiGetLocalBlocks_BJacobi));
550   PetscFunctionReturn(PETSC_SUCCESS);
551 }
552 
553 /*
554         These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI
555 */
556 static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
557 {
558   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
559   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
560 
561   PetscFunctionBegin;
562   PetscCall(KSPReset(jac->ksp[0]));
563   PetscCall(VecDestroy(&bjac->x));
564   PetscCall(VecDestroy(&bjac->y));
565   PetscFunctionReturn(PETSC_SUCCESS);
566 }
567 
568 static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
569 {
570   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
571   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
572 
573   PetscFunctionBegin;
574   PetscCall(PCReset_BJacobi_Singleblock(pc));
575   PetscCall(KSPDestroy(&jac->ksp[0]));
576   PetscCall(PetscFree(jac->ksp));
577   PetscCall(PetscFree(bjac));
578   PetscCall(PCDestroy_BJacobi(pc));
579   PetscFunctionReturn(PETSC_SUCCESS);
580 }
581 
582 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
583 {
584   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
585   KSP                subksp = jac->ksp[0];
586   KSPConvergedReason reason;
587 
588   PetscFunctionBegin;
589   PetscCall(KSPSetUp(subksp));
590   PetscCall(KSPGetConvergedReason(subksp, &reason));
591   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
592   PetscFunctionReturn(PETSC_SUCCESS);
593 }
594 
595 static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc, Vec x, Vec y)
596 {
597   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
598   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
599 
600   PetscFunctionBegin;
601   PetscCall(VecGetLocalVectorRead(x, bjac->x));
602   PetscCall(VecGetLocalVector(y, bjac->y));
603   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
604      matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
605      of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
606   PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
607   PetscCall(KSPSolve(jac->ksp[0], bjac->x, bjac->y));
608   PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
609   PetscCall(VecRestoreLocalVectorRead(x, bjac->x));
610   PetscCall(VecRestoreLocalVector(y, bjac->y));
611   PetscFunctionReturn(PETSC_SUCCESS);
612 }
613 
614 static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc, Mat X, Mat Y)
615 {
616   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
617   Mat         sX, sY;
618 
619   PetscFunctionBegin;
620   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
621      matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
622      of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
623   PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
624   PetscCall(MatDenseGetLocalMatrix(X, &sX));
625   PetscCall(MatDenseGetLocalMatrix(Y, &sY));
626   PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
627   PetscFunctionReturn(PETSC_SUCCESS);
628 }
629 
630 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc, Vec x, Vec y)
631 {
632   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
633   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
634   PetscScalar            *y_array;
635   const PetscScalar      *x_array;
636   PC                      subpc;
637 
638   PetscFunctionBegin;
639   /*
640       The VecPlaceArray() is to avoid having to copy the
641     y vector into the bjac->x vector. The reason for
642     the bjac->x vector is that we need a sequential vector
643     for the sequential solve.
644   */
645   PetscCall(VecGetArrayRead(x, &x_array));
646   PetscCall(VecGetArray(y, &y_array));
647   PetscCall(VecPlaceArray(bjac->x, x_array));
648   PetscCall(VecPlaceArray(bjac->y, y_array));
649   /* apply the symmetric left portion of the inner PC operator */
650   /* note this by-passes the inner KSP and its options completely */
651   PetscCall(KSPGetPC(jac->ksp[0], &subpc));
652   PetscCall(PCApplySymmetricLeft(subpc, bjac->x, bjac->y));
653   PetscCall(VecResetArray(bjac->x));
654   PetscCall(VecResetArray(bjac->y));
655   PetscCall(VecRestoreArrayRead(x, &x_array));
656   PetscCall(VecRestoreArray(y, &y_array));
657   PetscFunctionReturn(PETSC_SUCCESS);
658 }
659 
660 static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc, Vec x, Vec y)
661 {
662   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
663   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
664   PetscScalar            *y_array;
665   const PetscScalar      *x_array;
666   PC                      subpc;
667 
668   PetscFunctionBegin;
669   /*
670       The VecPlaceArray() is to avoid having to copy the
671     y vector into the bjac->x vector. The reason for
672     the bjac->x vector is that we need a sequential vector
673     for the sequential solve.
674   */
675   PetscCall(VecGetArrayRead(x, &x_array));
676   PetscCall(VecGetArray(y, &y_array));
677   PetscCall(VecPlaceArray(bjac->x, x_array));
678   PetscCall(VecPlaceArray(bjac->y, y_array));
679 
680   /* apply the symmetric right portion of the inner PC operator */
681   /* note this by-passes the inner KSP and its options completely */
682 
683   PetscCall(KSPGetPC(jac->ksp[0], &subpc));
684   PetscCall(PCApplySymmetricRight(subpc, bjac->x, bjac->y));
685 
686   PetscCall(VecResetArray(bjac->x));
687   PetscCall(VecResetArray(bjac->y));
688   PetscCall(VecRestoreArrayRead(x, &x_array));
689   PetscCall(VecRestoreArray(y, &y_array));
690   PetscFunctionReturn(PETSC_SUCCESS);
691 }
692 
693 static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc, Vec x, Vec y)
694 {
695   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
696   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
697   PetscScalar            *y_array;
698   const PetscScalar      *x_array;
699 
700   PetscFunctionBegin;
701   /*
702       The VecPlaceArray() is to avoid having to copy the
703     y vector into the bjac->x vector. The reason for
704     the bjac->x vector is that we need a sequential vector
705     for the sequential solve.
706   */
707   PetscCall(VecGetArrayRead(x, &x_array));
708   PetscCall(VecGetArray(y, &y_array));
709   PetscCall(VecPlaceArray(bjac->x, x_array));
710   PetscCall(VecPlaceArray(bjac->y, y_array));
711   PetscCall(KSPSolveTranspose(jac->ksp[0], bjac->x, bjac->y));
712   PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
713   PetscCall(VecResetArray(bjac->x));
714   PetscCall(VecResetArray(bjac->y));
715   PetscCall(VecRestoreArrayRead(x, &x_array));
716   PetscCall(VecRestoreArray(y, &y_array));
717   PetscFunctionReturn(PETSC_SUCCESS);
718 }
719 
/* Set up block Jacobi with a single sequential block per process: create (once)
   the sequential sub-KSP with prefix "<pc-prefix>sub_", install the Singleblock
   function table on the PC, create array-less work vectors for the block solve,
   and (re)attach the operators to the sub-KSP. */
static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc, Mat mat, Mat pmat)
{
  PC_BJacobi             *jac = (PC_BJacobi *)pc->data;
  PetscInt                m;
  KSP                     ksp;
  PC_BJacobi_Singleblock *bjac;
  PetscBool               wasSetup = PETSC_TRUE;
  VecType                 vectype;
  const char             *prefix;

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    if (!jac->ksp) {
      PetscInt nestlevel;

      wasSetup = PETSC_FALSE; /* trigger KSPSetFromOptions() at the end */

      PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
      PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
      PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
      PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
      PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
      PetscCall(KSPSetType(ksp, KSPPREONLY));
      /* sub-solver options use the outer prefix followed by "sub_" */
      PetscCall(PCGetOptionsPrefix(pc, &prefix));
      PetscCall(KSPSetOptionsPrefix(ksp, prefix));
      PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));

      pc->ops->reset               = PCReset_BJacobi_Singleblock;
      pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
      pc->ops->apply               = PCApply_BJacobi_Singleblock;
      pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
      pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
      pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
      pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
      pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;

      PetscCall(PetscMalloc1(1, &jac->ksp));
      jac->ksp[0] = ksp;

      PetscCall(PetscNew(&bjac));
      jac->data = (void *)bjac;
    } else {
      /* KSP already exists (e.g. created by PCBJacobiGetSubKSP()) */
      ksp  = jac->ksp[0];
      bjac = (PC_BJacobi_Singleblock *)jac->data;
    }

    /*
      The reason we need to generate these vectors is to serve
      as the right-hand side and solution vector for the solve on the
      block. We do not need to allocate space for the vectors since
      that is provided via VecPlaceArray() just before the call to
      KSPSolve() on the block.
    */
    PetscCall(MatGetSize(pmat, &m, &m));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->x));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->y));
    /* keep the work vectors in the same (possibly GPU) class as the matrix */
    PetscCall(MatGetVecType(pmat, &vectype));
    PetscCall(VecSetType(bjac->x, vectype));
    PetscCall(VecSetType(bjac->y, vectype));
  } else {
    ksp  = jac->ksp[0];
    bjac = (PC_BJacobi_Singleblock *)jac->data;
  }
  PetscCall(KSPGetOptionsPrefix(ksp, &prefix));
  if (pc->useAmat) {
    /* use A for the solve, pmat only to build the preconditioner */
    PetscCall(KSPSetOperators(ksp, mat, pmat));
    PetscCall(MatSetOptionsPrefix(mat, prefix));
  } else {
    PetscCall(KSPSetOperators(ksp, pmat, pmat));
  }
  PetscCall(MatSetOptionsPrefix(pmat, prefix));
  if (!wasSetup && pc->setfromoptionscalled) {
    /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
    PetscCall(KSPSetFromOptions(ksp));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
797 
798 static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
799 {
800   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
801   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
802   PetscInt               i;
803 
804   PetscFunctionBegin;
805   if (bjac && bjac->pmat) {
806     PetscCall(MatDestroyMatrices(jac->n_local, &bjac->pmat));
807     if (pc->useAmat) PetscCall(MatDestroyMatrices(jac->n_local, &bjac->mat));
808   }
809 
810   for (i = 0; i < jac->n_local; i++) {
811     PetscCall(KSPReset(jac->ksp[i]));
812     if (bjac && bjac->x) {
813       PetscCall(VecDestroy(&bjac->x[i]));
814       PetscCall(VecDestroy(&bjac->y[i]));
815       PetscCall(ISDestroy(&bjac->is[i]));
816     }
817   }
818   PetscCall(PetscFree(jac->l_lens));
819   PetscCall(PetscFree(jac->g_lens));
820   PetscFunctionReturn(PETSC_SUCCESS);
821 }
822 
/* Tear down the multiblock variant: reset first (frees submatrices, work
   vectors, index sets, block-length arrays), then free the holder arrays,
   the per-block solvers, and finally the shared PC_BJacobi data. */
static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
{
  PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscInt               i;

  PetscFunctionBegin;
  PetscCall(PCReset_BJacobi_Multiblock(pc));
  if (bjac) { /* bjac may be NULL if setup was never completed */
    PetscCall(PetscFree2(bjac->x, bjac->y));
    PetscCall(PetscFree(bjac->starts));
    PetscCall(PetscFree(bjac->is));
  }
  PetscCall(PetscFree(jac->data));
  /* destroy the per-block KSPs before PCDestroy_BJacobi frees jac itself */
  for (i = 0; i < jac->n_local; i++) PetscCall(KSPDestroy(&jac->ksp[i]));
  PetscCall(PetscFree(jac->ksp));
  PetscCall(PCDestroy_BJacobi(pc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
842 
843 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
844 {
845   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
846   PetscInt           i, n_local = jac->n_local;
847   KSPConvergedReason reason;
848 
849   PetscFunctionBegin;
850   for (i = 0; i < n_local; i++) {
851     PetscCall(KSPSetUp(jac->ksp[i]));
852     PetscCall(KSPGetConvergedReason(jac->ksp[i], &reason));
853     if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
854   }
855   PetscFunctionReturn(PETSC_SUCCESS);
856 }
857 
858 static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc, Vec x, Vec y)
859 {
860   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
861   PetscInt               i, n_local = jac->n_local;
862   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
863   PetscScalar           *yin;
864   const PetscScalar     *xin;
865 
866   PetscFunctionBegin;
867   PetscCall(VecGetArrayRead(x, &xin));
868   PetscCall(VecGetArray(y, &yin));
869   for (i = 0; i < n_local; i++) {
870     /*
871        To avoid copying the subvector from x into a workspace we instead
872        make the workspace vector array point to the subpart of the array of
873        the global vector.
874     */
875     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
876     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
877 
878     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
879     PetscCall(KSPSolve(jac->ksp[i], bjac->x[i], bjac->y[i]));
880     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
881     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
882 
883     PetscCall(VecResetArray(bjac->x[i]));
884     PetscCall(VecResetArray(bjac->y[i]));
885   }
886   PetscCall(VecRestoreArrayRead(x, &xin));
887   PetscCall(VecRestoreArray(y, &yin));
888   PetscFunctionReturn(PETSC_SUCCESS);
889 }
890 
891 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Multiblock(PC pc, Vec x, Vec y)
892 {
893   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
894   PetscInt               i, n_local = jac->n_local;
895   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
896   PetscScalar           *yin;
897   const PetscScalar     *xin;
898   PC                     subpc;
899 
900   PetscFunctionBegin;
901   PetscCall(VecGetArrayRead(x, &xin));
902   PetscCall(VecGetArray(y, &yin));
903   for (i = 0; i < n_local; i++) {
904     /*
905        To avoid copying the subvector from x into a workspace we instead
906        make the workspace vector array point to the subpart of the array of
907        the global vector.
908     */
909     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
910     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
911 
912     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
913     /* apply the symmetric left portion of the inner PC operator */
914     /* note this by-passes the inner KSP and its options completely */
915     PetscCall(KSPGetPC(jac->ksp[i], &subpc));
916     PetscCall(PCApplySymmetricLeft(subpc, bjac->x[i], bjac->y[i]));
917     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
918 
919     PetscCall(VecResetArray(bjac->x[i]));
920     PetscCall(VecResetArray(bjac->y[i]));
921   }
922   PetscCall(VecRestoreArrayRead(x, &xin));
923   PetscCall(VecRestoreArray(y, &yin));
924   PetscFunctionReturn(PETSC_SUCCESS);
925 }
926 
/* Apply the symmetric right factor of every block's inner PC to x, writing y. */
static PetscErrorCode PCApplySymmetricRight_BJacobi_Multiblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
  PetscInt               i, n_local = jac->n_local;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscScalar           *yin;
  const PetscScalar     *xin;
  PC                     subpc;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xin));
  PetscCall(VecGetArray(y, &yin));
  for (i = 0; i < n_local; i++) {
    /*
       To avoid copying the subvector from x into a workspace we instead
       make the workspace vector array point to the subpart of the array of
       the global vector.
    */
    PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
    PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));

    PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
    /* apply the symmetric right portion of the inner PC operator */
    /* note this by-passes the inner KSP and its options completely */
    PetscCall(KSPGetPC(jac->ksp[i], &subpc));
    PetscCall(PCApplySymmetricRight(subpc, bjac->x[i], bjac->y[i]));
    PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));

    PetscCall(VecResetArray(bjac->x[i]));
    PetscCall(VecResetArray(bjac->y[i]));
  }
  PetscCall(VecRestoreArrayRead(x, &xin));
  PetscCall(VecRestoreArray(y, &yin));
  PetscFunctionReturn(PETSC_SUCCESS);
}
962 
963 static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc, Vec x, Vec y)
964 {
965   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
966   PetscInt               i, n_local = jac->n_local;
967   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
968   PetscScalar           *yin;
969   const PetscScalar     *xin;
970 
971   PetscFunctionBegin;
972   PetscCall(VecGetArrayRead(x, &xin));
973   PetscCall(VecGetArray(y, &yin));
974   for (i = 0; i < n_local; i++) {
975     /*
976        To avoid copying the subvector from x into a workspace we instead
977        make the workspace vector array point to the subpart of the array of
978        the global vector.
979     */
980     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
981     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
982 
983     PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
984     PetscCall(KSPSolveTranspose(jac->ksp[i], bjac->x[i], bjac->y[i]));
985     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
986     PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
987 
988     PetscCall(VecResetArray(bjac->x[i]));
989     PetscCall(VecResetArray(bjac->y[i]));
990   }
991   PetscCall(VecRestoreArrayRead(x, &xin));
992   PetscCall(VecRestoreArray(y, &yin));
993   PetscFunctionReturn(PETSC_SUCCESS);
994 }
995 
996 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc, Mat mat, Mat pmat)
997 {
998   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
999   PetscInt               m, n_local, N, M, start, i;
1000   const char            *prefix;
1001   KSP                    ksp;
1002   Vec                    x, y;
1003   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
1004   PC                     subpc;
1005   IS                     is;
1006   MatReuse               scall;
1007   VecType                vectype;
1008 
1009   PetscFunctionBegin;
1010   PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
1011 
1012   n_local = jac->n_local;
1013 
1014   if (pc->useAmat) {
1015     PetscBool same;
1016     PetscCall(PetscObjectTypeCompare((PetscObject)mat, ((PetscObject)pmat)->type_name, &same));
1017     PetscCheck(same, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "Matrices not of same type");
1018   }
1019 
1020   if (!pc->setupcalled) {
1021     PetscInt nestlevel;
1022 
1023     scall = MAT_INITIAL_MATRIX;
1024 
1025     if (!jac->ksp) {
1026       pc->ops->reset               = PCReset_BJacobi_Multiblock;
1027       pc->ops->destroy             = PCDestroy_BJacobi_Multiblock;
1028       pc->ops->apply               = PCApply_BJacobi_Multiblock;
1029       pc->ops->matapply            = NULL;
1030       pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Multiblock;
1031       pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Multiblock;
1032       pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Multiblock;
1033       pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Multiblock;
1034 
1035       PetscCall(PetscNew(&bjac));
1036       PetscCall(PetscMalloc1(n_local, &jac->ksp));
1037       PetscCall(PetscMalloc2(n_local, &bjac->x, n_local, &bjac->y));
1038       PetscCall(PetscMalloc1(n_local, &bjac->starts));
1039 
1040       jac->data = (void *)bjac;
1041       PetscCall(PetscMalloc1(n_local, &bjac->is));
1042 
1043       for (i = 0; i < n_local; i++) {
1044         PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
1045         PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
1046         PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
1047         PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
1048         PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
1049         PetscCall(KSPSetType(ksp, KSPPREONLY));
1050         PetscCall(KSPGetPC(ksp, &subpc));
1051         PetscCall(PCGetOptionsPrefix(pc, &prefix));
1052         PetscCall(KSPSetOptionsPrefix(ksp, prefix));
1053         PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));
1054 
1055         jac->ksp[i] = ksp;
1056       }
1057     } else {
1058       bjac = (PC_BJacobi_Multiblock *)jac->data;
1059     }
1060 
1061     start = 0;
1062     PetscCall(MatGetVecType(pmat, &vectype));
1063     for (i = 0; i < n_local; i++) {
1064       m = jac->l_lens[i];
1065       /*
1066       The reason we need to generate these vectors is to serve
1067       as the right-hand side and solution vector for the solve on the
1068       block. We do not need to allocate space for the vectors since
1069       that is provided via VecPlaceArray() just before the call to
1070       KSPSolve() on the block.
1071 
1072       */
1073       PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &x));
1074       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &y));
1075       PetscCall(VecSetType(x, vectype));
1076       PetscCall(VecSetType(y, vectype));
1077 
1078       bjac->x[i]      = x;
1079       bjac->y[i]      = y;
1080       bjac->starts[i] = start;
1081 
1082       PetscCall(ISCreateStride(PETSC_COMM_SELF, m, start, 1, &is));
1083       bjac->is[i] = is;
1084 
1085       start += m;
1086     }
1087   } else {
1088     bjac = (PC_BJacobi_Multiblock *)jac->data;
1089     /*
1090        Destroy the blocks from the previous iteration
1091     */
1092     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1093       PetscCall(MatDestroyMatrices(n_local, &bjac->pmat));
1094       if (pc->useAmat) PetscCall(MatDestroyMatrices(n_local, &bjac->mat));
1095       scall = MAT_INITIAL_MATRIX;
1096     } else scall = MAT_REUSE_MATRIX;
1097   }
1098 
1099   PetscCall(MatCreateSubMatrices(pmat, n_local, bjac->is, bjac->is, scall, &bjac->pmat));
1100   if (pc->useAmat) PetscCall(MatCreateSubMatrices(mat, n_local, bjac->is, bjac->is, scall, &bjac->mat));
1101   /* Return control to the user so that the submatrices can be modified (e.g., to apply
1102      different boundary conditions for the submatrices than for the global problem) */
1103   PetscCall(PCModifySubMatrices(pc, n_local, bjac->is, bjac->is, bjac->pmat, pc->modifysubmatricesP));
1104 
1105   for (i = 0; i < n_local; i++) {
1106     PetscCall(KSPGetOptionsPrefix(jac->ksp[i], &prefix));
1107     if (pc->useAmat) {
1108       PetscCall(KSPSetOperators(jac->ksp[i], bjac->mat[i], bjac->pmat[i]));
1109       PetscCall(MatSetOptionsPrefix(bjac->mat[i], prefix));
1110     } else {
1111       PetscCall(KSPSetOperators(jac->ksp[i], bjac->pmat[i], bjac->pmat[i]));
1112     }
1113     PetscCall(MatSetOptionsPrefix(bjac->pmat[i], prefix));
1114     if (pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[i]));
1115   }
1116   PetscFunctionReturn(PETSC_SUCCESS);
1117 }
1118 
1119 /*
1120       These are for a single block with multiple processes
1121 */
1122 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc)
1123 {
1124   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
1125   KSP                subksp = jac->ksp[0];
1126   KSPConvergedReason reason;
1127 
1128   PetscFunctionBegin;
1129   PetscCall(KSPSetUp(subksp));
1130   PetscCall(KSPGetConvergedReason(subksp, &reason));
1131   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1132   PetscFunctionReturn(PETSC_SUCCESS);
1133 }
1134 
1135 static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
1136 {
1137   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1138   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1139 
1140   PetscFunctionBegin;
1141   PetscCall(VecDestroy(&mpjac->ysub));
1142   PetscCall(VecDestroy(&mpjac->xsub));
1143   PetscCall(MatDestroy(&mpjac->submats));
1144   if (jac->ksp) PetscCall(KSPReset(jac->ksp[0]));
1145   PetscFunctionReturn(PETSC_SUCCESS);
1146 }
1147 
/* Tear down the multiproc variant: reset first (frees work vectors and the
   subcommunicator matrix), then destroy the sub-KSP, the subcommunicator,
   and finally the shared PC_BJacobi data. */
static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;

  PetscFunctionBegin;
  PetscCall(PCReset_BJacobi_Multiproc(pc));
  PetscCall(KSPDestroy(&jac->ksp[0]));
  PetscCall(PetscFree(jac->ksp));
  PetscCall(PetscSubcommDestroy(&mpjac->psubcomm));

  PetscCall(PetscFree(mpjac));
  PetscCall(PCDestroy_BJacobi(pc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1163 
1164 static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc, Vec x, Vec y)
1165 {
1166   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1167   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1168   PetscScalar          *yarray;
1169   const PetscScalar    *xarray;
1170   KSPConvergedReason    reason;
1171 
1172   PetscFunctionBegin;
1173   /* place x's and y's local arrays into xsub and ysub */
1174   PetscCall(VecGetArrayRead(x, &xarray));
1175   PetscCall(VecGetArray(y, &yarray));
1176   PetscCall(VecPlaceArray(mpjac->xsub, xarray));
1177   PetscCall(VecPlaceArray(mpjac->ysub, yarray));
1178 
1179   /* apply preconditioner on each matrix block */
1180   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1181   PetscCall(KSPSolve(jac->ksp[0], mpjac->xsub, mpjac->ysub));
1182   PetscCall(KSPCheckSolve(jac->ksp[0], pc, mpjac->ysub));
1183   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1184   PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
1185   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1186 
1187   PetscCall(VecResetArray(mpjac->xsub));
1188   PetscCall(VecResetArray(mpjac->ysub));
1189   PetscCall(VecRestoreArrayRead(x, &xarray));
1190   PetscCall(VecRestoreArray(y, &yarray));
1191   PetscFunctionReturn(PETSC_SUCCESS);
1192 }
1193 
/* Apply the multiproc block Jacobi preconditioner to the N columns of the
   dense matrix X at once, writing the dense matrix Y. */
static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc, Mat X, Mat Y)
{
  PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
  KSPConvergedReason reason;
  Mat                sX, sY;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscInt           m, N, lda, ldb;

  PetscFunctionBegin;
  /* apply preconditioner on each matrix block */
  PetscCall(MatGetLocalSize(X, &m, NULL));
  PetscCall(MatGetSize(X, NULL, &N));
  PetscCall(MatDenseGetLDA(X, &lda));
  PetscCall(MatDenseGetLDA(Y, &ldb));
  PetscCall(MatDenseGetArrayRead(X, &x));
  PetscCall(MatDenseGetArrayWrite(Y, &y));
  /* wrap the raw arrays of X and Y in dense matrices on the sub-KSP's
     (sub)communicator; the const cast is safe because sX is only read.
     Propagate the leading dimensions so non-contiguous storage is honored. */
  PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, (PetscScalar *)x, &sX));
  PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, y, &sY));
  PetscCall(MatDenseSetLDA(sX, lda));
  PetscCall(MatDenseSetLDA(sY, ldb));
  PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
  PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, NULL));
  PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
  /* destroy the wrappers before restoring the arrays they alias */
  PetscCall(MatDestroy(&sY));
  PetscCall(MatDestroy(&sX));
  PetscCall(MatDenseRestoreArrayWrite(Y, &y));
  PetscCall(MatDenseRestoreArrayRead(X, &x));
  PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
  if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1227 
/* Set up block Jacobi with one block spanning a subcommunicator of processes:
   split the communicator, extract the multi-process matrix block, and create
   a parallel sub-KSP (prefix "<pc-prefix>sub_") shared by each subgroup. */
static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
  PetscInt              m, n;
  MPI_Comm              comm, subcomm = 0;
  const char           *prefix;
  PetscBool             wasSetup = PETSC_TRUE;
  VecType               vectype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
  PetscCheck(jac->n_local <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only a single block in a subcommunicator is supported");
  jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
  if (!pc->setupcalled) {
    PetscInt nestlevel;

    wasSetup = PETSC_FALSE; /* trigger KSPSetFromOptions() at the end */
    PetscCall(PetscNew(&mpjac));
    jac->data = (void *)mpjac;

    /* initialize datastructure mpjac */
    if (!jac->psubcomm) {
      /* Create default contiguous subcommunicators if user does not provide them */
      PetscCall(PetscSubcommCreate(comm, &jac->psubcomm));
      PetscCall(PetscSubcommSetNumber(jac->psubcomm, jac->n));
      PetscCall(PetscSubcommSetType(jac->psubcomm, PETSC_SUBCOMM_CONTIGUOUS));
    }
    mpjac->psubcomm = jac->psubcomm;
    subcomm         = PetscSubcommChild(mpjac->psubcomm);

    /* Get matrix blocks of pmat */
    PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));

    /* create a new PC that processors in each subcomm have copy of */
    PetscCall(PetscMalloc1(1, &jac->ksp));
    PetscCall(KSPCreate(subcomm, &jac->ksp[0]));
    PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
    PetscCall(KSPSetNestLevel(jac->ksp[0], nestlevel + 1));
    PetscCall(KSPSetErrorIfNotConverged(jac->ksp[0], pc->erroriffailure));
    PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0], (PetscObject)pc, 1));
    PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
    PetscCall(KSPGetPC(jac->ksp[0], &mpjac->pc));

    /* sub-solver options use the outer prefix followed by "sub_" */
    PetscCall(PCGetOptionsPrefix(pc, &prefix));
    PetscCall(KSPSetOptionsPrefix(jac->ksp[0], prefix));
    PetscCall(KSPAppendOptionsPrefix(jac->ksp[0], "sub_"));
    PetscCall(KSPGetOptionsPrefix(jac->ksp[0], &prefix));
    PetscCall(MatSetOptionsPrefix(mpjac->submats, prefix));

    /* create dummy vectors xsub and ysub; their arrays are supplied later
       via VecPlaceArray() in PCApply_BJacobi_Multiproc() */
    PetscCall(MatGetLocalSize(mpjac->submats, &m, &n));
    PetscCall(VecCreateMPIWithArray(subcomm, 1, n, PETSC_DECIDE, NULL, &mpjac->xsub));
    PetscCall(VecCreateMPIWithArray(subcomm, 1, m, PETSC_DECIDE, NULL, &mpjac->ysub));
    PetscCall(MatGetVecType(mpjac->submats, &vectype));
    PetscCall(VecSetType(mpjac->xsub, vectype));
    PetscCall(VecSetType(mpjac->ysub, vectype));

    pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc;
    pc->ops->reset         = PCReset_BJacobi_Multiproc;
    pc->ops->destroy       = PCDestroy_BJacobi_Multiproc;
    pc->ops->apply         = PCApply_BJacobi_Multiproc;
    pc->ops->matapply      = PCMatApply_BJacobi_Multiproc;
  } else { /* pc->setupcalled */
    subcomm = PetscSubcommChild(mpjac->psubcomm);
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      /* destroy old matrix blocks, then get new matrix blocks */
      if (mpjac->submats) PetscCall(MatDestroy(&mpjac->submats));
      PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
    } else {
      PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_REUSE_MATRIX, &mpjac->submats));
    }
    PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
  }

  if (!wasSetup && pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[0]));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1306