xref: /petsc/src/ksp/pc/impls/bjacobi/bjacobi.c (revision 7f296bb328fcd4c99f2da7bfe8ba7ed8a4ebceee)
1 /*
2    Defines a block Jacobi preconditioner.
3 */
4 
5 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/
6 
7 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC, Mat, Mat);
8 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC, Mat, Mat);
9 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);
10 
11 static PetscErrorCode PCSetUp_BJacobi(PC pc)
12 {
13   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
14   Mat         mat = pc->mat, pmat = pc->pmat;
15   PetscBool   hasop;
16   PetscInt    N, M, start, i, sum, end;
17   PetscInt    bs, i_start = -1, i_end = -1;
18   PetscMPIInt rank, size;
19 
20   PetscFunctionBegin;
21   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
22   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
23   PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
24   PetscCall(MatGetBlockSize(pc->pmat, &bs));
25 
26   if (jac->n > 0 && jac->n < size) {
27     PetscCall(PCSetUp_BJacobi_Multiproc(pc));
28     PetscFunctionReturn(PETSC_SUCCESS);
29   }
30 
31   /*    Determines the number of blocks assigned to each processor */
32   /*   local block count  given */
33   if (jac->n_local > 0 && jac->n < 0) {
34     PetscCallMPI(MPIU_Allreduce(&jac->n_local, &jac->n, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
35     if (jac->l_lens) { /* check that user set these correctly */
36       sum = 0;
37       for (i = 0; i < jac->n_local; i++) {
38         PetscCheck(jac->l_lens[i] / bs * bs == jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
39         sum += jac->l_lens[i];
40       }
41       PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local lens set incorrectly");
42     } else {
43       PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
44       for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
45     }
46   } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
47     /* global blocks given: determine which ones are local */
48     if (jac->g_lens) {
49       /* check if the g_lens is has valid entries */
50       for (i = 0; i < jac->n; i++) {
51         PetscCheck(jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Zero block not allowed");
52         PetscCheck(jac->g_lens[i] / bs * bs == jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
53       }
54       if (size == 1) {
55         jac->n_local = jac->n;
56         PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
57         PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens, jac->n_local));
58         /* check that user set these correctly */
59         sum = 0;
60         for (i = 0; i < jac->n_local; i++) sum += jac->l_lens[i];
61         PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Global lens set incorrectly");
62       } else {
63         PetscCall(MatGetOwnershipRange(pc->pmat, &start, &end));
64         /* loop over blocks determining first one owned by me */
65         sum = 0;
66         for (i = 0; i < jac->n + 1; i++) {
67           if (sum == start) {
68             i_start = i;
69             goto start_1;
70           }
71           if (i < jac->n) sum += jac->g_lens[i];
72         }
73         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
74       start_1:
75         for (i = i_start; i < jac->n + 1; i++) {
76           if (sum == end) {
77             i_end = i;
78             goto end_1;
79           }
80           if (i < jac->n) sum += jac->g_lens[i];
81         }
82         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
83       end_1:
84         jac->n_local = i_end - i_start;
85         PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
86         PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens + i_start, jac->n_local));
87       }
88     } else { /* no global blocks given, determine then using default layout */
89       jac->n_local = jac->n / size + ((jac->n % size) > rank);
90       PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
91       for (i = 0; i < jac->n_local; i++) {
92         jac->l_lens[i] = ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)) * bs;
93         PetscCheck(jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Too many blocks given");
94       }
95     }
96   } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
97     jac->n       = size;
98     jac->n_local = 1;
99     PetscCall(PetscMalloc1(1, &jac->l_lens));
100     jac->l_lens[0] = M;
101   } else { /* jac->n > 0 && jac->n_local > 0 */
102     if (!jac->l_lens) {
103       PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
104       for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
105     }
106   }
107   PetscCheck(jac->n_local >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of blocks is less than number of processors");
108 
109   /*    Determines mat and pmat */
110   PetscCall(MatHasOperation(pc->mat, MATOP_GET_DIAGONAL_BLOCK, &hasop));
111   if (!hasop && size == 1) {
112     mat  = pc->mat;
113     pmat = pc->pmat;
114   } else {
115     if (pc->useAmat) {
116       /* use block from Amat matrix, not Pmat for local MatMult() */
117       PetscCall(MatGetDiagonalBlock(pc->mat, &mat));
118     }
119     if (pc->pmat != pc->mat || !pc->useAmat) {
120       PetscCall(MatGetDiagonalBlock(pc->pmat, &pmat));
121     } else pmat = mat;
122   }
123 
124   /*
125      Setup code depends on the number of blocks
126   */
127   if (jac->n_local == 1) {
128     PetscCall(PCSetUp_BJacobi_Singleblock(pc, mat, pmat));
129   } else {
130     PetscCall(PCSetUp_BJacobi_Multiblock(pc, mat, pmat));
131   }
132   PetscFunctionReturn(PETSC_SUCCESS);
133 }
134 
135 /* Default destroy, if it has never been setup */
136 static PetscErrorCode PCDestroy_BJacobi(PC pc)
137 {
138   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
139 
140   PetscFunctionBegin;
141   PetscCall(PetscFree(jac->g_lens));
142   PetscCall(PetscFree(jac->l_lens));
143   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", NULL));
144   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", NULL));
145   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", NULL));
146   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", NULL));
147   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", NULL));
148   PetscCall(PetscFree(pc->data));
149   PetscFunctionReturn(PETSC_SUCCESS);
150 }
151 
152 static PetscErrorCode PCSetFromOptions_BJacobi(PC pc, PetscOptionItems PetscOptionsObject)
153 {
154   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
155   PetscInt    blocks, i;
156   PetscBool   flg;
157 
158   PetscFunctionBegin;
159   PetscOptionsHeadBegin(PetscOptionsObject, "Block Jacobi options");
160   PetscCall(PetscOptionsInt("-pc_bjacobi_blocks", "Total number of blocks", "PCBJacobiSetTotalBlocks", jac->n, &blocks, &flg));
161   if (flg) PetscCall(PCBJacobiSetTotalBlocks(pc, blocks, NULL));
162   PetscCall(PetscOptionsInt("-pc_bjacobi_local_blocks", "Local number of blocks", "PCBJacobiSetLocalBlocks", jac->n_local, &blocks, &flg));
163   if (flg) PetscCall(PCBJacobiSetLocalBlocks(pc, blocks, NULL));
164   if (jac->ksp) {
165     /* The sub-KSP has already been set up (e.g., PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions was not called
166      * unless we had already been called. */
167     for (i = 0; i < jac->n_local; i++) PetscCall(KSPSetFromOptions(jac->ksp[i]));
168   }
169   PetscOptionsHeadEnd();
170   PetscFunctionReturn(PETSC_SUCCESS);
171 }
172 
173 #include <petscdraw.h>
174 static PetscErrorCode PCView_BJacobi(PC pc, PetscViewer viewer)
175 {
176   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
177   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
178   PetscMPIInt           rank;
179   PetscInt              i;
180   PetscBool             iascii, isstring, isdraw;
181   PetscViewer           sviewer;
182   PetscViewerFormat     format;
183   const char           *prefix;
184 
185   PetscFunctionBegin;
186   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
187   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
188   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
189   if (iascii) {
190     if (pc->useAmat) PetscCall(PetscViewerASCIIPrintf(viewer, "  using Amat local matrix, number of blocks = %" PetscInt_FMT "\n", jac->n));
191     PetscCall(PetscViewerASCIIPrintf(viewer, "  number of blocks = %" PetscInt_FMT "\n", jac->n));
192     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
193     PetscCall(PetscViewerGetFormat(viewer, &format));
194     if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) {
195       PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for first block is in the following KSP and PC objects on rank 0:\n"));
196       PetscCall(PCGetOptionsPrefix(pc, &prefix));
197       PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all blocks\n", prefix ? prefix : ""));
198       if (jac->ksp && !jac->psubcomm) {
199         PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
200         if (rank == 0) {
201           PetscCall(PetscViewerASCIIPushTab(sviewer));
202           PetscCall(KSPView(jac->ksp[0], sviewer));
203           PetscCall(PetscViewerASCIIPopTab(sviewer));
204         }
205         PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
206         /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
207         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
208       } else if (mpjac && jac->ksp && mpjac->psubcomm) {
209         PetscCall(PetscViewerGetSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
210         if (!mpjac->psubcomm->color) {
211           PetscCall(PetscViewerASCIIPushTab(sviewer));
212           PetscCall(KSPView(*jac->ksp, sviewer));
213           PetscCall(PetscViewerASCIIPopTab(sviewer));
214         }
215         PetscCall(PetscViewerRestoreSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
216         /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
217         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
218       }
219     } else {
220       PetscInt n_global;
221       PetscCallMPI(MPIU_Allreduce(&jac->n_local, &n_global, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
222       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
223       PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for each block is in the following KSP and PC objects:\n"));
224       PetscCall(PetscViewerASCIIPushTab(viewer));
225       PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
226       PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n", rank, jac->n_local, jac->first_local));
227       for (i = 0; i < jac->n_local; i++) {
228         PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] local block number %" PetscInt_FMT "\n", rank, i));
229         PetscCall(KSPView(jac->ksp[i], sviewer));
230         PetscCall(PetscViewerASCIIPrintf(sviewer, "- - - - - - - - - - - - - - - - - -\n"));
231       }
232       PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
233       PetscCall(PetscViewerASCIIPopTab(viewer));
234       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
235     }
236   } else if (isstring) {
237     PetscCall(PetscViewerStringSPrintf(viewer, " blks=%" PetscInt_FMT, jac->n));
238     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
239     if (jac->ksp) PetscCall(KSPView(jac->ksp[0], sviewer));
240     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
241   } else if (isdraw) {
242     PetscDraw draw;
243     char      str[25];
244     PetscReal x, y, bottom, h;
245 
246     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
247     PetscCall(PetscDrawGetCurrentPoint(draw, &x, &y));
248     PetscCall(PetscSNPrintf(str, 25, "Number blocks %" PetscInt_FMT, jac->n));
249     PetscCall(PetscDrawStringBoxed(draw, x, y, PETSC_DRAW_RED, PETSC_DRAW_BLACK, str, NULL, &h));
250     bottom = y - h;
251     PetscCall(PetscDrawPushCurrentPoint(draw, x, bottom));
252     /* warning the communicator on viewer is different then on ksp in parallel */
253     if (jac->ksp) PetscCall(KSPView(jac->ksp[0], viewer));
254     PetscCall(PetscDrawPopCurrentPoint(draw));
255   }
256   PetscFunctionReturn(PETSC_SUCCESS);
257 }
258 
259 static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc, PetscInt *n_local, PetscInt *first_local, KSP **ksp)
260 {
261   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
262 
263   PetscFunctionBegin;
264   PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Must call KSPSetUp() or PCSetUp() first");
265 
266   if (n_local) *n_local = jac->n_local;
267   if (first_local) *first_local = jac->first_local;
268   if (ksp) *ksp = jac->ksp;
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt *lens)
273 {
274   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
275 
276   PetscFunctionBegin;
277   PetscCheck(pc->setupcalled <= 0 || jac->n == blocks, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
278   jac->n = blocks;
279   if (!lens) jac->g_lens = NULL;
280   else {
281     PetscCall(PetscMalloc1(blocks, &jac->g_lens));
282     PetscCall(PetscArraycpy(jac->g_lens, lens, blocks));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
288 {
289   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
290 
291   PetscFunctionBegin;
292   *blocks = jac->n;
293   if (lens) *lens = jac->g_lens;
294   PetscFunctionReturn(PETSC_SUCCESS);
295 }
296 
297 static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt lens[])
298 {
299   PC_BJacobi *jac;
300 
301   PetscFunctionBegin;
302   jac = (PC_BJacobi *)pc->data;
303 
304   jac->n_local = blocks;
305   if (!lens) jac->l_lens = NULL;
306   else {
307     PetscCall(PetscMalloc1(blocks, &jac->l_lens));
308     PetscCall(PetscArraycpy(jac->l_lens, lens, blocks));
309   }
310   PetscFunctionReturn(PETSC_SUCCESS);
311 }
312 
313 static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
314 {
315   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
316 
317   PetscFunctionBegin;
318   *blocks = jac->n_local;
319   if (lens) *lens = jac->l_lens;
320   PetscFunctionReturn(PETSC_SUCCESS);
321 }
322 
323 /*@C
324   PCBJacobiGetSubKSP - Gets the local `KSP` contexts for all blocks on
325   this processor.
326 
327   Not Collective
328 
329   Input Parameter:
330 . pc - the preconditioner context
331 
332   Output Parameters:
333 + n_local     - the number of blocks on this processor, or NULL
334 . first_local - the global number of the first block on this processor, or NULL
335 - ksp         - the array of KSP contexts
336 
337   Level: advanced
338 
339   Notes:
340   After `PCBJacobiGetSubKSP()` the array of `KSP` contexts is not to be freed.
341 
342   Currently for some matrix implementations only 1 block per processor
343   is supported.
344 
345   You must call `KSPSetUp()` or `PCSetUp()` before calling `PCBJacobiGetSubKSP()`.
346 
347 .seealso: [](ch_ksp), `PCBJACOBI`, `PCASM`, `PCASMGetSubKSP()`
348 @*/
349 PetscErrorCode PCBJacobiGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[])
350 {
351   PetscFunctionBegin;
352   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
353   PetscUseMethod(pc, "PCBJacobiGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp));
354   PetscFunctionReturn(PETSC_SUCCESS);
355 }
356 
357 /*@
358   PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
359   Jacobi preconditioner.
360 
361   Collective
362 
363   Input Parameters:
364 + pc     - the preconditioner context
365 . blocks - the number of blocks
366 - lens   - [optional] integer array containing the size of each block
367 
368   Options Database Key:
369 . -pc_bjacobi_blocks <blocks> - Sets the number of global blocks
370 
371   Level: intermediate
372 
373   Note:
374   Currently only a limited number of blocking configurations are supported.
375   All processors sharing the `PC` must call this routine with the same data.
376 
377 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetLocalBlocks()`
378 @*/
379 PetscErrorCode PCBJacobiSetTotalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
380 {
381   PetscFunctionBegin;
382   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
383   PetscCheck(blocks > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Must have positive blocks");
384   PetscTryMethod(pc, "PCBJacobiSetTotalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
385   PetscFunctionReturn(PETSC_SUCCESS);
386 }
387 
388 /*@C
389   PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
390   Jacobi, `PCBJACOBI`, preconditioner.
391 
392   Not Collective
393 
394   Input Parameter:
395 . pc - the preconditioner context
396 
397   Output Parameters:
398 + blocks - the number of blocks
399 - lens   - integer array containing the size of each block
400 
401   Level: intermediate
402 
403 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetLocalBlocks()`
404 @*/
405 PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
406 {
407   PetscFunctionBegin;
408   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
409   PetscAssertPointer(blocks, 2);
410   PetscUseMethod(pc, "PCBJacobiGetTotalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
411   PetscFunctionReturn(PETSC_SUCCESS);
412 }
413 
414 /*@
415   PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
416   Jacobi, `PCBJACOBI`,  preconditioner.
417 
418   Not Collective
419 
420   Input Parameters:
421 + pc     - the preconditioner context
422 . blocks - the number of blocks
423 - lens   - [optional] integer array containing size of each block
424 
425   Options Database Key:
426 . -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks
427 
428   Level: intermediate
429 
430   Note:
431   Currently only a limited number of blocking configurations are supported.
432 
433 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetTotalBlocks()`
434 @*/
435 PetscErrorCode PCBJacobiSetLocalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
436 {
437   PetscFunctionBegin;
438   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
439   PetscCheck(blocks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must have nonegative blocks");
440   PetscTryMethod(pc, "PCBJacobiSetLocalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
441   PetscFunctionReturn(PETSC_SUCCESS);
442 }
443 
444 /*@C
445   PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
446   Jacobi, `PCBJACOBI`, preconditioner.
447 
448   Not Collective
449 
450   Input Parameters:
451 + pc     - the preconditioner context
452 . blocks - the number of blocks
453 - lens   - [optional] integer array containing size of each block
454 
455   Level: intermediate
456 
457   Note:
458   Currently only a limited number of blocking configurations are supported.
459 
460 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetTotalBlocks()`
461 @*/
462 PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
463 {
464   PetscFunctionBegin;
465   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
466   PetscAssertPointer(blocks, 2);
467   PetscUseMethod(pc, "PCBJacobiGetLocalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
468   PetscFunctionReturn(PETSC_SUCCESS);
469 }
470 
471 /*MC
472    PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with
473            its own `KSP` object.
474 
475    Options Database Keys:
476 +  -pc_use_amat - use Amat to apply block of operator in inner Krylov method
477 -  -pc_bjacobi_blocks <n> - use n total blocks
478 
479    Level: beginner
480 
481    Notes:
482     See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable point block, and `PCPBJACOBI` for fixed size point block
483 
484     Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.
485 
486      To set options on the solvers for each block append -sub_ to all the `KSP` and `PC`
487         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
488 
489      To set the options on the solvers separate for each block call `PCBJacobiGetSubKSP()`
490          and set the options directly on the resulting `KSP` object (you can access its `PC`
491          `KSPGetPC()`)
492 
493      For GPU-based vectors (`VECCUDA`, `VECViennaCL`) it is recommended to use exactly one block per MPI process for best
494          performance.  Different block partitioning may lead to additional data transfers
495          between host and GPU that lead to degraded performance.
496 
497      When multiple processes share a single block, each block encompasses exactly all the unknowns owned its set of processes.
498 
499 .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCType`,
500           `PCASM`, `PCSetUseAmat()`, `PCGetUseAmat()`, `PCBJacobiGetSubKSP()`, `PCBJacobiSetTotalBlocks()`,
501           `PCBJacobiSetLocalBlocks()`, `PCSetModifySubMatrices()`, `PCJACOBI`, `PCVPBJACOBI`, `PCPBJACOBI`
502 M*/
503 
504 PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
505 {
506   PetscMPIInt rank;
507   PC_BJacobi *jac;
508 
509   PetscFunctionBegin;
510   PetscCall(PetscNew(&jac));
511   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
512 
513   pc->ops->apply           = NULL;
514   pc->ops->matapply        = NULL;
515   pc->ops->applytranspose  = NULL;
516   pc->ops->setup           = PCSetUp_BJacobi;
517   pc->ops->destroy         = PCDestroy_BJacobi;
518   pc->ops->setfromoptions  = PCSetFromOptions_BJacobi;
519   pc->ops->view            = PCView_BJacobi;
520   pc->ops->applyrichardson = NULL;
521 
522   pc->data         = (void *)jac;
523   jac->n           = -1;
524   jac->n_local     = -1;
525   jac->first_local = rank;
526   jac->ksp         = NULL;
527   jac->g_lens      = NULL;
528   jac->l_lens      = NULL;
529   jac->psubcomm    = NULL;
530 
531   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", PCBJacobiGetSubKSP_BJacobi));
532   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", PCBJacobiSetTotalBlocks_BJacobi));
533   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", PCBJacobiGetTotalBlocks_BJacobi));
534   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", PCBJacobiSetLocalBlocks_BJacobi));
535   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", PCBJacobiGetLocalBlocks_BJacobi));
536   PetscFunctionReturn(PETSC_SUCCESS);
537 }
538 
539 /*
540         These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI
541 */
542 static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
543 {
544   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
545   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
546 
547   PetscFunctionBegin;
548   PetscCall(KSPReset(jac->ksp[0]));
549   PetscCall(VecDestroy(&bjac->x));
550   PetscCall(VecDestroy(&bjac->y));
551   PetscFunctionReturn(PETSC_SUCCESS);
552 }
553 
554 static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
555 {
556   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
557   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
558 
559   PetscFunctionBegin;
560   PetscCall(PCReset_BJacobi_Singleblock(pc));
561   PetscCall(KSPDestroy(&jac->ksp[0]));
562   PetscCall(PetscFree(jac->ksp));
563   PetscCall(PetscFree(bjac));
564   PetscCall(PCDestroy_BJacobi(pc));
565   PetscFunctionReturn(PETSC_SUCCESS);
566 }
567 
568 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
569 {
570   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
571   KSP                subksp = jac->ksp[0];
572   KSPConvergedReason reason;
573 
574   PetscFunctionBegin;
575   PetscCall(KSPSetUp(subksp));
576   PetscCall(KSPGetConvergedReason(subksp, &reason));
577   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
578   PetscFunctionReturn(PETSC_SUCCESS);
579 }
580 
581 static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc, Vec x, Vec y)
582 {
583   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
584   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
585 
586   PetscFunctionBegin;
587   PetscCall(VecGetLocalVectorRead(x, bjac->x));
588   PetscCall(VecGetLocalVector(y, bjac->y));
589   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
590      matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
591      of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
592   PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
593   PetscCall(KSPSolve(jac->ksp[0], bjac->x, bjac->y));
594   PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
595   PetscCall(VecRestoreLocalVectorRead(x, bjac->x));
596   PetscCall(VecRestoreLocalVector(y, bjac->y));
597   PetscFunctionReturn(PETSC_SUCCESS);
598 }
599 
600 static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc, Mat X, Mat Y)
601 {
602   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
603   Mat         sX, sY;
604 
605   PetscFunctionBegin;
606   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
607      matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
608      of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
609   PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
610   PetscCall(MatDenseGetLocalMatrix(X, &sX));
611   PetscCall(MatDenseGetLocalMatrix(Y, &sY));
612   PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
613   PetscFunctionReturn(PETSC_SUCCESS);
614 }
615 
616 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc, Vec x, Vec y)
617 {
618   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
619   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
620   PetscScalar            *y_array;
621   const PetscScalar      *x_array;
622   PC                      subpc;
623 
624   PetscFunctionBegin;
625   /*
626       The VecPlaceArray() is to avoid having to copy the
627     y vector into the bjac->x vector. The reason for
628     the bjac->x vector is that we need a sequential vector
629     for the sequential solve.
630   */
631   PetscCall(VecGetArrayRead(x, &x_array));
632   PetscCall(VecGetArray(y, &y_array));
633   PetscCall(VecPlaceArray(bjac->x, x_array));
634   PetscCall(VecPlaceArray(bjac->y, y_array));
635   /* apply the symmetric left portion of the inner PC operator */
636   /* note this bypasses the inner KSP and its options completely */
637   PetscCall(KSPGetPC(jac->ksp[0], &subpc));
638   PetscCall(PCApplySymmetricLeft(subpc, bjac->x, bjac->y));
639   PetscCall(VecResetArray(bjac->x));
640   PetscCall(VecResetArray(bjac->y));
641   PetscCall(VecRestoreArrayRead(x, &x_array));
642   PetscCall(VecRestoreArray(y, &y_array));
643   PetscFunctionReturn(PETSC_SUCCESS);
644 }
645 
646 static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc, Vec x, Vec y)
647 {
648   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
649   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
650   PetscScalar            *y_array;
651   const PetscScalar      *x_array;
652   PC                      subpc;
653 
654   PetscFunctionBegin;
655   /*
656       The VecPlaceArray() is to avoid having to copy the
657     y vector into the bjac->x vector. The reason for
658     the bjac->x vector is that we need a sequential vector
659     for the sequential solve.
660   */
661   PetscCall(VecGetArrayRead(x, &x_array));
662   PetscCall(VecGetArray(y, &y_array));
663   PetscCall(VecPlaceArray(bjac->x, x_array));
664   PetscCall(VecPlaceArray(bjac->y, y_array));
665 
666   /* apply the symmetric right portion of the inner PC operator */
667   /* note this bypasses the inner KSP and its options completely */
668 
669   PetscCall(KSPGetPC(jac->ksp[0], &subpc));
670   PetscCall(PCApplySymmetricRight(subpc, bjac->x, bjac->y));
671 
672   PetscCall(VecResetArray(bjac->x));
673   PetscCall(VecResetArray(bjac->y));
674   PetscCall(VecRestoreArrayRead(x, &x_array));
675   PetscCall(VecRestoreArray(y, &y_array));
676   PetscFunctionReturn(PETSC_SUCCESS);
677 }
678 
679 static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc, Vec x, Vec y)
680 {
681   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
682   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
683   PetscScalar            *y_array;
684   const PetscScalar      *x_array;
685 
686   PetscFunctionBegin;
687   /*
688       The VecPlaceArray() is to avoid having to copy the
689     y vector into the bjac->x vector. The reason for
690     the bjac->x vector is that we need a sequential vector
691     for the sequential solve.
692   */
693   PetscCall(VecGetArrayRead(x, &x_array));
694   PetscCall(VecGetArray(y, &y_array));
695   PetscCall(VecPlaceArray(bjac->x, x_array));
696   PetscCall(VecPlaceArray(bjac->y, y_array));
697   PetscCall(KSPSolveTranspose(jac->ksp[0], bjac->x, bjac->y));
698   PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
699   PetscCall(VecResetArray(bjac->x));
700   PetscCall(VecResetArray(bjac->y));
701   PetscCall(VecRestoreArrayRead(x, &x_array));
702   PetscCall(VecRestoreArray(y, &y_array));
703   PetscFunctionReturn(PETSC_SUCCESS);
704 }
705 
706 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc, Mat mat, Mat pmat)
707 {
708   PC_BJacobi             *jac = (PC_BJacobi *)pc->data;
709   PetscInt                m;
710   KSP                     ksp;
711   PC_BJacobi_Singleblock *bjac;
712   PetscBool               wasSetup = PETSC_TRUE;
713   VecType                 vectype;
714   const char             *prefix;
715 
716   PetscFunctionBegin;
717   if (!pc->setupcalled) {
718     if (!jac->ksp) {
719       PetscInt nestlevel;
720 
721       wasSetup = PETSC_FALSE;
722 
723       PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
724       PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
725       PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
726       PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
727       PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
728       PetscCall(KSPSetType(ksp, KSPPREONLY));
729       PetscCall(PCGetOptionsPrefix(pc, &prefix));
730       PetscCall(KSPSetOptionsPrefix(ksp, prefix));
731       PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));
732 
733       pc->ops->reset               = PCReset_BJacobi_Singleblock;
734       pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
735       pc->ops->apply               = PCApply_BJacobi_Singleblock;
736       pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
737       pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
738       pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
739       pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
740       pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;
741 
742       PetscCall(PetscMalloc1(1, &jac->ksp));
743       jac->ksp[0] = ksp;
744 
745       PetscCall(PetscNew(&bjac));
746       jac->data = (void *)bjac;
747     } else {
748       ksp  = jac->ksp[0];
749       bjac = (PC_BJacobi_Singleblock *)jac->data;
750     }
751 
752     /*
753       The reason we need to generate these vectors is to serve
754       as the right-hand side and solution vector for the solve on the
755       block. We do not need to allocate space for the vectors since
756       that is provided via VecPlaceArray() just before the call to
757       KSPSolve() on the block.
758     */
759     PetscCall(MatGetSize(pmat, &m, &m));
760     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->x));
761     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->y));
762     PetscCall(MatGetVecType(pmat, &vectype));
763     PetscCall(VecSetType(bjac->x, vectype));
764     PetscCall(VecSetType(bjac->y, vectype));
765   } else {
766     ksp  = jac->ksp[0];
767     bjac = (PC_BJacobi_Singleblock *)jac->data;
768   }
769   PetscCall(KSPGetOptionsPrefix(ksp, &prefix));
770   if (pc->useAmat) {
771     PetscCall(KSPSetOperators(ksp, mat, pmat));
772     PetscCall(MatSetOptionsPrefix(mat, prefix));
773   } else {
774     PetscCall(KSPSetOperators(ksp, pmat, pmat));
775   }
776   PetscCall(MatSetOptionsPrefix(pmat, prefix));
777   if (!wasSetup && pc->setfromoptionscalled) {
778     /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
779     PetscCall(KSPSetFromOptions(ksp));
780   }
781   PetscFunctionReturn(PETSC_SUCCESS);
782 }
783 
784 static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
785 {
786   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
787   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
788   PetscInt               i;
789 
790   PetscFunctionBegin;
791   if (bjac && bjac->pmat) {
792     PetscCall(MatDestroyMatrices(jac->n_local, &bjac->pmat));
793     if (pc->useAmat) PetscCall(MatDestroyMatrices(jac->n_local, &bjac->mat));
794   }
795 
796   for (i = 0; i < jac->n_local; i++) {
797     PetscCall(KSPReset(jac->ksp[i]));
798     if (bjac && bjac->x) {
799       PetscCall(VecDestroy(&bjac->x[i]));
800       PetscCall(VecDestroy(&bjac->y[i]));
801       PetscCall(ISDestroy(&bjac->is[i]));
802     }
803   }
804   PetscCall(PetscFree(jac->l_lens));
805   PetscCall(PetscFree(jac->g_lens));
806   PetscFunctionReturn(PETSC_SUCCESS);
807 }
808 
809 static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
810 {
811   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
812   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
813   PetscInt               i;
814 
815   PetscFunctionBegin;
816   PetscCall(PCReset_BJacobi_Multiblock(pc));
817   if (bjac) {
818     PetscCall(PetscFree2(bjac->x, bjac->y));
819     PetscCall(PetscFree(bjac->starts));
820     PetscCall(PetscFree(bjac->is));
821   }
822   PetscCall(PetscFree(jac->data));
823   for (i = 0; i < jac->n_local; i++) PetscCall(KSPDestroy(&jac->ksp[i]));
824   PetscCall(PetscFree(jac->ksp));
825   PetscCall(PCDestroy_BJacobi(pc));
826   PetscFunctionReturn(PETSC_SUCCESS);
827 }
828 
829 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
830 {
831   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
832   PetscInt           i, n_local = jac->n_local;
833   KSPConvergedReason reason;
834 
835   PetscFunctionBegin;
836   for (i = 0; i < n_local; i++) {
837     PetscCall(KSPSetUp(jac->ksp[i]));
838     PetscCall(KSPGetConvergedReason(jac->ksp[i], &reason));
839     if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
840   }
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
844 static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc, Vec x, Vec y)
845 {
846   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
847   PetscInt               i, n_local = jac->n_local;
848   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
849   PetscScalar           *yin;
850   const PetscScalar     *xin;
851 
852   PetscFunctionBegin;
853   PetscCall(VecGetArrayRead(x, &xin));
854   PetscCall(VecGetArray(y, &yin));
855   for (i = 0; i < n_local; i++) {
856     /*
857        To avoid copying the subvector from x into a workspace we instead
858        make the workspace vector array point to the subpart of the array of
859        the global vector.
860     */
861     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
862     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
863 
864     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
865     PetscCall(KSPSolve(jac->ksp[i], bjac->x[i], bjac->y[i]));
866     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
867     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
868 
869     PetscCall(VecResetArray(bjac->x[i]));
870     PetscCall(VecResetArray(bjac->y[i]));
871   }
872   PetscCall(VecRestoreArrayRead(x, &xin));
873   PetscCall(VecRestoreArray(y, &yin));
874   PetscFunctionReturn(PETSC_SUCCESS);
875 }
876 
877 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Multiblock(PC pc, Vec x, Vec y)
878 {
879   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
880   PetscInt               i, n_local = jac->n_local;
881   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
882   PetscScalar           *yin;
883   const PetscScalar     *xin;
884   PC                     subpc;
885 
886   PetscFunctionBegin;
887   PetscCall(VecGetArrayRead(x, &xin));
888   PetscCall(VecGetArray(y, &yin));
889   for (i = 0; i < n_local; i++) {
890     /*
891        To avoid copying the subvector from x into a workspace we instead
892        make the workspace vector array point to the subpart of the array of
893        the global vector.
894     */
895     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
896     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
897 
898     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
899     /* apply the symmetric left portion of the inner PC operator */
900     /* note this bypasses the inner KSP and its options completely */
901     PetscCall(KSPGetPC(jac->ksp[i], &subpc));
902     PetscCall(PCApplySymmetricLeft(subpc, bjac->x[i], bjac->y[i]));
903     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
904 
905     PetscCall(VecResetArray(bjac->x[i]));
906     PetscCall(VecResetArray(bjac->y[i]));
907   }
908   PetscCall(VecRestoreArrayRead(x, &xin));
909   PetscCall(VecRestoreArray(y, &yin));
910   PetscFunctionReturn(PETSC_SUCCESS);
911 }
912 
913 static PetscErrorCode PCApplySymmetricRight_BJacobi_Multiblock(PC pc, Vec x, Vec y)
914 {
915   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
916   PetscInt               i, n_local = jac->n_local;
917   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
918   PetscScalar           *yin;
919   const PetscScalar     *xin;
920   PC                     subpc;
921 
922   PetscFunctionBegin;
923   PetscCall(VecGetArrayRead(x, &xin));
924   PetscCall(VecGetArray(y, &yin));
925   for (i = 0; i < n_local; i++) {
926     /*
927        To avoid copying the subvector from x into a workspace we instead
928        make the workspace vector array point to the subpart of the array of
929        the global vector.
930     */
931     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
932     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
933 
934     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
935     /* apply the symmetric left portion of the inner PC operator */
936     /* note this bypasses the inner KSP and its options completely */
937     PetscCall(KSPGetPC(jac->ksp[i], &subpc));
938     PetscCall(PCApplySymmetricRight(subpc, bjac->x[i], bjac->y[i]));
939     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
940 
941     PetscCall(VecResetArray(bjac->x[i]));
942     PetscCall(VecResetArray(bjac->y[i]));
943   }
944   PetscCall(VecRestoreArrayRead(x, &xin));
945   PetscCall(VecRestoreArray(y, &yin));
946   PetscFunctionReturn(PETSC_SUCCESS);
947 }
948 
949 static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc, Vec x, Vec y)
950 {
951   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
952   PetscInt               i, n_local = jac->n_local;
953   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
954   PetscScalar           *yin;
955   const PetscScalar     *xin;
956 
957   PetscFunctionBegin;
958   PetscCall(VecGetArrayRead(x, &xin));
959   PetscCall(VecGetArray(y, &yin));
960   for (i = 0; i < n_local; i++) {
961     /*
962        To avoid copying the subvector from x into a workspace we instead
963        make the workspace vector array point to the subpart of the array of
964        the global vector.
965     */
966     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
967     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
968 
969     PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
970     PetscCall(KSPSolveTranspose(jac->ksp[i], bjac->x[i], bjac->y[i]));
971     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
972     PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
973 
974     PetscCall(VecResetArray(bjac->x[i]));
975     PetscCall(VecResetArray(bjac->y[i]));
976   }
977   PetscCall(VecRestoreArrayRead(x, &xin));
978   PetscCall(VecRestoreArray(y, &yin));
979   PetscFunctionReturn(PETSC_SUCCESS);
980 }
981 
982 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc, Mat mat, Mat pmat)
983 {
984   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
985   PetscInt               m, n_local, N, M, start, i;
986   const char            *prefix;
987   KSP                    ksp;
988   Vec                    x, y;
989   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
990   PC                     subpc;
991   IS                     is;
992   MatReuse               scall;
993   VecType                vectype;
994   MatNullSpace          *nullsp_mat = NULL, *nullsp_pmat = NULL;
995 
996   PetscFunctionBegin;
997   PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
998 
999   n_local = jac->n_local;
1000 
1001   if (pc->useAmat) {
1002     PetscBool same;
1003     PetscCall(PetscObjectTypeCompare((PetscObject)mat, ((PetscObject)pmat)->type_name, &same));
1004     PetscCheck(same, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "Matrices not of same type");
1005   }
1006 
1007   if (!pc->setupcalled) {
1008     PetscInt nestlevel;
1009 
1010     scall = MAT_INITIAL_MATRIX;
1011 
1012     if (!jac->ksp) {
1013       pc->ops->reset               = PCReset_BJacobi_Multiblock;
1014       pc->ops->destroy             = PCDestroy_BJacobi_Multiblock;
1015       pc->ops->apply               = PCApply_BJacobi_Multiblock;
1016       pc->ops->matapply            = NULL;
1017       pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Multiblock;
1018       pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Multiblock;
1019       pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Multiblock;
1020       pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Multiblock;
1021 
1022       PetscCall(PetscNew(&bjac));
1023       PetscCall(PetscMalloc1(n_local, &jac->ksp));
1024       PetscCall(PetscMalloc2(n_local, &bjac->x, n_local, &bjac->y));
1025       PetscCall(PetscMalloc1(n_local, &bjac->starts));
1026 
1027       jac->data = (void *)bjac;
1028       PetscCall(PetscMalloc1(n_local, &bjac->is));
1029 
1030       for (i = 0; i < n_local; i++) {
1031         PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
1032         PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
1033         PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
1034         PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
1035         PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
1036         PetscCall(KSPSetType(ksp, KSPPREONLY));
1037         PetscCall(KSPGetPC(ksp, &subpc));
1038         PetscCall(PCGetOptionsPrefix(pc, &prefix));
1039         PetscCall(KSPSetOptionsPrefix(ksp, prefix));
1040         PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));
1041 
1042         jac->ksp[i] = ksp;
1043       }
1044     } else {
1045       bjac = (PC_BJacobi_Multiblock *)jac->data;
1046     }
1047 
1048     start = 0;
1049     PetscCall(MatGetVecType(pmat, &vectype));
1050     for (i = 0; i < n_local; i++) {
1051       m = jac->l_lens[i];
1052       /*
1053       The reason we need to generate these vectors is to serve
1054       as the right-hand side and solution vector for the solve on the
1055       block. We do not need to allocate space for the vectors since
1056       that is provided via VecPlaceArray() just before the call to
1057       KSPSolve() on the block.
1058 
1059       */
1060       PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &x));
1061       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &y));
1062       PetscCall(VecSetType(x, vectype));
1063       PetscCall(VecSetType(y, vectype));
1064 
1065       bjac->x[i]      = x;
1066       bjac->y[i]      = y;
1067       bjac->starts[i] = start;
1068 
1069       PetscCall(ISCreateStride(PETSC_COMM_SELF, m, start, 1, &is));
1070       bjac->is[i] = is;
1071 
1072       start += m;
1073     }
1074   } else {
1075     bjac = (PC_BJacobi_Multiblock *)jac->data;
1076     /*
1077        Destroy the blocks from the previous iteration
1078     */
1079     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1080       PetscCall(MatGetNullSpaces(n_local, bjac->pmat, &nullsp_pmat));
1081       PetscCall(MatDestroyMatrices(n_local, &bjac->pmat));
1082       if (pc->useAmat) {
1083         PetscCall(MatGetNullSpaces(n_local, bjac->mat, &nullsp_mat));
1084         PetscCall(MatDestroyMatrices(n_local, &bjac->mat));
1085       }
1086       scall = MAT_INITIAL_MATRIX;
1087     } else scall = MAT_REUSE_MATRIX;
1088   }
1089 
1090   PetscCall(MatCreateSubMatrices(pmat, n_local, bjac->is, bjac->is, scall, &bjac->pmat));
1091   if (nullsp_pmat) PetscCall(MatRestoreNullSpaces(n_local, bjac->pmat, &nullsp_pmat));
1092   if (pc->useAmat) {
1093     PetscCall(MatCreateSubMatrices(mat, n_local, bjac->is, bjac->is, scall, &bjac->mat));
1094     if (nullsp_mat) PetscCall(MatRestoreNullSpaces(n_local, bjac->mat, &nullsp_mat));
1095   }
1096   /* Return control to the user so that the submatrices can be modified (e.g., to apply
1097      different boundary conditions for the submatrices than for the global problem) */
1098   PetscCall(PCModifySubMatrices(pc, n_local, bjac->is, bjac->is, bjac->pmat, pc->modifysubmatricesP));
1099 
1100   for (i = 0; i < n_local; i++) {
1101     PetscCall(KSPGetOptionsPrefix(jac->ksp[i], &prefix));
1102     if (pc->useAmat) {
1103       PetscCall(KSPSetOperators(jac->ksp[i], bjac->mat[i], bjac->pmat[i]));
1104       PetscCall(MatSetOptionsPrefix(bjac->mat[i], prefix));
1105     } else {
1106       PetscCall(KSPSetOperators(jac->ksp[i], bjac->pmat[i], bjac->pmat[i]));
1107     }
1108     PetscCall(MatSetOptionsPrefix(bjac->pmat[i], prefix));
1109     if (pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[i]));
1110   }
1111   PetscFunctionReturn(PETSC_SUCCESS);
1112 }
1113 
1114 /*
1115       These are for a single block with multiple processes
1116 */
1117 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc)
1118 {
1119   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
1120   KSP                subksp = jac->ksp[0];
1121   KSPConvergedReason reason;
1122 
1123   PetscFunctionBegin;
1124   PetscCall(KSPSetUp(subksp));
1125   PetscCall(KSPGetConvergedReason(subksp, &reason));
1126   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1127   PetscFunctionReturn(PETSC_SUCCESS);
1128 }
1129 
1130 static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
1131 {
1132   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1133   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1134 
1135   PetscFunctionBegin;
1136   PetscCall(VecDestroy(&mpjac->ysub));
1137   PetscCall(VecDestroy(&mpjac->xsub));
1138   PetscCall(MatDestroy(&mpjac->submats));
1139   if (jac->ksp) PetscCall(KSPReset(jac->ksp[0]));
1140   PetscFunctionReturn(PETSC_SUCCESS);
1141 }
1142 
1143 static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
1144 {
1145   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1146   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1147 
1148   PetscFunctionBegin;
1149   PetscCall(PCReset_BJacobi_Multiproc(pc));
1150   PetscCall(KSPDestroy(&jac->ksp[0]));
1151   PetscCall(PetscFree(jac->ksp));
1152   PetscCall(PetscSubcommDestroy(&mpjac->psubcomm));
1153 
1154   PetscCall(PetscFree(mpjac));
1155   PetscCall(PCDestroy_BJacobi(pc));
1156   PetscFunctionReturn(PETSC_SUCCESS);
1157 }
1158 
1159 static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc, Vec x, Vec y)
1160 {
1161   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1162   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1163   PetscScalar          *yarray;
1164   const PetscScalar    *xarray;
1165   KSPConvergedReason    reason;
1166 
1167   PetscFunctionBegin;
1168   /* place x's and y's local arrays into xsub and ysub */
1169   PetscCall(VecGetArrayRead(x, &xarray));
1170   PetscCall(VecGetArray(y, &yarray));
1171   PetscCall(VecPlaceArray(mpjac->xsub, xarray));
1172   PetscCall(VecPlaceArray(mpjac->ysub, yarray));
1173 
1174   /* apply preconditioner on each matrix block */
1175   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1176   PetscCall(KSPSolve(jac->ksp[0], mpjac->xsub, mpjac->ysub));
1177   PetscCall(KSPCheckSolve(jac->ksp[0], pc, mpjac->ysub));
1178   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1179   PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
1180   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1181 
1182   PetscCall(VecResetArray(mpjac->xsub));
1183   PetscCall(VecResetArray(mpjac->ysub));
1184   PetscCall(VecRestoreArrayRead(x, &xarray));
1185   PetscCall(VecRestoreArray(y, &yarray));
1186   PetscFunctionReturn(PETSC_SUCCESS);
1187 }
1188 
1189 static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc, Mat X, Mat Y)
1190 {
1191   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
1192   KSPConvergedReason reason;
1193   Mat                sX, sY;
1194   const PetscScalar *x;
1195   PetscScalar       *y;
1196   PetscInt           m, N, lda, ldb;
1197 
1198   PetscFunctionBegin;
1199   /* apply preconditioner on each matrix block */
1200   PetscCall(MatGetLocalSize(X, &m, NULL));
1201   PetscCall(MatGetSize(X, NULL, &N));
1202   PetscCall(MatDenseGetLDA(X, &lda));
1203   PetscCall(MatDenseGetLDA(Y, &ldb));
1204   PetscCall(MatDenseGetArrayRead(X, &x));
1205   PetscCall(MatDenseGetArrayWrite(Y, &y));
1206   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, (PetscScalar *)x, &sX));
1207   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, y, &sY));
1208   PetscCall(MatDenseSetLDA(sX, lda));
1209   PetscCall(MatDenseSetLDA(sY, ldb));
1210   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
1211   PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
1212   PetscCall(KSPCheckSolve(jac->ksp[0], pc, NULL));
1213   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
1214   PetscCall(MatDestroy(&sY));
1215   PetscCall(MatDestroy(&sX));
1216   PetscCall(MatDenseRestoreArrayWrite(Y, &y));
1217   PetscCall(MatDenseRestoreArrayRead(X, &x));
1218   PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
1219   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1220   PetscFunctionReturn(PETSC_SUCCESS);
1221 }
1222 
1223 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
1224 {
1225   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1226   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1227   PetscInt              m, n;
1228   MPI_Comm              comm, subcomm = 0;
1229   const char           *prefix;
1230   PetscBool             wasSetup = PETSC_TRUE;
1231   VecType               vectype;
1232 
1233   PetscFunctionBegin;
1234   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
1235   PetscCheck(jac->n_local <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only a single block in a subcommunicator is supported");
1236   jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
1237   if (!pc->setupcalled) {
1238     PetscInt nestlevel;
1239 
1240     wasSetup = PETSC_FALSE;
1241     PetscCall(PetscNew(&mpjac));
1242     jac->data = (void *)mpjac;
1243 
1244     /* initialize datastructure mpjac */
1245     if (!jac->psubcomm) {
1246       /* Create default contiguous subcommunicatiors if user does not provide them */
1247       PetscCall(PetscSubcommCreate(comm, &jac->psubcomm));
1248       PetscCall(PetscSubcommSetNumber(jac->psubcomm, jac->n));
1249       PetscCall(PetscSubcommSetType(jac->psubcomm, PETSC_SUBCOMM_CONTIGUOUS));
1250     }
1251     mpjac->psubcomm = jac->psubcomm;
1252     subcomm         = PetscSubcommChild(mpjac->psubcomm);
1253 
1254     /* Get matrix blocks of pmat */
1255     PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
1256 
1257     /* create a new PC that processors in each subcomm have copy of */
1258     PetscCall(PetscMalloc1(1, &jac->ksp));
1259     PetscCall(KSPCreate(subcomm, &jac->ksp[0]));
1260     PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
1261     PetscCall(KSPSetNestLevel(jac->ksp[0], nestlevel + 1));
1262     PetscCall(KSPSetErrorIfNotConverged(jac->ksp[0], pc->erroriffailure));
1263     PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0], (PetscObject)pc, 1));
1264     PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
1265     PetscCall(KSPGetPC(jac->ksp[0], &mpjac->pc));
1266 
1267     PetscCall(PCGetOptionsPrefix(pc, &prefix));
1268     PetscCall(KSPSetOptionsPrefix(jac->ksp[0], prefix));
1269     PetscCall(KSPAppendOptionsPrefix(jac->ksp[0], "sub_"));
1270     PetscCall(KSPGetOptionsPrefix(jac->ksp[0], &prefix));
1271     PetscCall(MatSetOptionsPrefix(mpjac->submats, prefix));
1272 
1273     /* create dummy vectors xsub and ysub */
1274     PetscCall(MatGetLocalSize(mpjac->submats, &m, &n));
1275     PetscCall(VecCreateMPIWithArray(subcomm, 1, n, PETSC_DECIDE, NULL, &mpjac->xsub));
1276     PetscCall(VecCreateMPIWithArray(subcomm, 1, m, PETSC_DECIDE, NULL, &mpjac->ysub));
1277     PetscCall(MatGetVecType(mpjac->submats, &vectype));
1278     PetscCall(VecSetType(mpjac->xsub, vectype));
1279     PetscCall(VecSetType(mpjac->ysub, vectype));
1280 
1281     pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc;
1282     pc->ops->reset         = PCReset_BJacobi_Multiproc;
1283     pc->ops->destroy       = PCDestroy_BJacobi_Multiproc;
1284     pc->ops->apply         = PCApply_BJacobi_Multiproc;
1285     pc->ops->matapply      = PCMatApply_BJacobi_Multiproc;
1286   } else { /* pc->setupcalled */
1287     subcomm = PetscSubcommChild(mpjac->psubcomm);
1288     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1289       /* destroy old matrix blocks, then get new matrix blocks */
1290       if (mpjac->submats) PetscCall(MatDestroy(&mpjac->submats));
1291       PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
1292     } else {
1293       PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_REUSE_MATRIX, &mpjac->submats));
1294     }
1295     PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
1296   }
1297 
1298   if (!wasSetup && pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[0]));
1299   PetscFunctionReturn(PETSC_SUCCESS);
1300 }
1301