xref: /petsc/src/ksp/pc/impls/bjacobi/bjacobi.c (revision 517f4e3460cd8c7e73c19b8bf533f1efe47f30a7)
1 /*
2    Defines a block Jacobi preconditioner.
3 */
4 
5 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/
6 
7 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC, Mat, Mat);
8 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC, Mat, Mat);
9 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);
10 
11 static PetscErrorCode PCSetUp_BJacobi(PC pc)
12 {
13   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
14   Mat         mat = pc->mat, pmat = pc->pmat;
15   PetscBool   hasop;
16   PetscInt    N, M, start, i, sum, end;
17   PetscInt    bs, i_start = -1, i_end = -1;
18   PetscMPIInt rank, size;
19 
20   PetscFunctionBegin;
21   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
22   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
23   PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
24   PetscCall(MatGetBlockSize(pc->pmat, &bs));
25 
26   if (jac->n > 0 && jac->n < size) {
27     PetscCall(PCSetUp_BJacobi_Multiproc(pc));
28     PetscFunctionReturn(PETSC_SUCCESS);
29   }
30 
31   /*    Determines the number of blocks assigned to each processor */
32   /*   local block count  given */
33   if (jac->n_local > 0 && jac->n < 0) {
34     PetscCall(MPIU_Allreduce(&jac->n_local, &jac->n, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
35     if (jac->l_lens) { /* check that user set these correctly */
36       sum = 0;
37       for (i = 0; i < jac->n_local; i++) {
38         PetscCheck(jac->l_lens[i] / bs * bs == jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
39         sum += jac->l_lens[i];
40       }
41       PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local lens set incorrectly");
42     } else {
43       PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
44       for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
45     }
46   } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
47     /* global blocks given: determine which ones are local */
48     if (jac->g_lens) {
49       /* check if the g_lens is has valid entries */
50       for (i = 0; i < jac->n; i++) {
51         PetscCheck(jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Zero block not allowed");
52         PetscCheck(jac->g_lens[i] / bs * bs == jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
53       }
54       if (size == 1) {
55         jac->n_local = jac->n;
56         PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
57         PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens, jac->n_local));
58         /* check that user set these correctly */
59         sum = 0;
60         for (i = 0; i < jac->n_local; i++) sum += jac->l_lens[i];
61         PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Global lens set incorrectly");
62       } else {
63         PetscCall(MatGetOwnershipRange(pc->pmat, &start, &end));
64         /* loop over blocks determining first one owned by me */
65         sum = 0;
66         for (i = 0; i < jac->n + 1; i++) {
67           if (sum == start) {
68             i_start = i;
69             goto start_1;
70           }
71           if (i < jac->n) sum += jac->g_lens[i];
72         }
73         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
74       start_1:
75         for (i = i_start; i < jac->n + 1; i++) {
76           if (sum == end) {
77             i_end = i;
78             goto end_1;
79           }
80           if (i < jac->n) sum += jac->g_lens[i];
81         }
82         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
83       end_1:
84         jac->n_local = i_end - i_start;
85         PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
86         PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens + i_start, jac->n_local));
87       }
88     } else { /* no global blocks given, determine then using default layout */
89       jac->n_local = jac->n / size + ((jac->n % size) > rank);
90       PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
91       for (i = 0; i < jac->n_local; i++) {
92         jac->l_lens[i] = ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)) * bs;
93         PetscCheck(jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Too many blocks given");
94       }
95     }
96   } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
97     jac->n       = size;
98     jac->n_local = 1;
99     PetscCall(PetscMalloc1(1, &jac->l_lens));
100     jac->l_lens[0] = M;
101   } else { /* jac->n > 0 && jac->n_local > 0 */
102     if (!jac->l_lens) {
103       PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
104       for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
105     }
106   }
107   PetscCheck(jac->n_local >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of blocks is less than number of processors");
108 
109   /*    Determines mat and pmat */
110   PetscCall(MatHasOperation(pc->mat, MATOP_GET_DIAGONAL_BLOCK, &hasop));
111   if (!hasop && size == 1) {
112     mat  = pc->mat;
113     pmat = pc->pmat;
114   } else {
115     if (pc->useAmat) {
116       /* use block from Amat matrix, not Pmat for local MatMult() */
117       PetscCall(MatGetDiagonalBlock(pc->mat, &mat));
118     }
119     if (pc->pmat != pc->mat || !pc->useAmat) {
120       PetscCall(MatGetDiagonalBlock(pc->pmat, &pmat));
121     } else pmat = mat;
122   }
123 
124   /*
125      Setup code depends on the number of blocks
126   */
127   if (jac->n_local == 1) {
128     PetscCall(PCSetUp_BJacobi_Singleblock(pc, mat, pmat));
129   } else {
130     PetscCall(PCSetUp_BJacobi_Multiblock(pc, mat, pmat));
131   }
132   PetscFunctionReturn(PETSC_SUCCESS);
133 }
134 
135 /* Default destroy, if it has never been setup */
136 static PetscErrorCode PCDestroy_BJacobi(PC pc)
137 {
138   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
139 
140   PetscFunctionBegin;
141   PetscCall(PetscFree(jac->g_lens));
142   PetscCall(PetscFree(jac->l_lens));
143   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", NULL));
144   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", NULL));
145   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", NULL));
146   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", NULL));
147   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", NULL));
148   PetscCall(PetscFree(pc->data));
149   PetscFunctionReturn(PETSC_SUCCESS);
150 }
151 
152 static PetscErrorCode PCSetFromOptions_BJacobi(PC pc, PetscOptionItems *PetscOptionsObject)
153 {
154   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
155   PetscInt    blocks, i;
156   PetscBool   flg;
157 
158   PetscFunctionBegin;
159   PetscOptionsHeadBegin(PetscOptionsObject, "Block Jacobi options");
160   PetscCall(PetscOptionsInt("-pc_bjacobi_blocks", "Total number of blocks", "PCBJacobiSetTotalBlocks", jac->n, &blocks, &flg));
161   if (flg) PetscCall(PCBJacobiSetTotalBlocks(pc, blocks, NULL));
162   PetscCall(PetscOptionsInt("-pc_bjacobi_local_blocks", "Local number of blocks", "PCBJacobiSetLocalBlocks", jac->n_local, &blocks, &flg));
163   if (flg) PetscCall(PCBJacobiSetLocalBlocks(pc, blocks, NULL));
164   if (jac->ksp) {
165     /* The sub-KSP has already been set up (e.g., PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions was not called
166      * unless we had already been called. */
167     for (i = 0; i < jac->n_local; i++) PetscCall(KSPSetFromOptions(jac->ksp[i]));
168   }
169   PetscOptionsHeadEnd();
170   PetscFunctionReturn(PETSC_SUCCESS);
171 }
172 
173 #include <petscdraw.h>
174 static PetscErrorCode PCView_BJacobi(PC pc, PetscViewer viewer)
175 {
176   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
177   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
178   PetscMPIInt           rank;
179   PetscInt              i;
180   PetscBool             iascii, isstring, isdraw;
181   PetscViewer           sviewer;
182   PetscViewerFormat     format;
183   const char           *prefix;
184 
185   PetscFunctionBegin;
186   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
187   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
188   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
189   if (iascii) {
190     if (pc->useAmat) PetscCall(PetscViewerASCIIPrintf(viewer, "  using Amat local matrix, number of blocks = %" PetscInt_FMT "\n", jac->n));
191     PetscCall(PetscViewerASCIIPrintf(viewer, "  number of blocks = %" PetscInt_FMT "\n", jac->n));
192     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
193     PetscCall(PetscViewerGetFormat(viewer, &format));
194     if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) {
195       PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for first block is in the following KSP and PC objects on rank 0:\n"));
196       PetscCall(PCGetOptionsPrefix(pc, &prefix));
197       PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all blocks\n", prefix ? prefix : ""));
198       if (jac->ksp && !jac->psubcomm) {
199         PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
200         if (rank == 0) {
201           PetscCall(PetscViewerASCIIPushTab(viewer));
202           PetscCall(KSPView(jac->ksp[0], sviewer));
203           PetscCall(PetscViewerASCIIPopTab(viewer));
204         }
205         PetscCall(PetscViewerFlush(sviewer));
206         PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
207         PetscCall(PetscViewerFlush(viewer));
208         /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
209         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
210       } else if (mpjac && jac->ksp && mpjac->psubcomm) {
211         PetscCall(PetscViewerGetSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
212         if (!mpjac->psubcomm->color) {
213           PetscCall(PetscViewerASCIIPushTab(viewer));
214           PetscCall(KSPView(*(jac->ksp), sviewer));
215           PetscCall(PetscViewerASCIIPopTab(viewer));
216         }
217         PetscCall(PetscViewerFlush(sviewer));
218         PetscCall(PetscViewerRestoreSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
219         PetscCall(PetscViewerFlush(viewer));
220         /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
221         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
222       } else {
223         PetscCall(PetscViewerFlush(viewer));
224       }
225     } else {
226       PetscInt n_global;
227       PetscCall(MPIU_Allreduce(&jac->n_local, &n_global, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
228       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
229       PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for each block is in the following KSP and PC objects:\n"));
230       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n", rank, jac->n_local, jac->first_local));
231       PetscCall(PetscViewerASCIIPushTab(viewer));
232       PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
233       for (i = 0; i < jac->n_local; i++) {
234         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] local block number %" PetscInt_FMT "\n", rank, i));
235         PetscCall(KSPView(jac->ksp[i], sviewer));
236         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "- - - - - - - - - - - - - - - - - -\n"));
237       }
238       PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
239       PetscCall(PetscViewerASCIIPopTab(viewer));
240       PetscCall(PetscViewerFlush(viewer));
241       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
242     }
243   } else if (isstring) {
244     PetscCall(PetscViewerStringSPrintf(viewer, " blks=%" PetscInt_FMT, jac->n));
245     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
246     if (jac->ksp) PetscCall(KSPView(jac->ksp[0], sviewer));
247     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
248   } else if (isdraw) {
249     PetscDraw draw;
250     char      str[25];
251     PetscReal x, y, bottom, h;
252 
253     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
254     PetscCall(PetscDrawGetCurrentPoint(draw, &x, &y));
255     PetscCall(PetscSNPrintf(str, 25, "Number blocks %" PetscInt_FMT, jac->n));
256     PetscCall(PetscDrawStringBoxed(draw, x, y, PETSC_DRAW_RED, PETSC_DRAW_BLACK, str, NULL, &h));
257     bottom = y - h;
258     PetscCall(PetscDrawPushCurrentPoint(draw, x, bottom));
259     /* warning the communicator on viewer is different then on ksp in parallel */
260     if (jac->ksp) PetscCall(KSPView(jac->ksp[0], viewer));
261     PetscCall(PetscDrawPopCurrentPoint(draw));
262   }
263   PetscFunctionReturn(PETSC_SUCCESS);
264 }
265 
266 static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc, PetscInt *n_local, PetscInt *first_local, KSP **ksp)
267 {
268   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
269 
270   PetscFunctionBegin;
271   PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Must call KSPSetUp() or PCSetUp() first");
272 
273   if (n_local) *n_local = jac->n_local;
274   if (first_local) *first_local = jac->first_local;
275   if (ksp) *ksp = jac->ksp;
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
279 static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt *lens)
280 {
281   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
282 
283   PetscFunctionBegin;
284   PetscCheck(pc->setupcalled <= 0 || jac->n == blocks, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
285   jac->n = blocks;
286   if (!lens) jac->g_lens = NULL;
287   else {
288     PetscCall(PetscMalloc1(blocks, &jac->g_lens));
289     PetscCall(PetscArraycpy(jac->g_lens, lens, blocks));
290   }
291   PetscFunctionReturn(PETSC_SUCCESS);
292 }
293 
294 static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
295 {
296   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
297 
298   PetscFunctionBegin;
299   *blocks = jac->n;
300   if (lens) *lens = jac->g_lens;
301   PetscFunctionReturn(PETSC_SUCCESS);
302 }
303 
304 static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt lens[])
305 {
306   PC_BJacobi *jac;
307 
308   PetscFunctionBegin;
309   jac = (PC_BJacobi *)pc->data;
310 
311   jac->n_local = blocks;
312   if (!lens) jac->l_lens = NULL;
313   else {
314     PetscCall(PetscMalloc1(blocks, &jac->l_lens));
315     PetscCall(PetscArraycpy(jac->l_lens, lens, blocks));
316   }
317   PetscFunctionReturn(PETSC_SUCCESS);
318 }
319 
320 static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
321 {
322   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
323 
324   PetscFunctionBegin;
325   *blocks = jac->n_local;
326   if (lens) *lens = jac->l_lens;
327   PetscFunctionReturn(PETSC_SUCCESS);
328 }
329 
330 /*@C
331   PCBJacobiGetSubKSP - Gets the local `KSP` contexts for all blocks on
332   this processor.
333 
334   Not Collective
335 
336   Input Parameter:
337 . pc - the preconditioner context
338 
339   Output Parameters:
340 + n_local     - the number of blocks on this processor, or NULL
341 . first_local - the global number of the first block on this processor, or NULL
342 - ksp         - the array of KSP contexts
343 
344   Notes:
345   After `PCBJacobiGetSubKSP()` the array of `KSP` contexts is not to be freed.
346 
347   Currently for some matrix implementations only 1 block per processor
348   is supported.
349 
350   You must call `KSPSetUp()` or `PCSetUp()` before calling `PCBJacobiGetSubKSP()`.
351 
352   Fortran Notes:
353   You must pass in a `KSP` array that is large enough to contain all the local `KSP`s.
354 
355   You can call `PCBJacobiGetSubKSP`(pc,nlocal,firstlocal,`PETSC_NULL_KSP`,ierr) to determine how large the
356   `KSP` array must be.
357 
358   Level: advanced
359 
360 .seealso: `PCBJACOBI`, `PCASM`, `PCASMGetSubKSP()`
361 @*/
362 PetscErrorCode PCBJacobiGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[])
363 {
364   PetscFunctionBegin;
365   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
366   PetscUseMethod(pc, "PCBJacobiGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp));
367   PetscFunctionReturn(PETSC_SUCCESS);
368 }
369 
370 /*@
371   PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
372   Jacobi preconditioner.
373 
374   Collective
375 
376   Input Parameters:
377 + pc     - the preconditioner context
378 . blocks - the number of blocks
379 - lens   - [optional] integer array containing the size of each block
380 
381   Options Database Key:
382 . -pc_bjacobi_blocks <blocks> - Sets the number of global blocks
383 
384   Note:
385   Currently only a limited number of blocking configurations are supported.
386   All processors sharing the `PC` must call this routine with the same data.
387 
388   Level: intermediate
389 
390 .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetLocalBlocks()`
391 @*/
392 PetscErrorCode PCBJacobiSetTotalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
393 {
394   PetscFunctionBegin;
395   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
396   PetscCheck(blocks > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Must have positive blocks");
397   PetscTryMethod(pc, "PCBJacobiSetTotalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
398   PetscFunctionReturn(PETSC_SUCCESS);
399 }
400 
401 /*@C
402   PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
403   Jacobi, `PCBJACOBI`, preconditioner.
404 
405   Not Collective
406 
407   Input Parameter:
408 . pc - the preconditioner context
409 
410   Output Parameters:
411 + blocks - the number of blocks
412 - lens   - integer array containing the size of each block
413 
414   Level: intermediate
415 
416 .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetLocalBlocks()`
417 @*/
418 PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
419 {
420   PetscFunctionBegin;
421   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
422   PetscAssertPointer(blocks, 2);
423   PetscUseMethod(pc, "PCBJacobiGetTotalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
424   PetscFunctionReturn(PETSC_SUCCESS);
425 }
426 
427 /*@
428   PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
429   Jacobi, `PCBJACOBI`,  preconditioner.
430 
431   Not Collective
432 
433   Input Parameters:
434 + pc     - the preconditioner context
435 . blocks - the number of blocks
436 - lens   - [optional] integer array containing size of each block
437 
438   Options Database Key:
439 . -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks
440 
441   Note:
442   Currently only a limited number of blocking configurations are supported.
443 
444   Level: intermediate
445 
446 .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetTotalBlocks()`
447 @*/
448 PetscErrorCode PCBJacobiSetLocalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
449 {
450   PetscFunctionBegin;
451   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
452   PetscCheck(blocks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must have nonegative blocks");
453   PetscTryMethod(pc, "PCBJacobiSetLocalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
454   PetscFunctionReturn(PETSC_SUCCESS);
455 }
456 
457 /*@C
458   PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
459   Jacobi, `PCBJACOBI`, preconditioner.
460 
461   Not Collective
462 
463   Input Parameters:
464 + pc     - the preconditioner context
465 . blocks - the number of blocks
466 - lens   - [optional] integer array containing size of each block
467 
468   Note:
469   Currently only a limited number of blocking configurations are supported.
470 
471   Level: intermediate
472 
473 .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetTotalBlocks()`
474 @*/
475 PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
476 {
477   PetscFunctionBegin;
478   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
479   PetscAssertPointer(blocks, 2);
480   PetscUseMethod(pc, "PCBJacobiGetLocalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
481   PetscFunctionReturn(PETSC_SUCCESS);
482 }
483 
484 /*MC
485    PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with
486            its own `KSP` object.
487 
488    Options Database Keys:
489 +  -pc_use_amat - use Amat to apply block of operator in inner Krylov method
490 -  -pc_bjacobi_blocks <n> - use n total blocks
491 
492    Notes:
493     See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable point block, and `PCPBJACOBI` for fixed size point block
494 
495     Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.
496 
497      To set options on the solvers for each block append -sub_ to all the `KSP` and `PC`
498         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
499 
500      To set the options on the solvers separate for each block call `PCBJacobiGetSubKSP()`
501          and set the options directly on the resulting `KSP` object (you can access its `PC`
502          `KSPGetPC()`)
503 
504      For GPU-based vectors (`VECCUDA`, `VECViennaCL`) it is recommended to use exactly one block per MPI process for best
505          performance.  Different block partitioning may lead to additional data transfers
506          between host and GPU that lead to degraded performance.
507 
508      When multiple processes share a single block, each block encompasses exactly all the unknowns owned its set of processes.
509 
510    Level: beginner
511 
512 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCType`,
513           `PCASM`, `PCSetUseAmat()`, `PCGetUseAmat()`, `PCBJacobiGetSubKSP()`, `PCBJacobiSetTotalBlocks()`,
514           `PCBJacobiSetLocalBlocks()`, `PCSetModifySubMatrices()`, `PCJACOBI`, `PCVPBJACOBI`, `PCPBJACOBI`
515 M*/
516 
517 PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
518 {
519   PetscMPIInt rank;
520   PC_BJacobi *jac;
521 
522   PetscFunctionBegin;
523   PetscCall(PetscNew(&jac));
524   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
525 
526   pc->ops->apply           = NULL;
527   pc->ops->matapply        = NULL;
528   pc->ops->applytranspose  = NULL;
529   pc->ops->setup           = PCSetUp_BJacobi;
530   pc->ops->destroy         = PCDestroy_BJacobi;
531   pc->ops->setfromoptions  = PCSetFromOptions_BJacobi;
532   pc->ops->view            = PCView_BJacobi;
533   pc->ops->applyrichardson = NULL;
534 
535   pc->data         = (void *)jac;
536   jac->n           = -1;
537   jac->n_local     = -1;
538   jac->first_local = rank;
539   jac->ksp         = NULL;
540   jac->g_lens      = NULL;
541   jac->l_lens      = NULL;
542   jac->psubcomm    = NULL;
543 
544   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", PCBJacobiGetSubKSP_BJacobi));
545   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", PCBJacobiSetTotalBlocks_BJacobi));
546   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", PCBJacobiGetTotalBlocks_BJacobi));
547   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", PCBJacobiSetLocalBlocks_BJacobi));
548   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", PCBJacobiGetLocalBlocks_BJacobi));
549   PetscFunctionReturn(PETSC_SUCCESS);
550 }
551 
552 /*
553         These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI
554 */
555 static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
556 {
557   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
558   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
559 
560   PetscFunctionBegin;
561   PetscCall(KSPReset(jac->ksp[0]));
562   PetscCall(VecDestroy(&bjac->x));
563   PetscCall(VecDestroy(&bjac->y));
564   PetscFunctionReturn(PETSC_SUCCESS);
565 }
566 
567 static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
568 {
569   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
570   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
571 
572   PetscFunctionBegin;
573   PetscCall(PCReset_BJacobi_Singleblock(pc));
574   PetscCall(KSPDestroy(&jac->ksp[0]));
575   PetscCall(PetscFree(jac->ksp));
576   PetscCall(PetscFree(bjac));
577   PetscCall(PCDestroy_BJacobi(pc));
578   PetscFunctionReturn(PETSC_SUCCESS);
579 }
580 
581 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
582 {
583   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
584   KSP                subksp = jac->ksp[0];
585   KSPConvergedReason reason;
586 
587   PetscFunctionBegin;
588   PetscCall(KSPSetUp(subksp));
589   PetscCall(KSPGetConvergedReason(subksp, &reason));
590   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
591   PetscFunctionReturn(PETSC_SUCCESS);
592 }
593 
594 static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc, Vec x, Vec y)
595 {
596   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
597   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
598 
599   PetscFunctionBegin;
600   PetscCall(VecGetLocalVectorRead(x, bjac->x));
601   PetscCall(VecGetLocalVector(y, bjac->y));
602   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
603      matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
604      of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
605   PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
606   PetscCall(KSPSolve(jac->ksp[0], bjac->x, bjac->y));
607   PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
608   PetscCall(VecRestoreLocalVectorRead(x, bjac->x));
609   PetscCall(VecRestoreLocalVector(y, bjac->y));
610   PetscFunctionReturn(PETSC_SUCCESS);
611 }
612 
613 static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc, Mat X, Mat Y)
614 {
615   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
616   Mat         sX, sY;
617 
618   PetscFunctionBegin;
619   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
620      matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
621      of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
622   PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
623   PetscCall(MatDenseGetLocalMatrix(X, &sX));
624   PetscCall(MatDenseGetLocalMatrix(Y, &sY));
625   PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
626   PetscFunctionReturn(PETSC_SUCCESS);
627 }
628 
629 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc, Vec x, Vec y)
630 {
631   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
632   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
633   PetscScalar            *y_array;
634   const PetscScalar      *x_array;
635   PC                      subpc;
636 
637   PetscFunctionBegin;
638   /*
639       The VecPlaceArray() is to avoid having to copy the
640     y vector into the bjac->x vector. The reason for
641     the bjac->x vector is that we need a sequential vector
642     for the sequential solve.
643   */
644   PetscCall(VecGetArrayRead(x, &x_array));
645   PetscCall(VecGetArray(y, &y_array));
646   PetscCall(VecPlaceArray(bjac->x, x_array));
647   PetscCall(VecPlaceArray(bjac->y, y_array));
648   /* apply the symmetric left portion of the inner PC operator */
649   /* note this by-passes the inner KSP and its options completely */
650   PetscCall(KSPGetPC(jac->ksp[0], &subpc));
651   PetscCall(PCApplySymmetricLeft(subpc, bjac->x, bjac->y));
652   PetscCall(VecResetArray(bjac->x));
653   PetscCall(VecResetArray(bjac->y));
654   PetscCall(VecRestoreArrayRead(x, &x_array));
655   PetscCall(VecRestoreArray(y, &y_array));
656   PetscFunctionReturn(PETSC_SUCCESS);
657 }
658 
659 static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc, Vec x, Vec y)
660 {
661   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
662   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
663   PetscScalar            *y_array;
664   const PetscScalar      *x_array;
665   PC                      subpc;
666 
667   PetscFunctionBegin;
668   /*
669       The VecPlaceArray() is to avoid having to copy the
670     y vector into the bjac->x vector. The reason for
671     the bjac->x vector is that we need a sequential vector
672     for the sequential solve.
673   */
674   PetscCall(VecGetArrayRead(x, &x_array));
675   PetscCall(VecGetArray(y, &y_array));
676   PetscCall(VecPlaceArray(bjac->x, x_array));
677   PetscCall(VecPlaceArray(bjac->y, y_array));
678 
679   /* apply the symmetric right portion of the inner PC operator */
680   /* note this by-passes the inner KSP and its options completely */
681 
682   PetscCall(KSPGetPC(jac->ksp[0], &subpc));
683   PetscCall(PCApplySymmetricRight(subpc, bjac->x, bjac->y));
684 
685   PetscCall(VecResetArray(bjac->x));
686   PetscCall(VecResetArray(bjac->y));
687   PetscCall(VecRestoreArrayRead(x, &x_array));
688   PetscCall(VecRestoreArray(y, &y_array));
689   PetscFunctionReturn(PETSC_SUCCESS);
690 }
691 
692 static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc, Vec x, Vec y)
693 {
694   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
695   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
696   PetscScalar            *y_array;
697   const PetscScalar      *x_array;
698 
699   PetscFunctionBegin;
700   /*
701       The VecPlaceArray() is to avoid having to copy the
702     y vector into the bjac->x vector. The reason for
703     the bjac->x vector is that we need a sequential vector
704     for the sequential solve.
705   */
706   PetscCall(VecGetArrayRead(x, &x_array));
707   PetscCall(VecGetArray(y, &y_array));
708   PetscCall(VecPlaceArray(bjac->x, x_array));
709   PetscCall(VecPlaceArray(bjac->y, y_array));
710   PetscCall(KSPSolveTranspose(jac->ksp[0], bjac->x, bjac->y));
711   PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
712   PetscCall(VecResetArray(bjac->x));
713   PetscCall(VecResetArray(bjac->y));
714   PetscCall(VecRestoreArrayRead(x, &x_array));
715   PetscCall(VecRestoreArray(y, &y_array));
716   PetscFunctionReturn(PETSC_SUCCESS);
717 }
718 
719 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc, Mat mat, Mat pmat)
720 {
721   PC_BJacobi             *jac = (PC_BJacobi *)pc->data;
722   PetscInt                m;
723   KSP                     ksp;
724   PC_BJacobi_Singleblock *bjac;
725   PetscBool               wasSetup = PETSC_TRUE;
726   VecType                 vectype;
727   const char             *prefix;
728 
729   PetscFunctionBegin;
730   if (!pc->setupcalled) {
731     if (!jac->ksp) {
732       PetscInt nestlevel;
733 
734       wasSetup = PETSC_FALSE;
735 
736       PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
737       PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
738       PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
739       PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
740       PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
741       PetscCall(KSPSetType(ksp, KSPPREONLY));
742       PetscCall(PCGetOptionsPrefix(pc, &prefix));
743       PetscCall(KSPSetOptionsPrefix(ksp, prefix));
744       PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));
745 
746       pc->ops->reset               = PCReset_BJacobi_Singleblock;
747       pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
748       pc->ops->apply               = PCApply_BJacobi_Singleblock;
749       pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
750       pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
751       pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
752       pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
753       pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;
754 
755       PetscCall(PetscMalloc1(1, &jac->ksp));
756       jac->ksp[0] = ksp;
757 
758       PetscCall(PetscNew(&bjac));
759       jac->data = (void *)bjac;
760     } else {
761       ksp  = jac->ksp[0];
762       bjac = (PC_BJacobi_Singleblock *)jac->data;
763     }
764 
765     /*
766       The reason we need to generate these vectors is to serve
767       as the right-hand side and solution vector for the solve on the
768       block. We do not need to allocate space for the vectors since
769       that is provided via VecPlaceArray() just before the call to
770       KSPSolve() on the block.
771     */
772     PetscCall(MatGetSize(pmat, &m, &m));
773     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->x));
774     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->y));
775     PetscCall(MatGetVecType(pmat, &vectype));
776     PetscCall(VecSetType(bjac->x, vectype));
777     PetscCall(VecSetType(bjac->y, vectype));
778   } else {
779     ksp  = jac->ksp[0];
780     bjac = (PC_BJacobi_Singleblock *)jac->data;
781   }
782   PetscCall(KSPGetOptionsPrefix(ksp, &prefix));
783   if (pc->useAmat) {
784     PetscCall(KSPSetOperators(ksp, mat, pmat));
785     PetscCall(MatSetOptionsPrefix(mat, prefix));
786   } else {
787     PetscCall(KSPSetOperators(ksp, pmat, pmat));
788   }
789   PetscCall(MatSetOptionsPrefix(pmat, prefix));
790   if (!wasSetup && pc->setfromoptionscalled) {
791     /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
792     PetscCall(KSPSetFromOptions(ksp));
793   }
794   PetscFunctionReturn(PETSC_SUCCESS);
795 }
796 
797 static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
798 {
799   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
800   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
801   PetscInt               i;
802 
803   PetscFunctionBegin;
804   if (bjac && bjac->pmat) {
805     PetscCall(MatDestroyMatrices(jac->n_local, &bjac->pmat));
806     if (pc->useAmat) PetscCall(MatDestroyMatrices(jac->n_local, &bjac->mat));
807   }
808 
809   for (i = 0; i < jac->n_local; i++) {
810     PetscCall(KSPReset(jac->ksp[i]));
811     if (bjac && bjac->x) {
812       PetscCall(VecDestroy(&bjac->x[i]));
813       PetscCall(VecDestroy(&bjac->y[i]));
814       PetscCall(ISDestroy(&bjac->is[i]));
815     }
816   }
817   PetscCall(PetscFree(jac->l_lens));
818   PetscCall(PetscFree(jac->g_lens));
819   PetscFunctionReturn(PETSC_SUCCESS);
820 }
821 
822 static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
823 {
824   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
825   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
826   PetscInt               i;
827 
828   PetscFunctionBegin;
829   PetscCall(PCReset_BJacobi_Multiblock(pc));
830   if (bjac) {
831     PetscCall(PetscFree2(bjac->x, bjac->y));
832     PetscCall(PetscFree(bjac->starts));
833     PetscCall(PetscFree(bjac->is));
834   }
835   PetscCall(PetscFree(jac->data));
836   for (i = 0; i < jac->n_local; i++) PetscCall(KSPDestroy(&jac->ksp[i]));
837   PetscCall(PetscFree(jac->ksp));
838   PetscCall(PCDestroy_BJacobi(pc));
839   PetscFunctionReturn(PETSC_SUCCESS);
840 }
841 
842 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
843 {
844   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
845   PetscInt           i, n_local = jac->n_local;
846   KSPConvergedReason reason;
847 
848   PetscFunctionBegin;
849   for (i = 0; i < n_local; i++) {
850     PetscCall(KSPSetUp(jac->ksp[i]));
851     PetscCall(KSPGetConvergedReason(jac->ksp[i], &reason));
852     if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
853   }
854   PetscFunctionReturn(PETSC_SUCCESS);
855 }
856 
857 static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc, Vec x, Vec y)
858 {
859   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
860   PetscInt               i, n_local = jac->n_local;
861   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
862   PetscScalar           *yin;
863   const PetscScalar     *xin;
864 
865   PetscFunctionBegin;
866   PetscCall(VecGetArrayRead(x, &xin));
867   PetscCall(VecGetArray(y, &yin));
868   for (i = 0; i < n_local; i++) {
869     /*
870        To avoid copying the subvector from x into a workspace we instead
871        make the workspace vector array point to the subpart of the array of
872        the global vector.
873     */
874     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
875     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
876 
877     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
878     PetscCall(KSPSolve(jac->ksp[i], bjac->x[i], bjac->y[i]));
879     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
880     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
881 
882     PetscCall(VecResetArray(bjac->x[i]));
883     PetscCall(VecResetArray(bjac->y[i]));
884   }
885   PetscCall(VecRestoreArrayRead(x, &xin));
886   PetscCall(VecRestoreArray(y, &yin));
887   PetscFunctionReturn(PETSC_SUCCESS);
888 }
889 
890 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Multiblock(PC pc, Vec x, Vec y)
891 {
892   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
893   PetscInt               i, n_local = jac->n_local;
894   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
895   PetscScalar           *yin;
896   const PetscScalar     *xin;
897   PC                     subpc;
898 
899   PetscFunctionBegin;
900   PetscCall(VecGetArrayRead(x, &xin));
901   PetscCall(VecGetArray(y, &yin));
902   for (i = 0; i < n_local; i++) {
903     /*
904        To avoid copying the subvector from x into a workspace we instead
905        make the workspace vector array point to the subpart of the array of
906        the global vector.
907     */
908     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
909     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
910 
911     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
912     /* apply the symmetric left portion of the inner PC operator */
913     /* note this by-passes the inner KSP and its options completely */
914     PetscCall(KSPGetPC(jac->ksp[i], &subpc));
915     PetscCall(PCApplySymmetricLeft(subpc, bjac->x[i], bjac->y[i]));
916     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
917 
918     PetscCall(VecResetArray(bjac->x[i]));
919     PetscCall(VecResetArray(bjac->y[i]));
920   }
921   PetscCall(VecRestoreArrayRead(x, &xin));
922   PetscCall(VecRestoreArray(y, &yin));
923   PetscFunctionReturn(PETSC_SUCCESS);
924 }
925 
926 static PetscErrorCode PCApplySymmetricRight_BJacobi_Multiblock(PC pc, Vec x, Vec y)
927 {
928   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
929   PetscInt               i, n_local = jac->n_local;
930   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
931   PetscScalar           *yin;
932   const PetscScalar     *xin;
933   PC                     subpc;
934 
935   PetscFunctionBegin;
936   PetscCall(VecGetArrayRead(x, &xin));
937   PetscCall(VecGetArray(y, &yin));
938   for (i = 0; i < n_local; i++) {
939     /*
940        To avoid copying the subvector from x into a workspace we instead
941        make the workspace vector array point to the subpart of the array of
942        the global vector.
943     */
944     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
945     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
946 
947     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
948     /* apply the symmetric left portion of the inner PC operator */
949     /* note this by-passes the inner KSP and its options completely */
950     PetscCall(KSPGetPC(jac->ksp[i], &subpc));
951     PetscCall(PCApplySymmetricRight(subpc, bjac->x[i], bjac->y[i]));
952     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
953 
954     PetscCall(VecResetArray(bjac->x[i]));
955     PetscCall(VecResetArray(bjac->y[i]));
956   }
957   PetscCall(VecRestoreArrayRead(x, &xin));
958   PetscCall(VecRestoreArray(y, &yin));
959   PetscFunctionReturn(PETSC_SUCCESS);
960 }
961 
962 static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc, Vec x, Vec y)
963 {
964   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
965   PetscInt               i, n_local = jac->n_local;
966   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
967   PetscScalar           *yin;
968   const PetscScalar     *xin;
969 
970   PetscFunctionBegin;
971   PetscCall(VecGetArrayRead(x, &xin));
972   PetscCall(VecGetArray(y, &yin));
973   for (i = 0; i < n_local; i++) {
974     /*
975        To avoid copying the subvector from x into a workspace we instead
976        make the workspace vector array point to the subpart of the array of
977        the global vector.
978     */
979     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
980     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
981 
982     PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
983     PetscCall(KSPSolveTranspose(jac->ksp[i], bjac->x[i], bjac->y[i]));
984     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
985     PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
986 
987     PetscCall(VecResetArray(bjac->x[i]));
988     PetscCall(VecResetArray(bjac->y[i]));
989   }
990   PetscCall(VecRestoreArrayRead(x, &xin));
991   PetscCall(VecRestoreArray(y, &yin));
992   PetscFunctionReturn(PETSC_SUCCESS);
993 }
994 
995 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc, Mat mat, Mat pmat)
996 {
997   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
998   PetscInt               m, n_local, N, M, start, i;
999   const char            *prefix;
1000   KSP                    ksp;
1001   Vec                    x, y;
1002   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
1003   PC                     subpc;
1004   IS                     is;
1005   MatReuse               scall;
1006   VecType                vectype;
1007 
1008   PetscFunctionBegin;
1009   PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
1010 
1011   n_local = jac->n_local;
1012 
1013   if (pc->useAmat) {
1014     PetscBool same;
1015     PetscCall(PetscObjectTypeCompare((PetscObject)mat, ((PetscObject)pmat)->type_name, &same));
1016     PetscCheck(same, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "Matrices not of same type");
1017   }
1018 
1019   if (!pc->setupcalled) {
1020     PetscInt nestlevel;
1021 
1022     scall = MAT_INITIAL_MATRIX;
1023 
1024     if (!jac->ksp) {
1025       pc->ops->reset               = PCReset_BJacobi_Multiblock;
1026       pc->ops->destroy             = PCDestroy_BJacobi_Multiblock;
1027       pc->ops->apply               = PCApply_BJacobi_Multiblock;
1028       pc->ops->matapply            = NULL;
1029       pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Multiblock;
1030       pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Multiblock;
1031       pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Multiblock;
1032       pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Multiblock;
1033 
1034       PetscCall(PetscNew(&bjac));
1035       PetscCall(PetscMalloc1(n_local, &jac->ksp));
1036       PetscCall(PetscMalloc2(n_local, &bjac->x, n_local, &bjac->y));
1037       PetscCall(PetscMalloc1(n_local, &bjac->starts));
1038 
1039       jac->data = (void *)bjac;
1040       PetscCall(PetscMalloc1(n_local, &bjac->is));
1041 
1042       for (i = 0; i < n_local; i++) {
1043         PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
1044         PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
1045         PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
1046         PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
1047         PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
1048         PetscCall(KSPSetType(ksp, KSPPREONLY));
1049         PetscCall(KSPGetPC(ksp, &subpc));
1050         PetscCall(PCGetOptionsPrefix(pc, &prefix));
1051         PetscCall(KSPSetOptionsPrefix(ksp, prefix));
1052         PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));
1053 
1054         jac->ksp[i] = ksp;
1055       }
1056     } else {
1057       bjac = (PC_BJacobi_Multiblock *)jac->data;
1058     }
1059 
1060     start = 0;
1061     PetscCall(MatGetVecType(pmat, &vectype));
1062     for (i = 0; i < n_local; i++) {
1063       m = jac->l_lens[i];
1064       /*
1065       The reason we need to generate these vectors is to serve
1066       as the right-hand side and solution vector for the solve on the
1067       block. We do not need to allocate space for the vectors since
1068       that is provided via VecPlaceArray() just before the call to
1069       KSPSolve() on the block.
1070 
1071       */
1072       PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &x));
1073       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &y));
1074       PetscCall(VecSetType(x, vectype));
1075       PetscCall(VecSetType(y, vectype));
1076 
1077       bjac->x[i]      = x;
1078       bjac->y[i]      = y;
1079       bjac->starts[i] = start;
1080 
1081       PetscCall(ISCreateStride(PETSC_COMM_SELF, m, start, 1, &is));
1082       bjac->is[i] = is;
1083 
1084       start += m;
1085     }
1086   } else {
1087     bjac = (PC_BJacobi_Multiblock *)jac->data;
1088     /*
1089        Destroy the blocks from the previous iteration
1090     */
1091     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1092       PetscCall(MatDestroyMatrices(n_local, &bjac->pmat));
1093       if (pc->useAmat) PetscCall(MatDestroyMatrices(n_local, &bjac->mat));
1094       scall = MAT_INITIAL_MATRIX;
1095     } else scall = MAT_REUSE_MATRIX;
1096   }
1097 
1098   PetscCall(MatCreateSubMatrices(pmat, n_local, bjac->is, bjac->is, scall, &bjac->pmat));
1099   if (pc->useAmat) PetscCall(MatCreateSubMatrices(mat, n_local, bjac->is, bjac->is, scall, &bjac->mat));
1100   /* Return control to the user so that the submatrices can be modified (e.g., to apply
1101      different boundary conditions for the submatrices than for the global problem) */
1102   PetscCall(PCModifySubMatrices(pc, n_local, bjac->is, bjac->is, bjac->pmat, pc->modifysubmatricesP));
1103 
1104   for (i = 0; i < n_local; i++) {
1105     PetscCall(KSPGetOptionsPrefix(jac->ksp[i], &prefix));
1106     if (pc->useAmat) {
1107       PetscCall(KSPSetOperators(jac->ksp[i], bjac->mat[i], bjac->pmat[i]));
1108       PetscCall(MatSetOptionsPrefix(bjac->mat[i], prefix));
1109     } else {
1110       PetscCall(KSPSetOperators(jac->ksp[i], bjac->pmat[i], bjac->pmat[i]));
1111     }
1112     PetscCall(MatSetOptionsPrefix(bjac->pmat[i], prefix));
1113     if (pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[i]));
1114   }
1115   PetscFunctionReturn(PETSC_SUCCESS);
1116 }
1117 
1118 /*
1119       These are for a single block with multiple processes
1120 */
1121 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc)
1122 {
1123   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
1124   KSP                subksp = jac->ksp[0];
1125   KSPConvergedReason reason;
1126 
1127   PetscFunctionBegin;
1128   PetscCall(KSPSetUp(subksp));
1129   PetscCall(KSPGetConvergedReason(subksp, &reason));
1130   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1131   PetscFunctionReturn(PETSC_SUCCESS);
1132 }
1133 
1134 static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
1135 {
1136   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1137   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1138 
1139   PetscFunctionBegin;
1140   PetscCall(VecDestroy(&mpjac->ysub));
1141   PetscCall(VecDestroy(&mpjac->xsub));
1142   PetscCall(MatDestroy(&mpjac->submats));
1143   if (jac->ksp) PetscCall(KSPReset(jac->ksp[0]));
1144   PetscFunctionReturn(PETSC_SUCCESS);
1145 }
1146 
1147 static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
1148 {
1149   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1150   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1151 
1152   PetscFunctionBegin;
1153   PetscCall(PCReset_BJacobi_Multiproc(pc));
1154   PetscCall(KSPDestroy(&jac->ksp[0]));
1155   PetscCall(PetscFree(jac->ksp));
1156   PetscCall(PetscSubcommDestroy(&mpjac->psubcomm));
1157 
1158   PetscCall(PetscFree(mpjac));
1159   PetscCall(PCDestroy_BJacobi(pc));
1160   PetscFunctionReturn(PETSC_SUCCESS);
1161 }
1162 
1163 static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc, Vec x, Vec y)
1164 {
1165   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1166   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1167   PetscScalar          *yarray;
1168   const PetscScalar    *xarray;
1169   KSPConvergedReason    reason;
1170 
1171   PetscFunctionBegin;
1172   /* place x's and y's local arrays into xsub and ysub */
1173   PetscCall(VecGetArrayRead(x, &xarray));
1174   PetscCall(VecGetArray(y, &yarray));
1175   PetscCall(VecPlaceArray(mpjac->xsub, xarray));
1176   PetscCall(VecPlaceArray(mpjac->ysub, yarray));
1177 
1178   /* apply preconditioner on each matrix block */
1179   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1180   PetscCall(KSPSolve(jac->ksp[0], mpjac->xsub, mpjac->ysub));
1181   PetscCall(KSPCheckSolve(jac->ksp[0], pc, mpjac->ysub));
1182   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1183   PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
1184   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1185 
1186   PetscCall(VecResetArray(mpjac->xsub));
1187   PetscCall(VecResetArray(mpjac->ysub));
1188   PetscCall(VecRestoreArrayRead(x, &xarray));
1189   PetscCall(VecRestoreArray(y, &yarray));
1190   PetscFunctionReturn(PETSC_SUCCESS);
1191 }
1192 
1193 static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc, Mat X, Mat Y)
1194 {
1195   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
1196   KSPConvergedReason reason;
1197   Mat                sX, sY;
1198   const PetscScalar *x;
1199   PetscScalar       *y;
1200   PetscInt           m, N, lda, ldb;
1201 
1202   PetscFunctionBegin;
1203   /* apply preconditioner on each matrix block */
1204   PetscCall(MatGetLocalSize(X, &m, NULL));
1205   PetscCall(MatGetSize(X, NULL, &N));
1206   PetscCall(MatDenseGetLDA(X, &lda));
1207   PetscCall(MatDenseGetLDA(Y, &ldb));
1208   PetscCall(MatDenseGetArrayRead(X, &x));
1209   PetscCall(MatDenseGetArrayWrite(Y, &y));
1210   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, (PetscScalar *)x, &sX));
1211   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, y, &sY));
1212   PetscCall(MatDenseSetLDA(sX, lda));
1213   PetscCall(MatDenseSetLDA(sY, ldb));
1214   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
1215   PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
1216   PetscCall(KSPCheckSolve(jac->ksp[0], pc, NULL));
1217   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
1218   PetscCall(MatDestroy(&sY));
1219   PetscCall(MatDestroy(&sX));
1220   PetscCall(MatDenseRestoreArrayWrite(Y, &y));
1221   PetscCall(MatDenseRestoreArrayRead(X, &x));
1222   PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
1223   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1224   PetscFunctionReturn(PETSC_SUCCESS);
1225 }
1226 
1227 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
1228 {
1229   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1230   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1231   PetscInt              m, n;
1232   MPI_Comm              comm, subcomm = 0;
1233   const char           *prefix;
1234   PetscBool             wasSetup = PETSC_TRUE;
1235   VecType               vectype;
1236 
1237   PetscFunctionBegin;
1238   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
1239   PetscCheck(jac->n_local <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only a single block in a subcommunicator is supported");
1240   jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
1241   if (!pc->setupcalled) {
1242     PetscInt nestlevel;
1243 
1244     wasSetup = PETSC_FALSE;
1245     PetscCall(PetscNew(&mpjac));
1246     jac->data = (void *)mpjac;
1247 
1248     /* initialize datastructure mpjac */
1249     if (!jac->psubcomm) {
1250       /* Create default contiguous subcommunicatiors if user does not provide them */
1251       PetscCall(PetscSubcommCreate(comm, &jac->psubcomm));
1252       PetscCall(PetscSubcommSetNumber(jac->psubcomm, jac->n));
1253       PetscCall(PetscSubcommSetType(jac->psubcomm, PETSC_SUBCOMM_CONTIGUOUS));
1254     }
1255     mpjac->psubcomm = jac->psubcomm;
1256     subcomm         = PetscSubcommChild(mpjac->psubcomm);
1257 
1258     /* Get matrix blocks of pmat */
1259     PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
1260 
1261     /* create a new PC that processors in each subcomm have copy of */
1262     PetscCall(PetscMalloc1(1, &jac->ksp));
1263     PetscCall(KSPCreate(subcomm, &jac->ksp[0]));
1264     PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
1265     PetscCall(KSPSetNestLevel(jac->ksp[0], nestlevel + 1));
1266     PetscCall(KSPSetErrorIfNotConverged(jac->ksp[0], pc->erroriffailure));
1267     PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0], (PetscObject)pc, 1));
1268     PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
1269     PetscCall(KSPGetPC(jac->ksp[0], &mpjac->pc));
1270 
1271     PetscCall(PCGetOptionsPrefix(pc, &prefix));
1272     PetscCall(KSPSetOptionsPrefix(jac->ksp[0], prefix));
1273     PetscCall(KSPAppendOptionsPrefix(jac->ksp[0], "sub_"));
1274     PetscCall(KSPGetOptionsPrefix(jac->ksp[0], &prefix));
1275     PetscCall(MatSetOptionsPrefix(mpjac->submats, prefix));
1276 
1277     /* create dummy vectors xsub and ysub */
1278     PetscCall(MatGetLocalSize(mpjac->submats, &m, &n));
1279     PetscCall(VecCreateMPIWithArray(subcomm, 1, n, PETSC_DECIDE, NULL, &mpjac->xsub));
1280     PetscCall(VecCreateMPIWithArray(subcomm, 1, m, PETSC_DECIDE, NULL, &mpjac->ysub));
1281     PetscCall(MatGetVecType(mpjac->submats, &vectype));
1282     PetscCall(VecSetType(mpjac->xsub, vectype));
1283     PetscCall(VecSetType(mpjac->ysub, vectype));
1284 
1285     pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc;
1286     pc->ops->reset         = PCReset_BJacobi_Multiproc;
1287     pc->ops->destroy       = PCDestroy_BJacobi_Multiproc;
1288     pc->ops->apply         = PCApply_BJacobi_Multiproc;
1289     pc->ops->matapply      = PCMatApply_BJacobi_Multiproc;
1290   } else { /* pc->setupcalled */
1291     subcomm = PetscSubcommChild(mpjac->psubcomm);
1292     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1293       /* destroy old matrix blocks, then get new matrix blocks */
1294       if (mpjac->submats) PetscCall(MatDestroy(&mpjac->submats));
1295       PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
1296     } else {
1297       PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_REUSE_MATRIX, &mpjac->submats));
1298     }
1299     PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
1300   }
1301 
1302   if (!wasSetup && pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[0]));
1303   PetscFunctionReturn(PETSC_SUCCESS);
1304 }
1305