xref: /petsc/src/ksp/pc/impls/bjacobi/bjacobi.c (revision f13dfd9ea68e0ddeee984e65c377a1819eab8a8a)
1 /*
2    Defines a block Jacobi preconditioner.
3 */
4 
5 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/
6 
7 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC, Mat, Mat);
8 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC, Mat, Mat);
9 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);
10 
11 static PetscErrorCode PCSetUp_BJacobi(PC pc)
12 {
13   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
14   Mat         mat = pc->mat, pmat = pc->pmat;
15   PetscBool   hasop;
16   PetscInt    N, M, start, i, sum, end;
17   PetscInt    bs, i_start = -1, i_end = -1;
18   PetscMPIInt rank, size;
19 
20   PetscFunctionBegin;
21   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
22   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
23   PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
24   PetscCall(MatGetBlockSize(pc->pmat, &bs));
25 
26   if (jac->n > 0 && jac->n < size) {
27     PetscCall(PCSetUp_BJacobi_Multiproc(pc));
28     PetscFunctionReturn(PETSC_SUCCESS);
29   }
30 
31   /* Determine the number of blocks assigned to each processor */
32   /* local block count given */
33   if (jac->n_local > 0 && jac->n < 0) {
34     PetscCall(MPIU_Allreduce(&jac->n_local, &jac->n, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
35     if (jac->l_lens) { /* check that user set these correctly */
36       sum = 0;
37       for (i = 0; i < jac->n_local; i++) {
38         PetscCheck(jac->l_lens[i] / bs * bs == jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
39         sum += jac->l_lens[i];
40       }
41       PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local lens set incorrectly");
42     } else {
43       PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
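      /* distribute the M/bs point blocks of size bs as evenly as possible: the first (M/bs) % n_local Jacobi blocks receive one extra point block */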
44       for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
45     }
46   } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
47     /* global blocks given: determine which ones are local */
48     if (jac->g_lens) {
49       /* check that g_lens has valid entries */
50       for (i = 0; i < jac->n; i++) {
51         PetscCheck(jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Zero block not allowed");
52         PetscCheck(jac->g_lens[i] / bs * bs == jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
53       }
54       if (size == 1) {
55         jac->n_local = jac->n;
56         PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
57         PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens, jac->n_local));
58         /* check that user set these correctly */
59         sum = 0;
60         for (i = 0; i < jac->n_local; i++) sum += jac->l_lens[i];
61         PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Global lens set incorrectly");
62       } else {
63         PetscCall(MatGetOwnershipRange(pc->pmat, &start, &end));
64         /* loop over blocks determining first one owned by me */
65         sum = 0;
66         for (i = 0; i < jac->n + 1; i++) {
67           if (sum == start) {
68             i_start = i;
69             goto start_1;
70           }
71           if (i < jac->n) sum += jac->g_lens[i];
72         }
73         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
74       start_1:
75         for (i = i_start; i < jac->n + 1; i++) {
76           if (sum == end) {
77             i_end = i;
78             goto end_1;
79           }
80           if (i < jac->n) sum += jac->g_lens[i];
81         }
82         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
83       end_1:
84         jac->n_local = i_end - i_start;
85         PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
86         PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens + i_start, jac->n_local));
87       }
88     } else { /* no global block lengths given, determine them using the default layout */
89       jac->n_local = jac->n / size + ((jac->n % size) > rank);
90       PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
91       for (i = 0; i < jac->n_local; i++) {
92         jac->l_lens[i] = ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)) * bs;
93         PetscCheck(jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Too many blocks given");
94       }
95     }
96   } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
97     jac->n       = size;
98     jac->n_local = 1;
99     PetscCall(PetscMalloc1(1, &jac->l_lens));
100     jac->l_lens[0] = M;
101   } else { /* jac->n > 0 && jac->n_local > 0 */
102     if (!jac->l_lens) {
103       PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
104       for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
105     }
106   }
107   PetscCheck(jac->n_local >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of blocks is less than number of processors");
108 
109   /* Determine mat and pmat */
110   PetscCall(MatHasOperation(pc->mat, MATOP_GET_DIAGONAL_BLOCK, &hasop));
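  /* if the matrix cannot provide a diagonal block and we are running on a single process, the matrix itself serves as the one block */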
111   if (!hasop && size == 1) {
112     mat  = pc->mat;
113     pmat = pc->pmat;
114   } else {
115     if (pc->useAmat) {
116       /* use block from Amat matrix, not Pmat for local MatMult() */
117       PetscCall(MatGetDiagonalBlock(pc->mat, &mat));
118     }
119     if (pc->pmat != pc->mat || !pc->useAmat) {
120       PetscCall(MatGetDiagonalBlock(pc->pmat, &pmat));
121     } else pmat = mat;
122   }
123 
124   /*
125      Setup code depends on the number of blocks
126   */
127   if (jac->n_local == 1) {
128     PetscCall(PCSetUp_BJacobi_Singleblock(pc, mat, pmat));
129   } else {
130     PetscCall(PCSetUp_BJacobi_Multiblock(pc, mat, pmat));
131   }
132   PetscFunctionReturn(PETSC_SUCCESS);
133 }
134 
135 /* Default destroy, used if the PC has never been set up */
136 static PetscErrorCode PCDestroy_BJacobi(PC pc)
137 {
138   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
139 
140   PetscFunctionBegin;
141   PetscCall(PetscFree(jac->g_lens));
142   PetscCall(PetscFree(jac->l_lens));
143   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", NULL));
144   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", NULL));
145   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", NULL));
146   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", NULL));
147   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", NULL));
148   PetscCall(PetscFree(pc->data));
149   PetscFunctionReturn(PETSC_SUCCESS);
150 }
151 
152 static PetscErrorCode PCSetFromOptions_BJacobi(PC pc, PetscOptionItems *PetscOptionsObject)
153 {
154   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
155   PetscInt    blocks, i;
156   PetscBool   flg;
157 
158   PetscFunctionBegin;
159   PetscOptionsHeadBegin(PetscOptionsObject, "Block Jacobi options");
160   PetscCall(PetscOptionsInt("-pc_bjacobi_blocks", "Total number of blocks", "PCBJacobiSetTotalBlocks", jac->n, &blocks, &flg));
161   if (flg) PetscCall(PCBJacobiSetTotalBlocks(pc, blocks, NULL));
162   PetscCall(PetscOptionsInt("-pc_bjacobi_local_blocks", "Local number of blocks", "PCBJacobiSetLocalBlocks", jac->n_local, &blocks, &flg));
163   if (flg) PetscCall(PCBJacobiSetLocalBlocks(pc, blocks, NULL));
164   if (jac->ksp) {
165     /* The sub-KSPs have already been created (e.g., by PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions() was only
166      * called on them there if this routine had already run, so call it here. */
167     for (i = 0; i < jac->n_local; i++) PetscCall(KSPSetFromOptions(jac->ksp[i]));
168   }
169   PetscOptionsHeadEnd();
170   PetscFunctionReturn(PETSC_SUCCESS);
171 }
172 
173 #include <petscdraw.h>
174 static PetscErrorCode PCView_BJacobi(PC pc, PetscViewer viewer)
175 {
176   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
177   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
178   PetscMPIInt           rank;
179   PetscInt              i;
180   PetscBool             iascii, isstring, isdraw;
181   PetscViewer           sviewer;
182   PetscViewerFormat     format;
183   const char           *prefix;
184 
185   PetscFunctionBegin;
186   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
187   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
188   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
189   if (iascii) {
190     if (pc->useAmat) PetscCall(PetscViewerASCIIPrintf(viewer, "  using Amat local matrix, number of blocks = %" PetscInt_FMT "\n", jac->n));
191     PetscCall(PetscViewerASCIIPrintf(viewer, "  number of blocks = %" PetscInt_FMT "\n", jac->n));
192     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
193     PetscCall(PetscViewerGetFormat(viewer, &format));
194     if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) {
195       PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for first block is in the following KSP and PC objects on rank 0:\n"));
196       PetscCall(PCGetOptionsPrefix(pc, &prefix));
197       PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all blocks\n", prefix ? prefix : ""));
198       if (jac->ksp && !jac->psubcomm) {
199         PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
200         if (rank == 0) {
201           PetscCall(PetscViewerASCIIPushTab(sviewer));
202           PetscCall(KSPView(jac->ksp[0], sviewer));
203           PetscCall(PetscViewerASCIIPopTab(sviewer));
204         }
205         PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
206         /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
207         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
208       } else if (mpjac && jac->ksp && mpjac->psubcomm) {
209         PetscCall(PetscViewerGetSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
210         if (!mpjac->psubcomm->color) {
211           PetscCall(PetscViewerASCIIPushTab(sviewer));
212           PetscCall(KSPView(*jac->ksp, sviewer));
213           PetscCall(PetscViewerASCIIPopTab(sviewer));
214         }
215         PetscCall(PetscViewerRestoreSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
216         /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
217         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
218       }
219     } else {
220       PetscInt n_global;
221       PetscCall(MPIU_Allreduce(&jac->n_local, &n_global, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
222       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
223       PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for each block is in the following KSP and PC objects:\n"));
224       PetscCall(PetscViewerASCIIPushTab(viewer));
225       PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
226       PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n", rank, jac->n_local, jac->first_local));
227       for (i = 0; i < jac->n_local; i++) {
228         PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] local block number %" PetscInt_FMT "\n", rank, i));
229         PetscCall(KSPView(jac->ksp[i], sviewer));
230         PetscCall(PetscViewerASCIIPrintf(sviewer, "- - - - - - - - - - - - - - - - - -\n"));
231       }
232       PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
233       PetscCall(PetscViewerASCIIPopTab(viewer));
234       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
235     }
236   } else if (isstring) {
237     PetscCall(PetscViewerStringSPrintf(viewer, " blks=%" PetscInt_FMT, jac->n));
238     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
239     if (jac->ksp) PetscCall(KSPView(jac->ksp[0], sviewer));
240     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
241   } else if (isdraw) {
242     PetscDraw draw;
243     char      str[25];
244     PetscReal x, y, bottom, h;
245 
246     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
247     PetscCall(PetscDrawGetCurrentPoint(draw, &x, &y));
248     PetscCall(PetscSNPrintf(str, 25, "Number blocks %" PetscInt_FMT, jac->n));
249     PetscCall(PetscDrawStringBoxed(draw, x, y, PETSC_DRAW_RED, PETSC_DRAW_BLACK, str, NULL, &h));
250     bottom = y - h;
251     PetscCall(PetscDrawPushCurrentPoint(draw, x, bottom));
252     /* warning: the communicator on the viewer is different than on the ksp in parallel */
253     if (jac->ksp) PetscCall(KSPView(jac->ksp[0], viewer));
254     PetscCall(PetscDrawPopCurrentPoint(draw));
255   }
256   PetscFunctionReturn(PETSC_SUCCESS);
257 }
258 
259 static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc, PetscInt *n_local, PetscInt *first_local, KSP **ksp)
260 {
261   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
262 
263   PetscFunctionBegin;
264   PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Must call KSPSetUp() or PCSetUp() first");
265 
266   if (n_local) *n_local = jac->n_local;
267   if (first_local) *first_local = jac->first_local;
268   if (ksp) *ksp = jac->ksp;
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt *lens)
273 {
274   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
275 
276   PetscFunctionBegin;
277   PetscCheck(pc->setupcalled <= 0 || jac->n == blocks, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
278   jac->n = blocks;
279   if (!lens) jac->g_lens = NULL;
280   else {
281     PetscCall(PetscMalloc1(blocks, &jac->g_lens));
282     PetscCall(PetscArraycpy(jac->g_lens, lens, blocks));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
288 {
289   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
290 
291   PetscFunctionBegin;
292   *blocks = jac->n;
293   if (lens) *lens = jac->g_lens;
294   PetscFunctionReturn(PETSC_SUCCESS);
295 }
296 
297 static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt lens[])
298 {
299   PC_BJacobi *jac;
300 
301   PetscFunctionBegin;
302   jac = (PC_BJacobi *)pc->data;
303 
304   jac->n_local = blocks;
305   if (!lens) jac->l_lens = NULL;
306   else {
307     PetscCall(PetscMalloc1(blocks, &jac->l_lens));
308     PetscCall(PetscArraycpy(jac->l_lens, lens, blocks));
309   }
310   PetscFunctionReturn(PETSC_SUCCESS);
311 }
312 
313 static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
314 {
315   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
316 
317   PetscFunctionBegin;
318   *blocks = jac->n_local;
319   if (lens) *lens = jac->l_lens;
320   PetscFunctionReturn(PETSC_SUCCESS);
321 }
322 
323 /*@C
324   PCBJacobiGetSubKSP - Gets the local `KSP` contexts for all blocks on
325   this processor.
326 
327   Not Collective
328 
329   Input Parameter:
330 . pc - the preconditioner context
331 
332   Output Parameters:
333 + n_local     - the number of blocks on this processor, or NULL
334 . first_local - the global number of the first block on this processor, or NULL
335 - ksp         - the array of KSP contexts
336 
337   Notes:
338   The array of `KSP` contexts returned by `PCBJacobiGetSubKSP()` is owned by the `PC` and must not be freed by the caller.
339 
340   Currently for some matrix implementations only 1 block per processor
341   is supported.
342 
343   You must call `KSPSetUp()` or `PCSetUp()` before calling `PCBJacobiGetSubKSP()`.
344 
345   Fortran Notes:
346   You must pass in a `KSP` array that is large enough to contain all the local `KSP`s.
347 
348   You can call `PCBJacobiGetSubKSP`(pc,nlocal,firstlocal,`PETSC_NULL_KSP`,ierr) to determine how large the
349   `KSP` array must be.
350 
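  Example Usage:
  A minimal sketch, assuming `ksp` is the outer solver containing this `PC`; the tolerance values below are purely illustrative:
.vb
  KSP     *subksp;
  PetscInt i, nlocal, first;

  PetscCall(KSPSetUp(ksp)); /* or PCSetUp(pc); required before PCBJacobiGetSubKSP() */
  PetscCall(PCBJacobiGetSubKSP(pc, &nlocal, &first, &subksp));
  for (i = 0; i < nlocal; i++) PetscCall(KSPSetTolerances(subksp[i], 1.e-8, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT));
.ve
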
351   Level: advanced
352 
353 .seealso: [](ch_ksp), `PCBJACOBI`, `PCASM`, `PCASMGetSubKSP()`
354 @*/
355 PetscErrorCode PCBJacobiGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[])
356 {
357   PetscFunctionBegin;
358   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
359   PetscUseMethod(pc, "PCBJacobiGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp));
360   PetscFunctionReturn(PETSC_SUCCESS);
361 }
362 
363 /*@
364   PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
365   Jacobi preconditioner.
366 
367   Collective
368 
369   Input Parameters:
370 + pc     - the preconditioner context
371 . blocks - the number of blocks
372 - lens   - [optional] integer array containing the size of each block
373 
374   Options Database Key:
375 . -pc_bjacobi_blocks <blocks> - Sets the number of global blocks
376 
377   Note:
378   Currently only a limited number of blocking configurations are supported.
379   All processors sharing the `PC` must call this routine with the same data.
380 
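  Example Usage:
  A minimal sketch (the block count of 4 is illustrative only) that lets PETSc pick the block sizes:
.vb
  PetscCall(PCSetType(pc, PCBJACOBI));
  PetscCall(PCBJacobiSetTotalBlocks(pc, 4, NULL));
.ve
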
381   Level: intermediate
382 
383 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetLocalBlocks()`
384 @*/
385 PetscErrorCode PCBJacobiSetTotalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
386 {
387   PetscFunctionBegin;
388   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
389   PetscCheck(blocks > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Must have positive blocks");
390   PetscTryMethod(pc, "PCBJacobiSetTotalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
391   PetscFunctionReturn(PETSC_SUCCESS);
392 }
393 
394 /*@C
395   PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
396   Jacobi, `PCBJACOBI`, preconditioner.
397 
398   Not Collective
399 
400   Input Parameter:
401 . pc - the preconditioner context
402 
403   Output Parameters:
404 + blocks - the number of blocks
405 - lens   - integer array containing the size of each block
406 
407   Level: intermediate
408 
409 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetLocalBlocks()`
410 @*/
411 PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
412 {
413   PetscFunctionBegin;
414   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
415   PetscAssertPointer(blocks, 2);
416   PetscUseMethod(pc, "PCBJacobiGetTotalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
417   PetscFunctionReturn(PETSC_SUCCESS);
418 }
419 
420 /*@
421   PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
422   Jacobi, `PCBJACOBI`,  preconditioner.
423 
424   Not Collective
425 
426   Input Parameters:
427 + pc     - the preconditioner context
428 . blocks - the number of blocks
429 - lens   - [optional] integer array containing size of each block
430 
431   Options Database Key:
432 . -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks
433 
434   Note:
435   Currently only a limited number of blocking configurations are supported.
436 
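  Example Usage:
  A minimal sketch, assuming m0 and m1 are block sizes chosen by the caller that sum to the number of local rows:
.vb
  PetscInt lens[2] = {m0, m1};

  PetscCall(PCSetType(pc, PCBJACOBI));
  PetscCall(PCBJacobiSetLocalBlocks(pc, 2, lens));
.ve
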
437   Level: intermediate
438 
439 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetTotalBlocks()`
440 @*/
441 PetscErrorCode PCBJacobiSetLocalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
442 {
443   PetscFunctionBegin;
444   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
445   PetscCheck(blocks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must have nonnegative blocks");
446   PetscTryMethod(pc, "PCBJacobiSetLocalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
447   PetscFunctionReturn(PETSC_SUCCESS);
448 }
449 
450 /*@C
451   PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
452   Jacobi, `PCBJACOBI`, preconditioner.
453 
454   Not Collective
455 
456   Input Parameters:
457 + pc     - the preconditioner context
458 . blocks - the number of blocks
459 - lens   - [optional] integer array containing size of each block
460 
461   Note:
462   Currently only a limited number of blocking configurations are supported.
463 
464   Level: intermediate
465 
466 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetTotalBlocks()`
467 @*/
468 PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
469 {
470   PetscFunctionBegin;
471   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
472   PetscAssertPointer(blocks, 2);
473   PetscUseMethod(pc, "PCBJacobiGetLocalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
474   PetscFunctionReturn(PETSC_SUCCESS);
475 }
476 
477 /*MC
478    PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with
479            its own `KSP` object.
480 
481    Options Database Keys:
482 +  -pc_use_amat - use Amat to apply block of operator in inner Krylov method
483 -  -pc_bjacobi_blocks <n> - use n total blocks
484 
485    Notes:
486     See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable point block, and `PCPBJACOBI` for fixed size point block
487 
488     Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.
489 
490      To set options on the solvers for each block append -sub_ to all the `KSP` and `PC`
491         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
492 
493      To set the options on the solvers separately for each block, call `PCBJacobiGetSubKSP()`
494          and set the options directly on the resulting `KSP` object (you can access its `PC`
495          with `KSPGetPC()`)
496 
497      For GPU-based vectors (`VECCUDA`, `VECViennaCL`) it is recommended to use exactly one block per MPI process for best
498          performance.  A different block partitioning may lead to additional data transfers
499          between host and GPU that degrade performance.
500 
501      When multiple processes share a single block, each block encompasses exactly all the unknowns owned by its set of processes.
502 
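   Example Usage:
     A typical command-line sketch (the block count and sub-solver choices are illustrative only), using four total blocks each solved with ILU:
.vb
   -pc_type bjacobi -pc_bjacobi_blocks 4 -sub_ksp_type preonly -sub_pc_type ilu
.ve
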
503    Level: beginner
504 
505 .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCType`,
506           `PCASM`, `PCSetUseAmat()`, `PCGetUseAmat()`, `PCBJacobiGetSubKSP()`, `PCBJacobiSetTotalBlocks()`,
507           `PCBJacobiSetLocalBlocks()`, `PCSetModifySubMatrices()`, `PCJACOBI`, `PCVPBJACOBI`, `PCPBJACOBI`
508 M*/
509 
510 PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
511 {
512   PetscMPIInt rank;
513   PC_BJacobi *jac;
514 
515   PetscFunctionBegin;
516   PetscCall(PetscNew(&jac));
517   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
518 
519   pc->ops->apply           = NULL;
520   pc->ops->matapply        = NULL;
521   pc->ops->applytranspose  = NULL;
522   pc->ops->setup           = PCSetUp_BJacobi;
523   pc->ops->destroy         = PCDestroy_BJacobi;
524   pc->ops->setfromoptions  = PCSetFromOptions_BJacobi;
525   pc->ops->view            = PCView_BJacobi;
526   pc->ops->applyrichardson = NULL;
527 
528   pc->data         = (void *)jac;
529   jac->n           = -1;
530   jac->n_local     = -1;
531   jac->first_local = rank;
532   jac->ksp         = NULL;
533   jac->g_lens      = NULL;
534   jac->l_lens      = NULL;
535   jac->psubcomm    = NULL;
536 
537   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", PCBJacobiGetSubKSP_BJacobi));
538   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", PCBJacobiSetTotalBlocks_BJacobi));
539   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", PCBJacobiGetTotalBlocks_BJacobi));
540   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", PCBJacobiSetLocalBlocks_BJacobi));
541   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", PCBJacobiGetLocalBlocks_BJacobi));
542   PetscFunctionReturn(PETSC_SUCCESS);
543 }
544 
545 /*
546         These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI
547 */
548 static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
549 {
550   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
551   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
552 
553   PetscFunctionBegin;
554   PetscCall(KSPReset(jac->ksp[0]));
555   PetscCall(VecDestroy(&bjac->x));
556   PetscCall(VecDestroy(&bjac->y));
557   PetscFunctionReturn(PETSC_SUCCESS);
558 }
559 
560 static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
561 {
562   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
563   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
564 
565   PetscFunctionBegin;
566   PetscCall(PCReset_BJacobi_Singleblock(pc));
567   PetscCall(KSPDestroy(&jac->ksp[0]));
568   PetscCall(PetscFree(jac->ksp));
569   PetscCall(PetscFree(bjac));
570   PetscCall(PCDestroy_BJacobi(pc));
571   PetscFunctionReturn(PETSC_SUCCESS);
572 }
573 
574 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
575 {
576   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
577   KSP                subksp = jac->ksp[0];
578   KSPConvergedReason reason;
579 
580   PetscFunctionBegin;
581   PetscCall(KSPSetUp(subksp));
582   PetscCall(KSPGetConvergedReason(subksp, &reason));
583   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
584   PetscFunctionReturn(PETSC_SUCCESS);
585 }
586 
587 static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc, Vec x, Vec y)
588 {
589   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
590   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
591 
592   PetscFunctionBegin;
593   PetscCall(VecGetLocalVectorRead(x, bjac->x));
594   PetscCall(VecGetLocalVector(y, bjac->y));
595   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix, the inner
596      matrix may change even if the outer KSP/PC has not updated the preconditioner; this would trigger a rebuild
597      of the inner preconditioner automatically unless we pass down the outer preconditioner's reuse flag. */
598   PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
599   PetscCall(KSPSolve(jac->ksp[0], bjac->x, bjac->y));
600   PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
601   PetscCall(VecRestoreLocalVectorRead(x, bjac->x));
602   PetscCall(VecRestoreLocalVector(y, bjac->y));
603   PetscFunctionReturn(PETSC_SUCCESS);
604 }
605 
606 static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc, Mat X, Mat Y)
607 {
608   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
609   Mat         sX, sY;
610 
611   PetscFunctionBegin;
612   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix, the inner
613      matrix may change even if the outer KSP/PC has not updated the preconditioner; this would trigger a rebuild
614      of the inner preconditioner automatically unless we pass down the outer preconditioner's reuse flag. */
615   PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
616   PetscCall(MatDenseGetLocalMatrix(X, &sX));
617   PetscCall(MatDenseGetLocalMatrix(Y, &sY));
618   PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
619   PetscFunctionReturn(PETSC_SUCCESS);
620 }
621 
622 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc, Vec x, Vec y)
623 {
624   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
625   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
626   PetscScalar            *y_array;
627   const PetscScalar      *x_array;
628   PC                      subpc;
629 
630   PetscFunctionBegin;
631   /*
632       The VecPlaceArray() calls avoid having to copy the
633     x and y vectors into the bjac->x and bjac->y work vectors.
634     These work vectors are needed because the sequential solve
635     requires sequential vectors.
636   */
637   PetscCall(VecGetArrayRead(x, &x_array));
638   PetscCall(VecGetArray(y, &y_array));
639   PetscCall(VecPlaceArray(bjac->x, x_array));
640   PetscCall(VecPlaceArray(bjac->y, y_array));
641   /* apply the symmetric left portion of the inner PC operator */
642   /* note this by-passes the inner KSP and its options completely */
643   PetscCall(KSPGetPC(jac->ksp[0], &subpc));
644   PetscCall(PCApplySymmetricLeft(subpc, bjac->x, bjac->y));
645   PetscCall(VecResetArray(bjac->x));
646   PetscCall(VecResetArray(bjac->y));
647   PetscCall(VecRestoreArrayRead(x, &x_array));
648   PetscCall(VecRestoreArray(y, &y_array));
649   PetscFunctionReturn(PETSC_SUCCESS);
650 }
651 
652 static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc, Vec x, Vec y)
653 {
654   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
655   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
656   PetscScalar            *y_array;
657   const PetscScalar      *x_array;
658   PC                      subpc;
659 
660   PetscFunctionBegin;
661   /*
662       The VecPlaceArray() calls avoid having to copy the
663     x and y vectors into the bjac->x and bjac->y work vectors.
664     These work vectors are needed because the sequential solve
665     requires sequential vectors.
666   */
667   PetscCall(VecGetArrayRead(x, &x_array));
668   PetscCall(VecGetArray(y, &y_array));
669   PetscCall(VecPlaceArray(bjac->x, x_array));
670   PetscCall(VecPlaceArray(bjac->y, y_array));
671 
672   /* apply the symmetric right portion of the inner PC operator */
673   /* note this by-passes the inner KSP and its options completely */
674 
675   PetscCall(KSPGetPC(jac->ksp[0], &subpc));
676   PetscCall(PCApplySymmetricRight(subpc, bjac->x, bjac->y));
677 
678   PetscCall(VecResetArray(bjac->x));
679   PetscCall(VecResetArray(bjac->y));
680   PetscCall(VecRestoreArrayRead(x, &x_array));
681   PetscCall(VecRestoreArray(y, &y_array));
682   PetscFunctionReturn(PETSC_SUCCESS);
683 }
684 
685 static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc, Vec x, Vec y)
686 {
687   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
688   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
689   PetscScalar            *y_array;
690   const PetscScalar      *x_array;
691 
692   PetscFunctionBegin;
693   /*
694       The VecPlaceArray() calls avoid having to copy the
695     x and y vectors into the bjac->x and bjac->y work vectors.
696     These work vectors are needed because the sequential solve
697     requires sequential vectors.
698   */
699   PetscCall(VecGetArrayRead(x, &x_array));
700   PetscCall(VecGetArray(y, &y_array));
701   PetscCall(VecPlaceArray(bjac->x, x_array));
702   PetscCall(VecPlaceArray(bjac->y, y_array));
703   PetscCall(KSPSolveTranspose(jac->ksp[0], bjac->x, bjac->y));
704   PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
705   PetscCall(VecResetArray(bjac->x));
706   PetscCall(VecResetArray(bjac->y));
707   PetscCall(VecRestoreArrayRead(x, &x_array));
708   PetscCall(VecRestoreArray(y, &y_array));
709   PetscFunctionReturn(PETSC_SUCCESS);
710 }
711 
712 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc, Mat mat, Mat pmat)
713 {
714   PC_BJacobi             *jac = (PC_BJacobi *)pc->data;
715   PetscInt                m;
716   KSP                     ksp;
717   PC_BJacobi_Singleblock *bjac;
718   PetscBool               wasSetup = PETSC_TRUE;
719   VecType                 vectype;
720   const char             *prefix;
721 
722   PetscFunctionBegin;
723   if (!pc->setupcalled) {
724     if (!jac->ksp) {
725       PetscInt nestlevel;
726 
727       wasSetup = PETSC_FALSE;
728 
729       PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
730       PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
731       PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
732       PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
733       PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
734       PetscCall(KSPSetType(ksp, KSPPREONLY));
735       PetscCall(PCGetOptionsPrefix(pc, &prefix));
736       PetscCall(KSPSetOptionsPrefix(ksp, prefix));
737       PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));
738 
739       pc->ops->reset               = PCReset_BJacobi_Singleblock;
740       pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
741       pc->ops->apply               = PCApply_BJacobi_Singleblock;
742       pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
743       pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
744       pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
745       pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
746       pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;
747 
748       PetscCall(PetscMalloc1(1, &jac->ksp));
749       jac->ksp[0] = ksp;
750 
751       PetscCall(PetscNew(&bjac));
752       jac->data = (void *)bjac;
753     } else {
754       ksp  = jac->ksp[0];
755       bjac = (PC_BJacobi_Singleblock *)jac->data;
756     }
757 
758     /*
759       The reason we need to generate these vectors is to serve
760       as the right-hand side and solution vector for the solve on the
761       block. We do not need to allocate space for the vectors since
762       that is provided via VecPlaceArray() just before the call to
763       KSPSolve() on the block.
764     */
765     PetscCall(MatGetSize(pmat, &m, &m));
766     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->x));
767     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->y));
768     PetscCall(MatGetVecType(pmat, &vectype));
769     PetscCall(VecSetType(bjac->x, vectype));
770     PetscCall(VecSetType(bjac->y, vectype));
771   } else {
772     ksp  = jac->ksp[0];
773     bjac = (PC_BJacobi_Singleblock *)jac->data;
774   }
775   PetscCall(KSPGetOptionsPrefix(ksp, &prefix));
776   if (pc->useAmat) {
777     PetscCall(KSPSetOperators(ksp, mat, pmat));
778     PetscCall(MatSetOptionsPrefix(mat, prefix));
779   } else {
780     PetscCall(KSPSetOperators(ksp, pmat, pmat));
781   }
782   PetscCall(MatSetOptionsPrefix(pmat, prefix));
783   if (!wasSetup && pc->setfromoptionscalled) {
784     /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
785     PetscCall(KSPSetFromOptions(ksp));
786   }
787   PetscFunctionReturn(PETSC_SUCCESS);
788 }
789 
790 static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
791 {
792   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
793   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
794   PetscInt               i;
795 
796   PetscFunctionBegin;
797   if (bjac && bjac->pmat) {
798     PetscCall(MatDestroyMatrices(jac->n_local, &bjac->pmat));
799     if (pc->useAmat) PetscCall(MatDestroyMatrices(jac->n_local, &bjac->mat));
800   }
801 
802   for (i = 0; i < jac->n_local; i++) {
803     PetscCall(KSPReset(jac->ksp[i]));
804     if (bjac && bjac->x) {
805       PetscCall(VecDestroy(&bjac->x[i]));
806       PetscCall(VecDestroy(&bjac->y[i]));
807       PetscCall(ISDestroy(&bjac->is[i]));
808     }
809   }
810   PetscCall(PetscFree(jac->l_lens));
811   PetscCall(PetscFree(jac->g_lens));
812   PetscFunctionReturn(PETSC_SUCCESS);
813 }
814 
815 static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
816 {
817   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
818   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
819   PetscInt               i;
820 
821   PetscFunctionBegin;
822   PetscCall(PCReset_BJacobi_Multiblock(pc));
823   if (bjac) {
824     PetscCall(PetscFree2(bjac->x, bjac->y));
825     PetscCall(PetscFree(bjac->starts));
826     PetscCall(PetscFree(bjac->is));
827   }
828   PetscCall(PetscFree(jac->data));
829   for (i = 0; i < jac->n_local; i++) PetscCall(KSPDestroy(&jac->ksp[i]));
830   PetscCall(PetscFree(jac->ksp));
831   PetscCall(PCDestroy_BJacobi(pc));
832   PetscFunctionReturn(PETSC_SUCCESS);
833 }
834 
835 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
836 {
837   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
838   PetscInt           i, n_local = jac->n_local;
839   KSPConvergedReason reason;
840 
841   PetscFunctionBegin;
842   for (i = 0; i < n_local; i++) {
843     PetscCall(KSPSetUp(jac->ksp[i]));
844     PetscCall(KSPGetConvergedReason(jac->ksp[i], &reason));
845     if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
846   }
847   PetscFunctionReturn(PETSC_SUCCESS);
848 }
849 
850 static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc, Vec x, Vec y)
851 {
852   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
853   PetscInt               i, n_local = jac->n_local;
854   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
855   PetscScalar           *yin;
856   const PetscScalar     *xin;
857 
858   PetscFunctionBegin;
859   PetscCall(VecGetArrayRead(x, &xin));
860   PetscCall(VecGetArray(y, &yin));
861   for (i = 0; i < n_local; i++) {
862     /*
863        To avoid copying the subvector from x into a workspace we instead
864        make the workspace vector array point to the subpart of the array of
865        the global vector.
866     */
867     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
868     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
869 
870     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
871     PetscCall(KSPSolve(jac->ksp[i], bjac->x[i], bjac->y[i]));
872     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
873     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
874 
875     PetscCall(VecResetArray(bjac->x[i]));
876     PetscCall(VecResetArray(bjac->y[i]));
877   }
878   PetscCall(VecRestoreArrayRead(x, &xin));
879   PetscCall(VecRestoreArray(y, &yin));
880   PetscFunctionReturn(PETSC_SUCCESS);
881 }
882 
883 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Multiblock(PC pc, Vec x, Vec y)
884 {
885   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
886   PetscInt               i, n_local = jac->n_local;
887   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
888   PetscScalar           *yin;
889   const PetscScalar     *xin;
890   PC                     subpc;
891 
892   PetscFunctionBegin;
893   PetscCall(VecGetArrayRead(x, &xin));
894   PetscCall(VecGetArray(y, &yin));
895   for (i = 0; i < n_local; i++) {
896     /*
897        To avoid copying the subvector from x into a workspace we instead
898        make the workspace vector array point to the subpart of the array of
899        the global vector.
900     */
901     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
902     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
903 
904     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
905     /* apply the symmetric left portion of the inner PC operator */
906     /* note this by-passes the inner KSP and its options completely */
907     PetscCall(KSPGetPC(jac->ksp[i], &subpc));
908     PetscCall(PCApplySymmetricLeft(subpc, bjac->x[i], bjac->y[i]));
909     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
910 
911     PetscCall(VecResetArray(bjac->x[i]));
912     PetscCall(VecResetArray(bjac->y[i]));
913   }
914   PetscCall(VecRestoreArrayRead(x, &xin));
915   PetscCall(VecRestoreArray(y, &yin));
916   PetscFunctionReturn(PETSC_SUCCESS);
917 }
918 
919 static PetscErrorCode PCApplySymmetricRight_BJacobi_Multiblock(PC pc, Vec x, Vec y)
920 {
921   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
922   PetscInt               i, n_local = jac->n_local;
923   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
924   PetscScalar           *yin;
925   const PetscScalar     *xin;
926   PC                     subpc;
927 
928   PetscFunctionBegin;
929   PetscCall(VecGetArrayRead(x, &xin));
930   PetscCall(VecGetArray(y, &yin));
931   for (i = 0; i < n_local; i++) {
932     /*
933        To avoid copying the subvector from x into a workspace we instead
934        make the workspace vector array point to the subpart of the array of
935        the global vector.
936     */
937     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
938     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
939 
940     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
941     /* apply the symmetric right portion of the inner PC operator */
942     /* note this by-passes the inner KSP and its options completely */
943     PetscCall(KSPGetPC(jac->ksp[i], &subpc));
944     PetscCall(PCApplySymmetricRight(subpc, bjac->x[i], bjac->y[i]));
945     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
946 
947     PetscCall(VecResetArray(bjac->x[i]));
948     PetscCall(VecResetArray(bjac->y[i]));
949   }
950   PetscCall(VecRestoreArrayRead(x, &xin));
951   PetscCall(VecRestoreArray(y, &yin));
952   PetscFunctionReturn(PETSC_SUCCESS);
953 }
954 
955 static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc, Vec x, Vec y)
956 {
957   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
958   PetscInt               i, n_local = jac->n_local;
959   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
960   PetscScalar           *yin;
961   const PetscScalar     *xin;
962 
963   PetscFunctionBegin;
964   PetscCall(VecGetArrayRead(x, &xin));
965   PetscCall(VecGetArray(y, &yin));
966   for (i = 0; i < n_local; i++) {
967     /*
968        To avoid copying the subvector from x into a workspace we instead
969        make the workspace vector array point to the subpart of the array of
970        the global vector.
971     */
972     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
973     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
974 
975     PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
976     PetscCall(KSPSolveTranspose(jac->ksp[i], bjac->x[i], bjac->y[i]));
977     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
978     PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
979 
980     PetscCall(VecResetArray(bjac->x[i]));
981     PetscCall(VecResetArray(bjac->y[i]));
982   }
983   PetscCall(VecRestoreArrayRead(x, &xin));
984   PetscCall(VecRestoreArray(y, &yin));
985   PetscFunctionReturn(PETSC_SUCCESS);
986 }
987 
988 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc, Mat mat, Mat pmat)
989 {
990   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
991   PetscInt               m, n_local, N, M, start, i;
992   const char            *prefix;
993   KSP                    ksp;
994   Vec                    x, y;
995   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
996   PC                     subpc;
997   IS                     is;
998   MatReuse               scall;
999   VecType                vectype;
1000   MatNullSpace          *nullsp_mat = NULL, *nullsp_pmat = NULL;
1001 
1002   PetscFunctionBegin;
1003   PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
1004 
1005   n_local = jac->n_local;
1006 
1007   if (pc->useAmat) {
1008     PetscBool same;
1009     PetscCall(PetscObjectTypeCompare((PetscObject)mat, ((PetscObject)pmat)->type_name, &same));
1010     PetscCheck(same, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "Matrices not of same type");
1011   }
1012 
1013   if (!pc->setupcalled) {
1014     PetscInt nestlevel;
1015 
1016     scall = MAT_INITIAL_MATRIX;
1017 
1018     if (!jac->ksp) {
1019       pc->ops->reset               = PCReset_BJacobi_Multiblock;
1020       pc->ops->destroy             = PCDestroy_BJacobi_Multiblock;
1021       pc->ops->apply               = PCApply_BJacobi_Multiblock;
1022       pc->ops->matapply            = NULL;
1023       pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Multiblock;
1024       pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Multiblock;
1025       pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Multiblock;
1026       pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Multiblock;
1027 
1028       PetscCall(PetscNew(&bjac));
1029       PetscCall(PetscMalloc1(n_local, &jac->ksp));
1030       PetscCall(PetscMalloc2(n_local, &bjac->x, n_local, &bjac->y));
1031       PetscCall(PetscMalloc1(n_local, &bjac->starts));
1032 
1033       jac->data = (void *)bjac;
1034       PetscCall(PetscMalloc1(n_local, &bjac->is));
1035 
1036       for (i = 0; i < n_local; i++) {
1037         PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
1038         PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
1039         PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
1040         PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
1041         PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
1042         PetscCall(KSPSetType(ksp, KSPPREONLY));
1043         PetscCall(KSPGetPC(ksp, &subpc));
1044         PetscCall(PCGetOptionsPrefix(pc, &prefix));
1045         PetscCall(KSPSetOptionsPrefix(ksp, prefix));
1046         PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));
1047 
1048         jac->ksp[i] = ksp;
1049       }
1050     } else {
1051       bjac = (PC_BJacobi_Multiblock *)jac->data;
1052     }
1053 
1054     start = 0;
1055     PetscCall(MatGetVecType(pmat, &vectype));
1056     for (i = 0; i < n_local; i++) {
1057       m = jac->l_lens[i];
1058       /*
1059       The reason we need to generate these vectors is to serve
1060       as the right-hand side and solution vector for the solve on the
1061       block. We do not need to allocate space for the vectors since
1062       that is provided via VecPlaceArray() just before the call to
1063       KSPSolve() on the block.
1064 
1065       */
1066       PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &x));
1067       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &y));
1068       PetscCall(VecSetType(x, vectype));
1069       PetscCall(VecSetType(y, vectype));
1070 
1071       bjac->x[i]      = x;
1072       bjac->y[i]      = y;
1073       bjac->starts[i] = start;
1074 
1075       PetscCall(ISCreateStride(PETSC_COMM_SELF, m, start, 1, &is));
1076       bjac->is[i] = is;
1077 
1078       start += m;
1079     }
1080   } else {
1081     bjac = (PC_BJacobi_Multiblock *)jac->data;
1082     /*
1083        Destroy the blocks from the previous iteration
1084     */
1085     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1086       PetscCall(MatGetNullSpaces(n_local, bjac->pmat, &nullsp_pmat));
1087       PetscCall(MatDestroyMatrices(n_local, &bjac->pmat));
1088       if (pc->useAmat) {
1089         PetscCall(MatGetNullSpaces(n_local, bjac->mat, &nullsp_mat));
1090         PetscCall(MatDestroyMatrices(n_local, &bjac->mat));
1091       }
1092       scall = MAT_INITIAL_MATRIX;
1093     } else scall = MAT_REUSE_MATRIX;
1094   }
1095 
1096   PetscCall(MatCreateSubMatrices(pmat, n_local, bjac->is, bjac->is, scall, &bjac->pmat));
1097   if (nullsp_pmat) PetscCall(MatRestoreNullSpaces(n_local, bjac->pmat, &nullsp_pmat));
1098   if (pc->useAmat) {
1099     PetscCall(MatCreateSubMatrices(mat, n_local, bjac->is, bjac->is, scall, &bjac->mat));
1100     if (nullsp_mat) PetscCall(MatRestoreNullSpaces(n_local, bjac->mat, &nullsp_mat));
1101   }
1102   /* Return control to the user so that the submatrices can be modified (e.g., to apply
1103      different boundary conditions for the submatrices than for the global problem) */
1104   PetscCall(PCModifySubMatrices(pc, n_local, bjac->is, bjac->is, bjac->pmat, pc->modifysubmatricesP));
1105 
1106   for (i = 0; i < n_local; i++) {
1107     PetscCall(KSPGetOptionsPrefix(jac->ksp[i], &prefix));
1108     if (pc->useAmat) {
1109       PetscCall(KSPSetOperators(jac->ksp[i], bjac->mat[i], bjac->pmat[i]));
1110       PetscCall(MatSetOptionsPrefix(bjac->mat[i], prefix));
1111     } else {
1112       PetscCall(KSPSetOperators(jac->ksp[i], bjac->pmat[i], bjac->pmat[i]));
1113     }
1114     PetscCall(MatSetOptionsPrefix(bjac->pmat[i], prefix));
1115     if (pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[i]));
1116   }
1117   PetscFunctionReturn(PETSC_SUCCESS);
1118 }
1119 
1120 /*
1121       These are for a single block with multiple processes
1122 */
1123 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc)
1124 {
1125   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
1126   KSP                subksp = jac->ksp[0];
1127   KSPConvergedReason reason;
1128 
1129   PetscFunctionBegin;
1130   PetscCall(KSPSetUp(subksp));
1131   PetscCall(KSPGetConvergedReason(subksp, &reason));
1132   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1133   PetscFunctionReturn(PETSC_SUCCESS);
1134 }
1135 
1136 static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
1137 {
1138   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1139   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1140 
1141   PetscFunctionBegin;
1142   PetscCall(VecDestroy(&mpjac->ysub));
1143   PetscCall(VecDestroy(&mpjac->xsub));
1144   PetscCall(MatDestroy(&mpjac->submats));
1145   if (jac->ksp) PetscCall(KSPReset(jac->ksp[0]));
1146   PetscFunctionReturn(PETSC_SUCCESS);
1147 }
1148 
1149 static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
1150 {
1151   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1152   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1153 
1154   PetscFunctionBegin;
1155   PetscCall(PCReset_BJacobi_Multiproc(pc));
1156   PetscCall(KSPDestroy(&jac->ksp[0]));
1157   PetscCall(PetscFree(jac->ksp));
1158   PetscCall(PetscSubcommDestroy(&mpjac->psubcomm));
1159 
1160   PetscCall(PetscFree(mpjac));
1161   PetscCall(PCDestroy_BJacobi(pc));
1162   PetscFunctionReturn(PETSC_SUCCESS);
1163 }
1164 
1165 static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc, Vec x, Vec y)
1166 {
1167   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1168   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1169   PetscScalar          *yarray;
1170   const PetscScalar    *xarray;
1171   KSPConvergedReason    reason;
1172 
1173   PetscFunctionBegin;
1174   /* place x's and y's local arrays into xsub and ysub */
1175   PetscCall(VecGetArrayRead(x, &xarray));
1176   PetscCall(VecGetArray(y, &yarray));
1177   PetscCall(VecPlaceArray(mpjac->xsub, xarray));
1178   PetscCall(VecPlaceArray(mpjac->ysub, yarray));
1179 
1180   /* apply preconditioner on each matrix block */
1181   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1182   PetscCall(KSPSolve(jac->ksp[0], mpjac->xsub, mpjac->ysub));
1183   PetscCall(KSPCheckSolve(jac->ksp[0], pc, mpjac->ysub));
1184   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1185   PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
1186   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1187 
1188   PetscCall(VecResetArray(mpjac->xsub));
1189   PetscCall(VecResetArray(mpjac->ysub));
1190   PetscCall(VecRestoreArrayRead(x, &xarray));
1191   PetscCall(VecRestoreArray(y, &yarray));
1192   PetscFunctionReturn(PETSC_SUCCESS);
1193 }
1194 
1195 static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc, Mat X, Mat Y)
1196 {
1197   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
1198   KSPConvergedReason reason;
1199   Mat                sX, sY;
1200   const PetscScalar *x;
1201   PetscScalar       *y;
1202   PetscInt           m, N, lda, ldb;
1203 
1204   PetscFunctionBegin;
1205   /* apply preconditioner on each matrix block */
1206   PetscCall(MatGetLocalSize(X, &m, NULL));
1207   PetscCall(MatGetSize(X, NULL, &N));
1208   PetscCall(MatDenseGetLDA(X, &lda));
1209   PetscCall(MatDenseGetLDA(Y, &ldb));
1210   PetscCall(MatDenseGetArrayRead(X, &x));
1211   PetscCall(MatDenseGetArrayWrite(Y, &y));
1212   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, (PetscScalar *)x, &sX));
1213   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, y, &sY));
1214   PetscCall(MatDenseSetLDA(sX, lda));
1215   PetscCall(MatDenseSetLDA(sY, ldb));
1216   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
1217   PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
1218   PetscCall(KSPCheckSolve(jac->ksp[0], pc, NULL));
1219   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
1220   PetscCall(MatDestroy(&sY));
1221   PetscCall(MatDestroy(&sX));
1222   PetscCall(MatDenseRestoreArrayWrite(Y, &y));
1223   PetscCall(MatDenseRestoreArrayRead(X, &x));
1224   PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
1225   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1226   PetscFunctionReturn(PETSC_SUCCESS);
1227 }
1228 
1229 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
1230 {
1231   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1232   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1233   PetscInt              m, n;
1234   MPI_Comm              comm, subcomm = 0;
1235   const char           *prefix;
1236   PetscBool             wasSetup = PETSC_TRUE;
1237   VecType               vectype;
1238 
1239   PetscFunctionBegin;
1240   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
1241   PetscCheck(jac->n_local <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only a single block in a subcommunicator is supported");
1242   jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
1243   if (!pc->setupcalled) {
1244     PetscInt nestlevel;
1245 
1246     wasSetup = PETSC_FALSE;
1247     PetscCall(PetscNew(&mpjac));
1248     jac->data = (void *)mpjac;
1249 
1250     /* initialize the data structure mpjac */
1251     if (!jac->psubcomm) {
1252       /* Create default contiguous subcommunicatiors if user does not provide them */
1253       PetscCall(PetscSubcommCreate(comm, &jac->psubcomm));
1254       PetscCall(PetscSubcommSetNumber(jac->psubcomm, jac->n));
1255       PetscCall(PetscSubcommSetType(jac->psubcomm, PETSC_SUBCOMM_CONTIGUOUS));
1256     }
1257     mpjac->psubcomm = jac->psubcomm;
1258     subcomm         = PetscSubcommChild(mpjac->psubcomm);
1259 
1260     /* Get matrix blocks of pmat */
1261     PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
1262 
1263     /* create a new KSP (and its PC) that the processes in each subcomm share a copy of */
1264     PetscCall(PetscMalloc1(1, &jac->ksp));
1265     PetscCall(KSPCreate(subcomm, &jac->ksp[0]));
1266     PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
1267     PetscCall(KSPSetNestLevel(jac->ksp[0], nestlevel + 1));
1268     PetscCall(KSPSetErrorIfNotConverged(jac->ksp[0], pc->erroriffailure));
1269     PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0], (PetscObject)pc, 1));
1270     PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
1271     PetscCall(KSPGetPC(jac->ksp[0], &mpjac->pc));
1272 
1273     PetscCall(PCGetOptionsPrefix(pc, &prefix));
1274     PetscCall(KSPSetOptionsPrefix(jac->ksp[0], prefix));
1275     PetscCall(KSPAppendOptionsPrefix(jac->ksp[0], "sub_"));
1276     PetscCall(KSPGetOptionsPrefix(jac->ksp[0], &prefix));
1277     PetscCall(MatSetOptionsPrefix(mpjac->submats, prefix));
1278 
1279     /* create dummy vectors xsub and ysub */
1280     PetscCall(MatGetLocalSize(mpjac->submats, &m, &n));
1281     PetscCall(VecCreateMPIWithArray(subcomm, 1, n, PETSC_DECIDE, NULL, &mpjac->xsub));
1282     PetscCall(VecCreateMPIWithArray(subcomm, 1, m, PETSC_DECIDE, NULL, &mpjac->ysub));
1283     PetscCall(MatGetVecType(mpjac->submats, &vectype));
1284     PetscCall(VecSetType(mpjac->xsub, vectype));
1285     PetscCall(VecSetType(mpjac->ysub, vectype));
1286 
1287     pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc;
1288     pc->ops->reset         = PCReset_BJacobi_Multiproc;
1289     pc->ops->destroy       = PCDestroy_BJacobi_Multiproc;
1290     pc->ops->apply         = PCApply_BJacobi_Multiproc;
1291     pc->ops->matapply      = PCMatApply_BJacobi_Multiproc;
1292   } else { /* pc->setupcalled */
1293     subcomm = PetscSubcommChild(mpjac->psubcomm);
1294     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1295       /* destroy old matrix blocks, then get new matrix blocks */
1296       if (mpjac->submats) PetscCall(MatDestroy(&mpjac->submats));
1297       PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
1298     } else {
1299       PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_REUSE_MATRIX, &mpjac->submats));
1300     }
1301     PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
1302   }
1303 
1304   if (!wasSetup && pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[0]));
1305   PetscFunctionReturn(PETSC_SUCCESS);
1306 }
1307