xref: /petsc/src/ksp/pc/impls/bjacobi/bjacobi.c (revision d9acb416d05abeed0a33bde3a81aeb2ea0364f6a)
1 /*
2    Defines a block Jacobi preconditioner.
3 */
4 
5 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/
6 
7 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC, Mat, Mat);
8 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC, Mat, Mat);
9 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);
10 
/*
   PCSetUp_BJacobi - Determines the block layout for the block Jacobi preconditioner
   (filling in jac->n, jac->n_local and the local block lengths jac->l_lens), then
   dispatches to the specialized setup routine:
     - PCSetUp_BJacobi_Multiproc   when there are fewer blocks than MPI processes
     - PCSetUp_BJacobi_Singleblock when this process owns exactly one block
     - PCSetUp_BJacobi_Multiblock  when this process owns several blocks

   Collective on pc. jac->n / jac->n_local may have been preset via
   PCBJacobiSetTotalBlocks() / PCBJacobiSetLocalBlocks(); negative means "not set".
*/
static PetscErrorCode PCSetUp_BJacobi(PC pc)
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;
  Mat         mat = pc->mat, pmat = pc->pmat;
  PetscBool   hasop;
  PetscInt    N, M, start, i, sum, end;
  PetscInt    bs, i_start = -1, i_end = -1;
  PetscMPIInt rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
  PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
  PetscCall(MatGetBlockSize(pc->pmat, &bs));

  /* fewer blocks than processes: each block is shared by several processes */
  if (jac->n > 0 && jac->n < size) {
    PetscCall(PCSetUp_BJacobi_Multiproc(pc));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /*    Determines the number of blocks assigned to each processor */
  /*   local block count  given */
  if (jac->n_local > 0 && jac->n < 0) {
    /* sum the per-process counts to obtain the global block count */
    PetscCall(MPIU_Allreduce(&jac->n_local, &jac->n, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
    if (jac->l_lens) { /* check that user set these correctly */
      sum = 0;
      for (i = 0; i < jac->n_local; i++) {
        /* each block length must be a multiple of the matrix block size */
        PetscCheck(jac->l_lens[i] / bs * bs == jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
        sum += jac->l_lens[i];
      }
      PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local lens set incorrectly");
    } else {
      /* no lengths given: split the local rows as evenly as possible, respecting bs */
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
    }
  } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
    /* global blocks given: determine which ones are local */
    if (jac->g_lens) {
      /* check that g_lens has valid entries */
      for (i = 0; i < jac->n; i++) {
        PetscCheck(jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Zero block not allowed");
        PetscCheck(jac->g_lens[i] / bs * bs == jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
      }
      if (size == 1) {
        jac->n_local = jac->n;
        PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
        PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens, jac->n_local));
        /* check that user set these correctly */
        sum = 0;
        for (i = 0; i < jac->n_local; i++) sum += jac->l_lens[i];
        PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Global lens set incorrectly");
      } else {
        PetscCall(MatGetOwnershipRange(pc->pmat, &start, &end));
        /* loop over blocks determining first one owned by me */
        sum = 0;
        for (i = 0; i < jac->n + 1; i++) {
          if (sum == start) {
            i_start = i;
            goto start_1;
          }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        /* no block boundary coincides with this process's first row: invalid layout */
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
      start_1:
        /* continue summing to find the first block past my last row */
        for (i = i_start; i < jac->n + 1; i++) {
          if (sum == end) {
            i_end = i;
            goto end_1;
          }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
      end_1:
        jac->n_local = i_end - i_start;
        PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
        PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens + i_start, jac->n_local));
      }
    } else { /* no global blocks given, determine then using default layout */
      /* distribute jac->n blocks across processes, earlier ranks get the remainder */
      jac->n_local = jac->n / size + ((jac->n % size) > rank);
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) {
        jac->l_lens[i] = ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)) * bs;
        PetscCheck(jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Too many blocks given");
      }
    }
  } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
    /* default: one block per process covering all local rows */
    jac->n       = size;
    jac->n_local = 1;
    PetscCall(PetscMalloc1(1, &jac->l_lens));
    jac->l_lens[0] = M;
  } else { /* jac->n > 0 && jac->n_local > 0 */
    if (!jac->l_lens) {
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
    }
  }
  PetscCheck(jac->n_local >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of blocks is less than number of processors");

  /*    Determines mat and pmat */
  PetscCall(MatHasOperation(pc->mat, MATOP_GET_DIAGONAL_BLOCK, &hasop));
  if (!hasop && size == 1) {
    /* sequential matrix without a diagonal-block operation: use it directly */
    mat  = pc->mat;
    pmat = pc->pmat;
  } else {
    if (pc->useAmat) {
      /* use block from Amat matrix, not Pmat for local MatMult() */
      PetscCall(MatGetDiagonalBlock(pc->mat, &mat));
    }
    if (pc->pmat != pc->mat || !pc->useAmat) {
      PetscCall(MatGetDiagonalBlock(pc->pmat, &pmat));
    } else pmat = mat;
  }

  /*
     Setup code depends on the number of blocks
  */
  if (jac->n_local == 1) {
    PetscCall(PCSetUp_BJacobi_Singleblock(pc, mat, pmat));
  } else {
    PetscCall(PCSetUp_BJacobi_Multiblock(pc, mat, pmat));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
134 
135 /* Default destroy, if it has never been setup */
136 static PetscErrorCode PCDestroy_BJacobi(PC pc)
137 {
138   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
139 
140   PetscFunctionBegin;
141   PetscCall(PetscFree(jac->g_lens));
142   PetscCall(PetscFree(jac->l_lens));
143   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", NULL));
144   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", NULL));
145   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", NULL));
146   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", NULL));
147   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", NULL));
148   PetscCall(PetscFree(pc->data));
149   PetscFunctionReturn(PETSC_SUCCESS);
150 }
151 
152 static PetscErrorCode PCSetFromOptions_BJacobi(PC pc, PetscOptionItems *PetscOptionsObject)
153 {
154   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
155   PetscInt    blocks, i;
156   PetscBool   flg;
157 
158   PetscFunctionBegin;
159   PetscOptionsHeadBegin(PetscOptionsObject, "Block Jacobi options");
160   PetscCall(PetscOptionsInt("-pc_bjacobi_blocks", "Total number of blocks", "PCBJacobiSetTotalBlocks", jac->n, &blocks, &flg));
161   if (flg) PetscCall(PCBJacobiSetTotalBlocks(pc, blocks, NULL));
162   PetscCall(PetscOptionsInt("-pc_bjacobi_local_blocks", "Local number of blocks", "PCBJacobiSetLocalBlocks", jac->n_local, &blocks, &flg));
163   if (flg) PetscCall(PCBJacobiSetLocalBlocks(pc, blocks, NULL));
164   if (jac->ksp) {
165     /* The sub-KSP has already been set up (e.g., PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions was not called
166      * unless we had already been called. */
167     for (i = 0; i < jac->n_local; i++) PetscCall(KSPSetFromOptions(jac->ksp[i]));
168   }
169   PetscOptionsHeadEnd();
170   PetscFunctionReturn(PETSC_SUCCESS);
171 }
172 
173 #include <petscdraw.h>
/*
   PCView_BJacobi - Prints information about the block Jacobi preconditioner.

   Supports ASCII (summary, or per-block detail with -ksp_view ::ascii_info_detail),
   string, and draw viewers. For ASCII, sub-KSPs are viewed through a sub-viewer so
   that per-rank output is serialized correctly.
*/
static PetscErrorCode PCView_BJacobi(PC pc, PetscViewer viewer)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data; /* only meaningful in the multiproc case */
  PetscMPIInt           rank;
  PetscInt              i;
  PetscBool             iascii, isstring, isdraw;
  PetscViewer           sviewer;
  PetscViewerFormat     format;
  const char           *prefix;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    if (pc->useAmat) PetscCall(PetscViewerASCIIPrintf(viewer, "  using Amat local matrix, number of blocks = %" PetscInt_FMT "\n", jac->n));
    PetscCall(PetscViewerASCIIPrintf(viewer, "  number of blocks = %" PetscInt_FMT "\n", jac->n));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* summary mode: only view the first block's solver, on rank 0 */
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for first block is in the following KSP and PC objects on rank 0:\n"));
      PetscCall(PCGetOptionsPrefix(pc, &prefix));
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all blocks\n", prefix ? prefix : ""));
      if (jac->ksp && !jac->psubcomm) {
        PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
        if (rank == 0) {
          PetscCall(PetscViewerASCIIPushTab(viewer));
          PetscCall(KSPView(jac->ksp[0], sviewer));
          PetscCall(PetscViewerASCIIPopTab(viewer));
        }
        PetscCall(PetscViewerFlush(sviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
        /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else if (mpjac && jac->ksp && mpjac->psubcomm) {
        /* multiproc case: the sub-KSP lives on a subcommunicator */
        PetscCall(PetscViewerGetSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
        if (!mpjac->psubcomm->color) {
          PetscCall(PetscViewerASCIIPushTab(viewer));
          PetscCall(KSPView(*(jac->ksp), sviewer));
          PetscCall(PetscViewerASCIIPopTab(viewer));
        }
        PetscCall(PetscViewerFlush(sviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
        /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else {
        PetscCall(PetscViewerFlush(viewer));
      }
    } else {
      /* detail mode: every rank views each of its local blocks */
      PetscInt n_global;
      PetscCall(MPIU_Allreduce(&jac->n_local, &n_global, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for each block is in the following KSP and PC objects:\n"));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n", rank, jac->n_local, jac->first_local));
      PetscCall(PetscViewerASCIIPushTab(viewer));
      PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
      for (i = 0; i < jac->n_local; i++) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] local block number %" PetscInt_FMT "\n", rank, i));
        PetscCall(KSPView(jac->ksp[i], sviewer));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "- - - - - - - - - - - - - - - - - -\n"));
      }
      PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
      PetscCall(PetscViewerASCIIPopTab(viewer));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
  } else if (isstring) {
    PetscCall(PetscViewerStringSPrintf(viewer, " blks=%" PetscInt_FMT, jac->n));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (jac->ksp) PetscCall(KSPView(jac->ksp[0], sviewer));
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
  } else if (isdraw) {
    PetscDraw draw;
    char      str[25];
    PetscReal x, y, bottom, h;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawGetCurrentPoint(draw, &x, &y));
    PetscCall(PetscSNPrintf(str, 25, "Number blocks %" PetscInt_FMT, jac->n));
    PetscCall(PetscDrawStringBoxed(draw, x, y, PETSC_DRAW_RED, PETSC_DRAW_BLACK, str, NULL, &h));
    bottom = y - h;
    PetscCall(PetscDrawPushCurrentPoint(draw, x, bottom));
    /* warning the communicator on viewer is different than on ksp in parallel */
    if (jac->ksp) PetscCall(KSPView(jac->ksp[0], viewer));
    PetscCall(PetscDrawPopCurrentPoint(draw));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
263 
264 static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc, PetscInt *n_local, PetscInt *first_local, KSP **ksp)
265 {
266   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
267 
268   PetscFunctionBegin;
269   PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Must call KSPSetUp() or PCSetUp() first");
270 
271   if (n_local) *n_local = jac->n_local;
272   if (first_local) *first_local = jac->first_local;
273   if (ksp) *ksp = jac->ksp;
274   PetscFunctionReturn(PETSC_SUCCESS);
275 }
276 
277 static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt *lens)
278 {
279   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
280 
281   PetscFunctionBegin;
282   PetscCheck(pc->setupcalled <= 0 || jac->n == blocks, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
283   jac->n = blocks;
284   if (!lens) jac->g_lens = NULL;
285   else {
286     PetscCall(PetscMalloc1(blocks, &jac->g_lens));
287     PetscCall(PetscArraycpy(jac->g_lens, lens, blocks));
288   }
289   PetscFunctionReturn(PETSC_SUCCESS);
290 }
291 
292 static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
293 {
294   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
295 
296   PetscFunctionBegin;
297   *blocks = jac->n;
298   if (lens) *lens = jac->g_lens;
299   PetscFunctionReturn(PETSC_SUCCESS);
300 }
301 
302 static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt lens[])
303 {
304   PC_BJacobi *jac;
305 
306   PetscFunctionBegin;
307   jac = (PC_BJacobi *)pc->data;
308 
309   jac->n_local = blocks;
310   if (!lens) jac->l_lens = NULL;
311   else {
312     PetscCall(PetscMalloc1(blocks, &jac->l_lens));
313     PetscCall(PetscArraycpy(jac->l_lens, lens, blocks));
314   }
315   PetscFunctionReturn(PETSC_SUCCESS);
316 }
317 
318 static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
319 {
320   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
321 
322   PetscFunctionBegin;
323   *blocks = jac->n_local;
324   if (lens) *lens = jac->l_lens;
325   PetscFunctionReturn(PETSC_SUCCESS);
326 }
327 
328 /*@C
329   PCBJacobiGetSubKSP - Gets the local `KSP` contexts for all blocks on
330   this processor.
331 
332   Not Collective
333 
334   Input Parameter:
335 . pc - the preconditioner context
336 
337   Output Parameters:
338 + n_local     - the number of blocks on this processor, or NULL
339 . first_local - the global number of the first block on this processor, or NULL
340 - ksp         - the array of KSP contexts
341 
342   Notes:
343   After `PCBJacobiGetSubKSP()` the array of `KSP` contexts is not to be freed.
344 
345   Currently for some matrix implementations only 1 block per processor
346   is supported.
347 
348   You must call `KSPSetUp()` or `PCSetUp()` before calling `PCBJacobiGetSubKSP()`.
349 
350   Fortran Notes:
351   You must pass in a `KSP` array that is large enough to contain all the local `KSP`s.
352 
353   You can call `PCBJacobiGetSubKSP`(pc,nlocal,firstlocal,`PETSC_NULL_KSP`,ierr) to determine how large the
354   `KSP` array must be.
355 
356   Level: advanced
357 
358 .seealso: [](ch_ksp), `PCBJACOBI`, `PCASM`, `PCASMGetSubKSP()`
359 @*/
PetscErrorCode PCBJacobiGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  /* forward to the implementation composed on pc (PCBJacobiGetSubKSP_BJacobi);
     PetscUseMethod errors if no such method is composed */
  PetscUseMethod(pc, "PCBJacobiGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp));
  PetscFunctionReturn(PETSC_SUCCESS);
}
367 
368 /*@
369   PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
370   Jacobi preconditioner.
371 
372   Collective
373 
374   Input Parameters:
375 + pc     - the preconditioner context
376 . blocks - the number of blocks
377 - lens   - [optional] integer array containing the size of each block
378 
379   Options Database Key:
380 . -pc_bjacobi_blocks <blocks> - Sets the number of global blocks
381 
382   Note:
383   Currently only a limited number of blocking configurations are supported.
384   All processors sharing the `PC` must call this routine with the same data.
385 
386   Level: intermediate
387 
388 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetLocalBlocks()`
389 @*/
PetscErrorCode PCBJacobiSetTotalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  /* the global block count must be strictly positive */
  PetscCheck(blocks > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Must have positive blocks");
  /* forward to the composed implementation; a no-op for non-bjacobi PC types */
  PetscTryMethod(pc, "PCBJacobiSetTotalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}
398 
399 /*@C
400   PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
401   Jacobi, `PCBJACOBI`, preconditioner.
402 
403   Not Collective
404 
405   Input Parameter:
406 . pc - the preconditioner context
407 
408   Output Parameters:
409 + blocks - the number of blocks
410 - lens   - integer array containing the size of each block
411 
412   Level: intermediate
413 
414 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetLocalBlocks()`
415 @*/
PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscAssertPointer(blocks, 2);
  /* forward to the implementation composed on pc; errors if pc has no such method */
  PetscUseMethod(pc, "PCBJacobiGetTotalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}
424 
425 /*@
426   PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
427   Jacobi, `PCBJACOBI`,  preconditioner.
428 
429   Not Collective
430 
431   Input Parameters:
432 + pc     - the preconditioner context
433 . blocks - the number of blocks
434 - lens   - [optional] integer array containing size of each block
435 
436   Options Database Key:
437 . -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks
438 
439   Note:
440   Currently only a limited number of blocking configurations are supported.
441 
442   Level: intermediate
443 
444 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetTotalBlocks()`
445 @*/
446 PetscErrorCode PCBJacobiSetLocalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
447 {
448   PetscFunctionBegin;
449   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
450   PetscCheck(blocks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must have nonegative blocks");
451   PetscTryMethod(pc, "PCBJacobiSetLocalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
452   PetscFunctionReturn(PETSC_SUCCESS);
453 }
454 
455 /*@C
456   PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
457   Jacobi, `PCBJACOBI`, preconditioner.
458 
459   Not Collective
460 
461   Input Parameters:
462 + pc     - the preconditioner context
463 . blocks - the number of blocks
464 - lens   - [optional] integer array containing size of each block
465 
466   Note:
467   Currently only a limited number of blocking configurations are supported.
468 
469   Level: intermediate
470 
471 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetTotalBlocks()`
472 @*/
PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscAssertPointer(blocks, 2);
  /* forward to the implementation composed on pc; errors if pc has no such method */
  PetscUseMethod(pc, "PCBJacobiGetLocalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}
481 
482 /*MC
483    PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with
484            its own `KSP` object.
485 
486    Options Database Keys:
487 +  -pc_use_amat - use Amat to apply block of operator in inner Krylov method
488 -  -pc_bjacobi_blocks <n> - use n total blocks
489 
490    Notes:
491     See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable point block, and `PCPBJACOBI` for fixed size point block
492 
493     Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.
494 
495      To set options on the solvers for each block append -sub_ to all the `KSP` and `PC`
496         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
497 
498      To set the options on the solvers separate for each block call `PCBJacobiGetSubKSP()`
499          and set the options directly on the resulting `KSP` object (you can access its `PC`
500          `KSPGetPC()`)
501 
502      For GPU-based vectors (`VECCUDA`, `VECViennaCL`) it is recommended to use exactly one block per MPI process for best
503          performance.  Different block partitioning may lead to additional data transfers
504          between host and GPU that lead to degraded performance.
505 
     When multiple processes share a single block, each block encompasses exactly all the unknowns owned by its set of processes.
507 
508    Level: beginner
509 
510 .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCType`,
511           `PCASM`, `PCSetUseAmat()`, `PCGetUseAmat()`, `PCBJacobiGetSubKSP()`, `PCBJacobiSetTotalBlocks()`,
512           `PCBJacobiSetLocalBlocks()`, `PCSetModifySubMatrices()`, `PCJACOBI`, `PCVPBJACOBI`, `PCPBJACOBI`
513 M*/
514 
/*
   PCCreate_BJacobi - Creates the PCBJACOBI context: allocates PC_BJacobi, installs
   the function-table entries, sets "not yet chosen" defaults for the block layout,
   and composes the public set/get methods on the object.
*/
PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
{
  PetscMPIInt rank;
  PC_BJacobi *jac;

  PetscFunctionBegin;
  PetscCall(PetscNew(&jac));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));

  /* apply/matapply/applytranspose are filled in later by the specialized setup routines */
  pc->ops->apply           = NULL;
  pc->ops->matapply        = NULL;
  pc->ops->applytranspose  = NULL;
  pc->ops->setup           = PCSetUp_BJacobi;
  pc->ops->destroy         = PCDestroy_BJacobi;
  pc->ops->setfromoptions  = PCSetFromOptions_BJacobi;
  pc->ops->view            = PCView_BJacobi;
  pc->ops->applyrichardson = NULL;

  pc->data         = (void *)jac;
  jac->n           = -1;   /* -1 means "block counts not yet set"; chosen in PCSetUp_BJacobi() */
  jac->n_local     = -1;
  jac->first_local = rank; /* default: with one block per process, block number equals rank */
  jac->ksp         = NULL;
  jac->g_lens      = NULL;
  jac->l_lens      = NULL;
  jac->psubcomm    = NULL;

  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", PCBJacobiGetSubKSP_BJacobi));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", PCBJacobiSetTotalBlocks_BJacobi));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", PCBJacobiGetTotalBlocks_BJacobi));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", PCBJacobiSetLocalBlocks_BJacobi));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", PCBJacobiGetLocalBlocks_BJacobi));
  PetscFunctionReturn(PETSC_SUCCESS);
}
549 
550 /*
551         These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI
552 */
553 static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
554 {
555   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
556   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
557 
558   PetscFunctionBegin;
559   PetscCall(KSPReset(jac->ksp[0]));
560   PetscCall(VecDestroy(&bjac->x));
561   PetscCall(VecDestroy(&bjac->y));
562   PetscFunctionReturn(PETSC_SUCCESS);
563 }
564 
565 static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
566 {
567   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
568   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
569 
570   PetscFunctionBegin;
571   PetscCall(PCReset_BJacobi_Singleblock(pc));
572   PetscCall(KSPDestroy(&jac->ksp[0]));
573   PetscCall(PetscFree(jac->ksp));
574   PetscCall(PetscFree(bjac));
575   PetscCall(PCDestroy_BJacobi(pc));
576   PetscFunctionReturn(PETSC_SUCCESS);
577 }
578 
579 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
580 {
581   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
582   KSP                subksp = jac->ksp[0];
583   KSPConvergedReason reason;
584 
585   PetscFunctionBegin;
586   PetscCall(KSPSetUp(subksp));
587   PetscCall(KSPGetConvergedReason(subksp, &reason));
588   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
589   PetscFunctionReturn(PETSC_SUCCESS);
590 }
591 
/*
   PCApply_BJacobi_Singleblock - Applies the preconditioner: y = (approximate) inverse
   of the local diagonal block applied to x, via a single sub-KSP solve on the local
   representations of x and y (no data is copied; local vector views are used).
*/
static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

  PetscFunctionBegin;
  /* view the local parts of x and y through the sequential work vectors */
  PetscCall(VecGetLocalVectorRead(x, bjac->x));
  PetscCall(VecGetLocalVector(y, bjac->y));
  /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
     matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
     of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
  PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
  PetscCall(KSPSolve(jac->ksp[0], bjac->x, bjac->y));
  /* propagate sub-solver failure to the outer solver */
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
  PetscCall(VecRestoreLocalVectorRead(x, bjac->x));
  PetscCall(VecRestoreLocalVector(y, bjac->y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
610 
611 static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc, Mat X, Mat Y)
612 {
613   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
614   Mat         sX, sY;
615 
616   PetscFunctionBegin;
617   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
618      matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
619      of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
620   PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
621   PetscCall(MatDenseGetLocalMatrix(X, &sX));
622   PetscCall(MatDenseGetLocalMatrix(Y, &sY));
623   PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
624   PetscFunctionReturn(PETSC_SUCCESS);
625 }
626 
/*
   PCApplySymmetricLeft_BJacobi_Singleblock - Applies only the symmetric-left factor
   of the inner PC to x, storing the result in y. Bypasses the inner KSP entirely
   and calls the inner PC directly.
*/
static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
  PetscScalar            *y_array;
  const PetscScalar      *x_array;
  PC                      subpc;

  PetscFunctionBegin;
  /*
      The VecPlaceArray() is to avoid having to copy the
    y vector into the bjac->x vector. The reason for
    the bjac->x vector is that we need a sequential vector
    for the sequential solve.
  */
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  PetscCall(VecPlaceArray(bjac->x, x_array));
  PetscCall(VecPlaceArray(bjac->y, y_array));
  /* apply the symmetric left portion of the inner PC operator */
  /* note this by-passes the inner KSP and its options completely */
  PetscCall(KSPGetPC(jac->ksp[0], &subpc));
  PetscCall(PCApplySymmetricLeft(subpc, bjac->x, bjac->y));
  /* detach the borrowed arrays before returning them to x and y */
  PetscCall(VecResetArray(bjac->x));
  PetscCall(VecResetArray(bjac->y));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscCall(VecRestoreArray(y, &y_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
656 
657 static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc, Vec x, Vec y)
658 {
659   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
660   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
661   PetscScalar            *y_array;
662   const PetscScalar      *x_array;
663   PC                      subpc;
664 
665   PetscFunctionBegin;
666   /*
667       The VecPlaceArray() is to avoid having to copy the
668     y vector into the bjac->x vector. The reason for
669     the bjac->x vector is that we need a sequential vector
670     for the sequential solve.
671   */
672   PetscCall(VecGetArrayRead(x, &x_array));
673   PetscCall(VecGetArray(y, &y_array));
674   PetscCall(VecPlaceArray(bjac->x, x_array));
675   PetscCall(VecPlaceArray(bjac->y, y_array));
676 
677   /* apply the symmetric right portion of the inner PC operator */
678   /* note this by-passes the inner KSP and its options completely */
679 
680   PetscCall(KSPGetPC(jac->ksp[0], &subpc));
681   PetscCall(PCApplySymmetricRight(subpc, bjac->x, bjac->y));
682 
683   PetscCall(VecResetArray(bjac->x));
684   PetscCall(VecResetArray(bjac->y));
685   PetscCall(VecRestoreArrayRead(x, &x_array));
686   PetscCall(VecRestoreArray(y, &y_array));
687   PetscFunctionReturn(PETSC_SUCCESS);
688 }
689 
/*
   PCApplyTranspose_BJacobi_Singleblock - Applies the transpose of the preconditioner:
   solves the local block transposed system with the sub-KSP, wrapping the arrays of
   x and y in the sequential work vectors so no data is copied.
*/
static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
  PetscScalar            *y_array;
  const PetscScalar      *x_array;

  PetscFunctionBegin;
  /*
      The VecPlaceArray() is to avoid having to copy the
    y vector into the bjac->x vector. The reason for
    the bjac->x vector is that we need a sequential vector
    for the sequential solve.
  */
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  PetscCall(VecPlaceArray(bjac->x, x_array));
  PetscCall(VecPlaceArray(bjac->y, y_array));
  PetscCall(KSPSolveTranspose(jac->ksp[0], bjac->x, bjac->y));
  /* propagate sub-solver failure to the outer solver */
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
  /* detach the borrowed arrays before returning them to x and y */
  PetscCall(VecResetArray(bjac->x));
  PetscCall(VecResetArray(bjac->y));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscCall(VecRestoreArray(y, &y_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
716 
/*
   Sets up block Jacobi for the case of exactly one block per MPI process:
   creates (once) the sequential sub-KSP with prefix "<pc prefix>sub_", installs
   the singleblock function table, builds array-less work vectors for the block
   solve, and (re)sets the sub-KSP operators on every call.
*/
static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc, Mat mat, Mat pmat)
{
  PC_BJacobi             *jac = (PC_BJacobi *)pc->data;
  PetscInt                m;
  KSP                     ksp;
  PC_BJacobi_Singleblock *bjac;
  PetscBool               wasSetup = PETSC_TRUE; /* stays PETSC_TRUE unless we create the sub-KSP here */
  VecType                 vectype;
  const char             *prefix;

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    if (!jac->ksp) {
      PetscInt nestlevel;

      /* first-ever setup: build the single sequential sub-KSP */
      wasSetup = PETSC_FALSE;

      PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
      PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
      PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
      PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
      PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
      PetscCall(KSPSetType(ksp, KSPPREONLY));
      PetscCall(PCGetOptionsPrefix(pc, &prefix));
      PetscCall(KSPSetOptionsPrefix(ksp, prefix));
      PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));

      /* install the single-block implementations on the outer PC */
      pc->ops->reset               = PCReset_BJacobi_Singleblock;
      pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
      pc->ops->apply               = PCApply_BJacobi_Singleblock;
      pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
      pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
      pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
      pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
      pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;

      PetscCall(PetscMalloc1(1, &jac->ksp));
      jac->ksp[0] = ksp;

      PetscCall(PetscNew(&bjac));
      jac->data = (void *)bjac;
    } else {
      /* sub-KSP already exists (e.g. extracted via PCBJacobiGetSubKSP() before setup) */
      ksp  = jac->ksp[0];
      bjac = (PC_BJacobi_Singleblock *)jac->data;
    }

    /*
      The reason we need to generate these vectors is to serve
      as the right-hand side and solution vector for the solve on the
      block. We do not need to allocate space for the vectors since
      that is provided via VecPlaceArray() just before the call to
      KSPSolve() on the block.
    */
    PetscCall(MatGetSize(pmat, &m, &m));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->x));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->y));
    PetscCall(MatGetVecType(pmat, &vectype));
    PetscCall(VecSetType(bjac->x, vectype));
    PetscCall(VecSetType(bjac->y, vectype));
  } else {
    /* repeated setup: reuse the existing sub-KSP and work vectors */
    ksp  = jac->ksp[0];
    bjac = (PC_BJacobi_Singleblock *)jac->data;
  }
  /* propagate the sub-KSP prefix to the operator(s) so matrix options follow it */
  PetscCall(KSPGetOptionsPrefix(ksp, &prefix));
  if (pc->useAmat) {
    PetscCall(KSPSetOperators(ksp, mat, pmat));
    PetscCall(MatSetOptionsPrefix(mat, prefix));
  } else {
    PetscCall(KSPSetOperators(ksp, pmat, pmat));
  }
  PetscCall(MatSetOptionsPrefix(pmat, prefix));
  if (!wasSetup && pc->setfromoptionscalled) {
    /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
    PetscCall(KSPSetFromOptions(ksp));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
794 
795 static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
796 {
797   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
798   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
799   PetscInt               i;
800 
801   PetscFunctionBegin;
802   if (bjac && bjac->pmat) {
803     PetscCall(MatDestroyMatrices(jac->n_local, &bjac->pmat));
804     if (pc->useAmat) PetscCall(MatDestroyMatrices(jac->n_local, &bjac->mat));
805   }
806 
807   for (i = 0; i < jac->n_local; i++) {
808     PetscCall(KSPReset(jac->ksp[i]));
809     if (bjac && bjac->x) {
810       PetscCall(VecDestroy(&bjac->x[i]));
811       PetscCall(VecDestroy(&bjac->y[i]));
812       PetscCall(ISDestroy(&bjac->is[i]));
813     }
814   }
815   PetscCall(PetscFree(jac->l_lens));
816   PetscCall(PetscFree(jac->g_lens));
817   PetscFunctionReturn(PETSC_SUCCESS);
818 }
819 
820 static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
821 {
822   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
823   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
824   PetscInt               i;
825 
826   PetscFunctionBegin;
827   PetscCall(PCReset_BJacobi_Multiblock(pc));
828   if (bjac) {
829     PetscCall(PetscFree2(bjac->x, bjac->y));
830     PetscCall(PetscFree(bjac->starts));
831     PetscCall(PetscFree(bjac->is));
832   }
833   PetscCall(PetscFree(jac->data));
834   for (i = 0; i < jac->n_local; i++) PetscCall(KSPDestroy(&jac->ksp[i]));
835   PetscCall(PetscFree(jac->ksp));
836   PetscCall(PCDestroy_BJacobi(pc));
837   PetscFunctionReturn(PETSC_SUCCESS);
838 }
839 
840 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
841 {
842   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
843   PetscInt           i, n_local = jac->n_local;
844   KSPConvergedReason reason;
845 
846   PetscFunctionBegin;
847   for (i = 0; i < n_local; i++) {
848     PetscCall(KSPSetUp(jac->ksp[i]));
849     PetscCall(KSPGetConvergedReason(jac->ksp[i], &reason));
850     if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
851   }
852   PetscFunctionReturn(PETSC_SUCCESS);
853 }
854 
855 static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc, Vec x, Vec y)
856 {
857   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
858   PetscInt               i, n_local = jac->n_local;
859   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
860   PetscScalar           *yin;
861   const PetscScalar     *xin;
862 
863   PetscFunctionBegin;
864   PetscCall(VecGetArrayRead(x, &xin));
865   PetscCall(VecGetArray(y, &yin));
866   for (i = 0; i < n_local; i++) {
867     /*
868        To avoid copying the subvector from x into a workspace we instead
869        make the workspace vector array point to the subpart of the array of
870        the global vector.
871     */
872     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
873     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
874 
875     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
876     PetscCall(KSPSolve(jac->ksp[i], bjac->x[i], bjac->y[i]));
877     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
878     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
879 
880     PetscCall(VecResetArray(bjac->x[i]));
881     PetscCall(VecResetArray(bjac->y[i]));
882   }
883   PetscCall(VecRestoreArrayRead(x, &xin));
884   PetscCall(VecRestoreArray(y, &yin));
885   PetscFunctionReturn(PETSC_SUCCESS);
886 }
887 
888 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Multiblock(PC pc, Vec x, Vec y)
889 {
890   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
891   PetscInt               i, n_local = jac->n_local;
892   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
893   PetscScalar           *yin;
894   const PetscScalar     *xin;
895   PC                     subpc;
896 
897   PetscFunctionBegin;
898   PetscCall(VecGetArrayRead(x, &xin));
899   PetscCall(VecGetArray(y, &yin));
900   for (i = 0; i < n_local; i++) {
901     /*
902        To avoid copying the subvector from x into a workspace we instead
903        make the workspace vector array point to the subpart of the array of
904        the global vector.
905     */
906     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
907     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
908 
909     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
910     /* apply the symmetric left portion of the inner PC operator */
911     /* note this by-passes the inner KSP and its options completely */
912     PetscCall(KSPGetPC(jac->ksp[i], &subpc));
913     PetscCall(PCApplySymmetricLeft(subpc, bjac->x[i], bjac->y[i]));
914     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
915 
916     PetscCall(VecResetArray(bjac->x[i]));
917     PetscCall(VecResetArray(bjac->y[i]));
918   }
919   PetscCall(VecRestoreArrayRead(x, &xin));
920   PetscCall(VecRestoreArray(y, &yin));
921   PetscFunctionReturn(PETSC_SUCCESS);
922 }
923 
static PetscErrorCode PCApplySymmetricRight_BJacobi_Multiblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
  PetscInt               i, n_local = jac->n_local;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscScalar           *yin;
  const PetscScalar     *xin;
  PC                     subpc;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xin));
  PetscCall(VecGetArray(y, &yin));
  for (i = 0; i < n_local; i++) {
    /*
       To avoid copying the subvector from x into a workspace we instead
       make the workspace vector array point to the subpart of the array of
       the global vector.
    */
    PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
    PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));

    PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
    /* apply the symmetric right portion of the inner PC operator */
    /* note this by-passes the inner KSP and its options completely */
    PetscCall(KSPGetPC(jac->ksp[i], &subpc));
    PetscCall(PCApplySymmetricRight(subpc, bjac->x[i], bjac->y[i]));
    PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));

    PetscCall(VecResetArray(bjac->x[i]));
    PetscCall(VecResetArray(bjac->y[i]));
  }
  PetscCall(VecRestoreArrayRead(x, &xin));
  PetscCall(VecRestoreArray(y, &yin));
  PetscFunctionReturn(PETSC_SUCCESS);
}
959 
960 static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc, Vec x, Vec y)
961 {
962   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
963   PetscInt               i, n_local = jac->n_local;
964   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
965   PetscScalar           *yin;
966   const PetscScalar     *xin;
967 
968   PetscFunctionBegin;
969   PetscCall(VecGetArrayRead(x, &xin));
970   PetscCall(VecGetArray(y, &yin));
971   for (i = 0; i < n_local; i++) {
972     /*
973        To avoid copying the subvector from x into a workspace we instead
974        make the workspace vector array point to the subpart of the array of
975        the global vector.
976     */
977     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
978     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
979 
980     PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
981     PetscCall(KSPSolveTranspose(jac->ksp[i], bjac->x[i], bjac->y[i]));
982     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
983     PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
984 
985     PetscCall(VecResetArray(bjac->x[i]));
986     PetscCall(VecResetArray(bjac->y[i]));
987   }
988   PetscCall(VecRestoreArrayRead(x, &xin));
989   PetscCall(VecRestoreArray(y, &yin));
990   PetscFunctionReturn(PETSC_SUCCESS);
991 }
992 
/*
   Sets up block Jacobi for multiple blocks per MPI process: creates (once) one
   sequential sub-KSP, work vectors, and index set per local block, extracts the
   diagonal submatrices with MatCreateSubMatrices(), and (re)sets the sub-KSP
   operators on every call.
*/
static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc, Mat mat, Mat pmat)
{
  PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
  PetscInt               m, n_local, N, M, start, i;
  const char            *prefix;
  KSP                    ksp;
  Vec                    x, y;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PC                     subpc;
  IS                     is;
  MatReuse               scall;
  VecType                vectype;
  MatNullSpace          *nullsp_mat = NULL, *nullsp_pmat = NULL;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(pc->pmat, &M, &N));

  n_local = jac->n_local;

  /* with -pc_use_amat both operators must support the same submatrix extraction */
  if (pc->useAmat) {
    PetscBool same;
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, ((PetscObject)pmat)->type_name, &same));
    PetscCheck(same, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "Matrices not of same type");
  }

  if (!pc->setupcalled) {
    PetscInt nestlevel;

    scall = MAT_INITIAL_MATRIX;

    if (!jac->ksp) {
      /* first-ever setup: install the multiblock function table and build one
         sequential sub-KSP per local block */
      pc->ops->reset               = PCReset_BJacobi_Multiblock;
      pc->ops->destroy             = PCDestroy_BJacobi_Multiblock;
      pc->ops->apply               = PCApply_BJacobi_Multiblock;
      pc->ops->matapply            = NULL;
      pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Multiblock;
      pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Multiblock;
      pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Multiblock;
      pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Multiblock;

      PetscCall(PetscNew(&bjac));
      PetscCall(PetscMalloc1(n_local, &jac->ksp));
      PetscCall(PetscMalloc2(n_local, &bjac->x, n_local, &bjac->y));
      PetscCall(PetscMalloc1(n_local, &bjac->starts));

      jac->data = (void *)bjac;
      PetscCall(PetscMalloc1(n_local, &bjac->is));

      for (i = 0; i < n_local; i++) {
        PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
        PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
        PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
        PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
        PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
        PetscCall(KSPSetType(ksp, KSPPREONLY));
        PetscCall(KSPGetPC(ksp, &subpc));
        PetscCall(PCGetOptionsPrefix(pc, &prefix));
        PetscCall(KSPSetOptionsPrefix(ksp, prefix));
        PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));

        jac->ksp[i] = ksp;
      }
    } else {
      bjac = (PC_BJacobi_Multiblock *)jac->data;
    }

    /* build per-block work vectors and the index sets that select each block */
    start = 0;
    PetscCall(MatGetVecType(pmat, &vectype));
    for (i = 0; i < n_local; i++) {
      m = jac->l_lens[i];
      /*
      The reason we need to generate these vectors is to serve
      as the right-hand side and solution vector for the solve on the
      block. We do not need to allocate space for the vectors since
      that is provided via VecPlaceArray() just before the call to
      KSPSolve() on the block.

      */
      PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &x));
      PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &y));
      PetscCall(VecSetType(x, vectype));
      PetscCall(VecSetType(y, vectype));

      bjac->x[i]      = x;
      bjac->y[i]      = y;
      bjac->starts[i] = start;

      PetscCall(ISCreateStride(PETSC_COMM_SELF, m, start, 1, &is));
      bjac->is[i] = is;

      start += m;
    }
  } else {
    bjac = (PC_BJacobi_Multiblock *)jac->data;
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      /* save the user-attached null spaces so they survive the rebuild */
      PetscCall(MatGetNullSpaces(n_local, bjac->pmat, &nullsp_pmat));
      PetscCall(MatDestroyMatrices(n_local, &bjac->pmat));
      if (pc->useAmat) {
        PetscCall(MatGetNullSpaces(n_local, bjac->mat, &nullsp_mat));
        PetscCall(MatDestroyMatrices(n_local, &bjac->mat));
      }
      scall = MAT_INITIAL_MATRIX;
    } else scall = MAT_REUSE_MATRIX;
  }

  /* extract the diagonal blocks (and reattach any saved null spaces) */
  PetscCall(MatCreateSubMatrices(pmat, n_local, bjac->is, bjac->is, scall, &bjac->pmat));
  if (nullsp_pmat) PetscCall(MatRestoreNullSpaces(n_local, bjac->pmat, &nullsp_pmat));
  if (pc->useAmat) {
    PetscCall(MatCreateSubMatrices(mat, n_local, bjac->is, bjac->is, scall, &bjac->mat));
    if (nullsp_mat) PetscCall(MatRestoreNullSpaces(n_local, bjac->mat, &nullsp_mat));
  }
  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  PetscCall(PCModifySubMatrices(pc, n_local, bjac->is, bjac->is, bjac->pmat, pc->modifysubmatricesP));

  for (i = 0; i < n_local; i++) {
    PetscCall(KSPGetOptionsPrefix(jac->ksp[i], &prefix));
    if (pc->useAmat) {
      PetscCall(KSPSetOperators(jac->ksp[i], bjac->mat[i], bjac->pmat[i]));
      PetscCall(MatSetOptionsPrefix(bjac->mat[i], prefix));
    } else {
      PetscCall(KSPSetOperators(jac->ksp[i], bjac->pmat[i], bjac->pmat[i]));
    }
    PetscCall(MatSetOptionsPrefix(bjac->pmat[i], prefix));
    if (pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[i]));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1124 
1125 /*
1126       These are for a single block with multiple processes
1127 */
1128 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc)
1129 {
1130   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
1131   KSP                subksp = jac->ksp[0];
1132   KSPConvergedReason reason;
1133 
1134   PetscFunctionBegin;
1135   PetscCall(KSPSetUp(subksp));
1136   PetscCall(KSPGetConvergedReason(subksp, &reason));
1137   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1138   PetscFunctionReturn(PETSC_SUCCESS);
1139 }
1140 
1141 static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
1142 {
1143   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1144   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1145 
1146   PetscFunctionBegin;
1147   PetscCall(VecDestroy(&mpjac->ysub));
1148   PetscCall(VecDestroy(&mpjac->xsub));
1149   PetscCall(MatDestroy(&mpjac->submats));
1150   if (jac->ksp) PetscCall(KSPReset(jac->ksp[0]));
1151   PetscFunctionReturn(PETSC_SUCCESS);
1152 }
1153 
1154 static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
1155 {
1156   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1157   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1158 
1159   PetscFunctionBegin;
1160   PetscCall(PCReset_BJacobi_Multiproc(pc));
1161   PetscCall(KSPDestroy(&jac->ksp[0]));
1162   PetscCall(PetscFree(jac->ksp));
1163   PetscCall(PetscSubcommDestroy(&mpjac->psubcomm));
1164 
1165   PetscCall(PetscFree(mpjac));
1166   PetscCall(PCDestroy_BJacobi(pc));
1167   PetscFunctionReturn(PETSC_SUCCESS);
1168 }
1169 
1170 static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc, Vec x, Vec y)
1171 {
1172   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1173   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1174   PetscScalar          *yarray;
1175   const PetscScalar    *xarray;
1176   KSPConvergedReason    reason;
1177 
1178   PetscFunctionBegin;
1179   /* place x's and y's local arrays into xsub and ysub */
1180   PetscCall(VecGetArrayRead(x, &xarray));
1181   PetscCall(VecGetArray(y, &yarray));
1182   PetscCall(VecPlaceArray(mpjac->xsub, xarray));
1183   PetscCall(VecPlaceArray(mpjac->ysub, yarray));
1184 
1185   /* apply preconditioner on each matrix block */
1186   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1187   PetscCall(KSPSolve(jac->ksp[0], mpjac->xsub, mpjac->ysub));
1188   PetscCall(KSPCheckSolve(jac->ksp[0], pc, mpjac->ysub));
1189   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1190   PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
1191   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1192 
1193   PetscCall(VecResetArray(mpjac->xsub));
1194   PetscCall(VecResetArray(mpjac->ysub));
1195   PetscCall(VecRestoreArrayRead(x, &xarray));
1196   PetscCall(VecRestoreArray(y, &yarray));
1197   PetscFunctionReturn(PETSC_SUCCESS);
1198 }
1199 
/*
   Applies the multiproc block Jacobi preconditioner to a dense block of
   right-hand sides: wraps the arrays of X and Y in dense matrices living on
   the sub-KSP's communicator and calls KSPMatSolve() on them.
*/
static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc, Mat X, Mat Y)
{
  PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
  KSPConvergedReason reason;
  Mat                sX, sY; /* views of X and Y on the subcommunicator */
  const PetscScalar *x;
  PetscScalar       *y;
  PetscInt           m, N, lda, ldb;

  PetscFunctionBegin;
  /* apply preconditioner on each matrix block */
  PetscCall(MatGetLocalSize(X, &m, NULL));
  PetscCall(MatGetSize(X, NULL, &N));
  /* preserve the leading dimensions so the wrapped views address the
     original storage correctly */
  PetscCall(MatDenseGetLDA(X, &lda));
  PetscCall(MatDenseGetLDA(Y, &ldb));
  PetscCall(MatDenseGetArrayRead(X, &x));
  PetscCall(MatDenseGetArrayWrite(Y, &y));
  PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, (PetscScalar *)x, &sX));
  PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, y, &sY));
  PetscCall(MatDenseSetLDA(sX, lda));
  PetscCall(MatDenseSetLDA(sY, ldb));
  PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
  PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, NULL));
  PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
  /* destroy the views before restoring the arrays they alias */
  PetscCall(MatDestroy(&sY));
  PetscCall(MatDestroy(&sX));
  PetscCall(MatDenseRestoreArrayWrite(Y, &y));
  PetscCall(MatDenseRestoreArrayRead(X, &x));
  PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
  if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1233 
/*
   Sets up block Jacobi for the case of fewer blocks than MPI processes
   (one block spans a subcommunicator): splits the communicator, extracts the
   per-subcommunicator matrix block, and builds the single parallel sub-KSP.
*/
static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
  PetscInt              m, n;
  MPI_Comm              comm, subcomm = 0;
  const char           *prefix;
  PetscBool             wasSetup = PETSC_TRUE; /* stays PETSC_TRUE unless we create the sub-KSP here */
  VecType               vectype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
  PetscCheck(jac->n_local <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only a single block in a subcommunicator is supported");
  jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
  if (!pc->setupcalled) {
    PetscInt nestlevel;

    wasSetup = PETSC_FALSE;
    PetscCall(PetscNew(&mpjac));
    jac->data = (void *)mpjac;

    /* initialize datastructure mpjac */
    if (!jac->psubcomm) {
      /* Create default contiguous subcommunicators if user does not provide them */
      PetscCall(PetscSubcommCreate(comm, &jac->psubcomm));
      PetscCall(PetscSubcommSetNumber(jac->psubcomm, jac->n));
      PetscCall(PetscSubcommSetType(jac->psubcomm, PETSC_SUBCOMM_CONTIGUOUS));
    }
    mpjac->psubcomm = jac->psubcomm;
    subcomm         = PetscSubcommChild(mpjac->psubcomm);

    /* Get matrix blocks of pmat */
    PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));

    /* create a new PC that processors in each subcomm have copy of */
    PetscCall(PetscMalloc1(1, &jac->ksp));
    PetscCall(KSPCreate(subcomm, &jac->ksp[0]));
    PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
    PetscCall(KSPSetNestLevel(jac->ksp[0], nestlevel + 1));
    PetscCall(KSPSetErrorIfNotConverged(jac->ksp[0], pc->erroriffailure));
    PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0], (PetscObject)pc, 1));
    PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
    PetscCall(KSPGetPC(jac->ksp[0], &mpjac->pc));

    /* the sub-KSP uses prefix "<pc prefix>sub_"; propagate it to the block matrix */
    PetscCall(PCGetOptionsPrefix(pc, &prefix));
    PetscCall(KSPSetOptionsPrefix(jac->ksp[0], prefix));
    PetscCall(KSPAppendOptionsPrefix(jac->ksp[0], "sub_"));
    PetscCall(KSPGetOptionsPrefix(jac->ksp[0], &prefix));
    PetscCall(MatSetOptionsPrefix(mpjac->submats, prefix));

    /* create dummy vectors xsub and ysub; their arrays are supplied later via VecPlaceArray() */
    PetscCall(MatGetLocalSize(mpjac->submats, &m, &n));
    PetscCall(VecCreateMPIWithArray(subcomm, 1, n, PETSC_DECIDE, NULL, &mpjac->xsub));
    PetscCall(VecCreateMPIWithArray(subcomm, 1, m, PETSC_DECIDE, NULL, &mpjac->ysub));
    PetscCall(MatGetVecType(mpjac->submats, &vectype));
    PetscCall(VecSetType(mpjac->xsub, vectype));
    PetscCall(VecSetType(mpjac->ysub, vectype));

    /* install the multiproc implementations on the outer PC */
    pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc;
    pc->ops->reset         = PCReset_BJacobi_Multiproc;
    pc->ops->destroy       = PCDestroy_BJacobi_Multiproc;
    pc->ops->apply         = PCApply_BJacobi_Multiproc;
    pc->ops->matapply      = PCMatApply_BJacobi_Multiproc;
  } else { /* pc->setupcalled */
    subcomm = PetscSubcommChild(mpjac->psubcomm);
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      /* destroy old matrix blocks, then get new matrix blocks */
      if (mpjac->submats) PetscCall(MatDestroy(&mpjac->submats));
      PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
    } else {
      PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_REUSE_MATRIX, &mpjac->submats));
    }
    PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
  }

  if (!wasSetup && pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[0]));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1312