xref: /petsc/src/ksp/pc/impls/bjacobi/bjacobi.c (revision 2e65eb737b5b07432530db55f6b2a145ebc548b2)
1 
2 /*
3    Defines a block Jacobi preconditioner.
4 */
5 
6 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/
7 
8 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC,Mat,Mat);
9 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC,Mat,Mat);
10 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);
11 
/*
   PCSetUp_BJacobi - Determines the block layout (how many blocks this rank owns
   and the length of each), extracts the diagonal block(s) of the operator, and
   dispatches to the single-block, multi-block, or multi-process setup routine.
*/
static PetscErrorCode PCSetUp_BJacobi(PC pc)
{
  PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
  Mat            mat  = pc->mat,pmat = pc->pmat;
  PetscBool      hasop;
  PetscInt       N,M,start,i,sum,end;
  PetscInt       bs,i_start=-1,i_end=-1;
  PetscMPIInt    rank,size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size));
  PetscCall(MatGetLocalSize(pc->pmat,&M,&N));
  PetscCall(MatGetBlockSize(pc->pmat,&bs));

  /* fewer blocks than ranks: each block is shared by several processes */
  if (jac->n > 0 && jac->n < size) {
    PetscCall(PCSetUp_BJacobi_Multiproc(pc));
    PetscFunctionReturn(0);
  }

  /* --------------------------------------------------------------------------
      Determines the number of blocks assigned to each processor
  -----------------------------------------------------------------------------*/

  /*   local block count  given */
  if (jac->n_local > 0 && jac->n < 0) {
    PetscCall(MPIU_Allreduce(&jac->n_local,&jac->n,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc)));
    if (jac->l_lens) { /* check that user set these correctly */
      sum = 0;
      for (i=0; i<jac->n_local; i++) {
        /* each block length must be a multiple of the matrix block size */
        PetscCheck(jac->l_lens[i]/bs*bs ==jac->l_lens[i],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Mat blocksize doesn't match block Jacobi layout");
        sum += jac->l_lens[i];
      }
      PetscCheck(sum == M,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local lens set incorrectly");
    } else {
      /* no lengths given: split the local rows (in units of bs) evenly over the blocks */
      PetscCall(PetscMalloc1(jac->n_local,&jac->l_lens));
      for (i=0; i<jac->n_local; i++) jac->l_lens[i] = bs*((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i));
    }
  } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
    /* global blocks given: determine which ones are local */
    if (jac->g_lens) {
      /* check that g_lens has valid entries */
      for (i=0; i<jac->n; i++) {
        PetscCheck(jac->g_lens[i],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Zero block not allowed");
        PetscCheck(jac->g_lens[i]/bs*bs == jac->g_lens[i],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Mat blocksize doesn't match block Jacobi layout");
      }
      if (size == 1) {
        jac->n_local = jac->n;
        PetscCall(PetscMalloc1(jac->n_local,&jac->l_lens));
        PetscCall(PetscArraycpy(jac->l_lens,jac->g_lens,jac->n_local));
        /* check that user set these correctly */
        sum = 0;
        for (i=0; i<jac->n_local; i++) sum += jac->l_lens[i];
        PetscCheck(sum == M,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Global lens set incorrectly");
      } else {
        PetscCall(MatGetOwnershipRange(pc->pmat,&start,&end));
        /* loop over blocks determining first one owned by me; a block boundary
           (running sum of lengths) must line up exactly with this rank's first row */
        sum = 0;
        for (i=0; i<jac->n+1; i++) {
          if (sum == start) { i_start = i; goto start_1;}
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
start_1:
        /* continue summing until a block boundary lines up with this rank's last row */
        for (i=i_start; i<jac->n+1; i++) {
          if (sum == end) { i_end = i; goto end_1; }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
end_1:
        jac->n_local = i_end - i_start;
        PetscCall(PetscMalloc1(jac->n_local,&jac->l_lens));
        PetscCall(PetscArraycpy(jac->l_lens,jac->g_lens+i_start,jac->n_local));
      }
    } else { /* no global blocks given, determine them using default layout */
      /* spread the jac->n blocks evenly over the ranks, then the local rows over the local blocks */
      jac->n_local = jac->n/size + ((jac->n % size) > rank);
      PetscCall(PetscMalloc1(jac->n_local,&jac->l_lens));
      for (i=0; i<jac->n_local; i++) {
        jac->l_lens[i] = ((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i))*bs;
        PetscCheck(jac->l_lens[i],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Too many blocks given");
      }
    }
  } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
    /* default layout: one block per process covering all its local rows */
    jac->n         = size;
    jac->n_local   = 1;
    PetscCall(PetscMalloc1(1,&jac->l_lens));
    jac->l_lens[0] = M;
  } else { /* jac->n > 0 && jac->n_local > 0 */
    if (!jac->l_lens) {
      PetscCall(PetscMalloc1(jac->n_local,&jac->l_lens));
      for (i=0; i<jac->n_local; i++) jac->l_lens[i] = bs*((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i));
    }
  }
  PetscCheck(jac->n_local >= 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of blocks is less than number of processors");

  /* -------------------------
      Determines mat and pmat
  ---------------------------*/
  PetscCall(MatHasOperation(pc->mat,MATOP_GET_DIAGONAL_BLOCK,&hasop));
  if (!hasop && size == 1) {
    /* sequential and no diagonal-block support: use the matrices directly */
    mat  = pc->mat;
    pmat = pc->pmat;
  } else {
    if (pc->useAmat) {
      /* use block from Amat matrix, not Pmat for local MatMult() */
      PetscCall(MatGetDiagonalBlock(pc->mat,&mat));
    }
    if (pc->pmat != pc->mat || !pc->useAmat) {
      PetscCall(MatGetDiagonalBlock(pc->pmat,&pmat));
    } else pmat = mat;
  }

  /* ------
     Setup code depends on the number of blocks
  */
  if (jac->n_local == 1) {
    PetscCall(PCSetUp_BJacobi_Singleblock(pc,mat,pmat));
  } else {
    PetscCall(PCSetUp_BJacobi_Multiblock(pc,mat,pmat));
  }
  PetscFunctionReturn(0);
}
134 
135 /* Default destroy, if it has never been setup */
136 static PetscErrorCode PCDestroy_BJacobi(PC pc)
137 {
138   PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
139 
140   PetscFunctionBegin;
141   PetscCall(PetscFree(jac->g_lens));
142   PetscCall(PetscFree(jac->l_lens));
143   PetscCall(PetscFree(pc->data));
144   PetscFunctionReturn(0);
145 }
146 
147 static PetscErrorCode PCSetFromOptions_BJacobi(PetscOptionItems *PetscOptionsObject,PC pc)
148 {
149   PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
150   PetscInt       blocks,i;
151   PetscBool      flg;
152 
153   PetscFunctionBegin;
154   PetscOptionsHeadBegin(PetscOptionsObject,"Block Jacobi options");
155   PetscCall(PetscOptionsInt("-pc_bjacobi_blocks","Total number of blocks","PCBJacobiSetTotalBlocks",jac->n,&blocks,&flg));
156   if (flg) PetscCall(PCBJacobiSetTotalBlocks(pc,blocks,NULL));
157   PetscCall(PetscOptionsInt("-pc_bjacobi_local_blocks","Local number of blocks","PCBJacobiSetLocalBlocks",jac->n_local,&blocks,&flg));
158   if (flg) PetscCall(PCBJacobiSetLocalBlocks(pc,blocks,NULL));
159   if (jac->ksp) {
160     /* The sub-KSP has already been set up (e.g., PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions was not called
161      * unless we had already been called. */
162     for (i=0; i<jac->n_local; i++) {
163       PetscCall(KSPSetFromOptions(jac->ksp[i]));
164     }
165   }
166   PetscOptionsHeadEnd();
167   PetscFunctionReturn(0);
168 }
169 
#include <petscdraw.h>
/*
   PCView_BJacobi - Prints the block layout and the first (or every) sub-KSP to an
   ASCII, string, or draw viewer.  Sub-viewers are used so each rank's sub-solver
   can be shown without interleaving output.
*/
static PetscErrorCode PCView_BJacobi(PC pc,PetscViewer viewer)
{
  PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
  PetscMPIInt          rank;
  PetscInt             i;
  PetscBool            iascii,isstring,isdraw;
  PetscViewer          sviewer;
  PetscViewerFormat    format;
  const char           *prefix;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    if (pc->useAmat) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"  using Amat local matrix, number of blocks = %" PetscInt_FMT "\n",jac->n));
    }
    PetscCall(PetscViewerASCIIPrintf(viewer,"  number of blocks = %" PetscInt_FMT "\n",jac->n));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank));
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* brief form: show only rank 0's first sub-KSP */
      PetscCall(PetscViewerASCIIPrintf(viewer,"  Local solver information for first block is in the following KSP and PC objects on rank 0:\n"));
      PetscCall(PCGetOptionsPrefix(pc,&prefix));
      PetscCall(PetscViewerASCIIPrintf(viewer,"  Use -%sksp_view ::ascii_info_detail to display information for all blocks\n",prefix?prefix:""));
      if (jac->ksp && !jac->psubcomm) {
        /* one-or-more blocks per rank: view through a self sub-viewer on rank 0 only */
        PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
        if (rank == 0) {
          PetscCall(PetscViewerASCIIPushTab(viewer));
          PetscCall(KSPView(jac->ksp[0],sviewer));
          PetscCall(PetscViewerASCIIPopTab(viewer));
        }
        PetscCall(PetscViewerFlush(sviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
        PetscCall(PetscViewerFlush(viewer));
        /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else if (mpjac && jac->ksp && mpjac->psubcomm) {
        /* multi-process blocks: view from the first subcommunicator (color 0) */
        PetscCall(PetscViewerGetSubViewer(viewer,mpjac->psubcomm->child,&sviewer));
        if (!mpjac->psubcomm->color) {
          PetscCall(PetscViewerASCIIPushTab(viewer));
          PetscCall(KSPView(*(jac->ksp),sviewer));
          PetscCall(PetscViewerASCIIPopTab(viewer));
        }
        PetscCall(PetscViewerFlush(sviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer,mpjac->psubcomm->child,&sviewer));
        PetscCall(PetscViewerFlush(viewer));
        /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else {
        PetscCall(PetscViewerFlush(viewer));
      }
    } else {
      PetscInt n_global;
      /* NOTE(review): n_global (max local block count over all ranks) is computed
         but not referenced below -- confirm whether it is still needed */
      PetscCall(MPIU_Allreduce(&jac->n_local,&n_global,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc)));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"  Local solver information for each block is in the following KSP and PC objects:\n"));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n",
                                                   rank,jac->n_local,jac->first_local));
      PetscCall(PetscViewerASCIIPushTab(viewer));
      PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
      /* every rank prints every one of its local sub-KSPs */
      for (i=0; i<jac->n_local; i++) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local block number %" PetscInt_FMT "\n",rank,i));
        PetscCall(KSPView(jac->ksp[i],sviewer));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n"));
      }
      PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
      PetscCall(PetscViewerASCIIPopTab(viewer));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
  } else if (isstring) {
    PetscCall(PetscViewerStringSPrintf(viewer," blks=%" PetscInt_FMT,jac->n));
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (jac->ksp) PetscCall(KSPView(jac->ksp[0],sviewer));
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
  } else if (isdraw) {
    PetscDraw draw;
    char      str[25];
    PetscReal x,y,bottom,h;

    /* draw a labeled box, then the first sub-KSP below it */
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawGetCurrentPoint(draw,&x,&y));
    PetscCall(PetscSNPrintf(str,25,"Number blocks %" PetscInt_FMT,jac->n));
    PetscCall(PetscDrawStringBoxed(draw,x,y,PETSC_DRAW_RED,PETSC_DRAW_BLACK,str,NULL,&h));
    bottom = y - h;
    PetscCall(PetscDrawPushCurrentPoint(draw,x,bottom));
    /* warning the communicator on viewer is different than on ksp in parallel */
    if (jac->ksp) PetscCall(KSPView(jac->ksp[0],viewer));
    PetscCall(PetscDrawPopCurrentPoint(draw));
  }
  PetscFunctionReturn(0);
}
265 
266 /* -------------------------------------------------------------------------------------*/
267 
268 static PetscErrorCode  PCBJacobiGetSubKSP_BJacobi(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
269 {
270   PC_BJacobi *jac = (PC_BJacobi*)pc->data;
271 
272   PetscFunctionBegin;
273   PetscCheck(pc->setupcalled,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call KSPSetUp() or PCSetUp() first");
274 
275   if (n_local) *n_local = jac->n_local;
276   if (first_local) *first_local = jac->first_local;
277   if (ksp) *ksp                 = jac->ksp;
278   PetscFunctionReturn(0);
279 }
280 
281 static PetscErrorCode  PCBJacobiSetTotalBlocks_BJacobi(PC pc,PetscInt blocks,PetscInt *lens)
282 {
283   PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
284 
285   PetscFunctionBegin;
286   PetscCheckFalse(pc->setupcalled > 0 && jac->n!=blocks,PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
287   jac->n = blocks;
288   if (!lens) jac->g_lens = NULL;
289   else {
290     PetscCall(PetscMalloc1(blocks,&jac->g_lens));
291     PetscCall(PetscLogObjectMemory((PetscObject)pc,blocks*sizeof(PetscInt)));
292     PetscCall(PetscArraycpy(jac->g_lens,lens,blocks));
293   }
294   PetscFunctionReturn(0);
295 }
296 
297 static PetscErrorCode  PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
298 {
299   PC_BJacobi *jac = (PC_BJacobi*) pc->data;
300 
301   PetscFunctionBegin;
302   *blocks = jac->n;
303   if (lens) *lens = jac->g_lens;
304   PetscFunctionReturn(0);
305 }
306 
307 static PetscErrorCode  PCBJacobiSetLocalBlocks_BJacobi(PC pc,PetscInt blocks,const PetscInt lens[])
308 {
309   PC_BJacobi     *jac;
310 
311   PetscFunctionBegin;
312   jac = (PC_BJacobi*)pc->data;
313 
314   jac->n_local = blocks;
315   if (!lens) jac->l_lens = NULL;
316   else {
317     PetscCall(PetscMalloc1(blocks,&jac->l_lens));
318     PetscCall(PetscLogObjectMemory((PetscObject)pc,blocks*sizeof(PetscInt)));
319     PetscCall(PetscArraycpy(jac->l_lens,lens,blocks));
320   }
321   PetscFunctionReturn(0);
322 }
323 
324 static PetscErrorCode  PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
325 {
326   PC_BJacobi *jac = (PC_BJacobi*) pc->data;
327 
328   PetscFunctionBegin;
329   *blocks = jac->n_local;
330   if (lens) *lens = jac->l_lens;
331   PetscFunctionReturn(0);
332 }
333 
334 /* -------------------------------------------------------------------------------------*/
335 
336 /*@C
337    PCBJacobiGetSubKSP - Gets the local KSP contexts for all blocks on
338    this processor.
339 
340    Not Collective
341 
342    Input Parameter:
343 .  pc - the preconditioner context
344 
345    Output Parameters:
346 +  n_local - the number of blocks on this processor, or NULL
347 .  first_local - the global number of the first block on this processor, or NULL
348 -  ksp - the array of KSP contexts
349 
350    Notes:
351    After PCBJacobiGetSubKSP() the array of KSP contexts is not to be freed.
352 
353    Currently for some matrix implementations only 1 block per processor
354    is supported.
355 
356    You must call KSPSetUp() or PCSetUp() before calling PCBJacobiGetSubKSP().
357 
358    Fortran Usage: You must pass in a KSP array that is large enough to contain all the local KSPs.
359       You can call PCBJacobiGetSubKSP(pc,nlocal,firstlocal,PETSC_NULL_KSP,ierr) to determine how large the
360       KSP array must be.
361 
362    Level: advanced
363 
364 .seealso: PCASMGetSubKSP()
365 @*/
PetscErrorCode  PCBJacobiGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* PetscUseMethod() errors if the PC type does not implement this query */
  PetscUseMethod(pc,"PCBJacobiGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
  PetscFunctionReturn(0);
}
373 
374 /*@
375    PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
376    Jacobi preconditioner.
377 
378    Collective on PC
379 
380    Input Parameters:
381 +  pc - the preconditioner context
382 .  blocks - the number of blocks
383 -  lens - [optional] integer array containing the size of each block
384 
385    Options Database Key:
386 .  -pc_bjacobi_blocks <blocks> - Sets the number of global blocks
387 
388    Notes:
389    Currently only a limited number of blocking configurations are supported.
390    All processors sharing the PC must call this routine with the same data.
391 
392    Level: intermediate
393 
394 .seealso: PCSetUseAmat(), PCBJacobiSetLocalBlocks()
395 @*/
PetscErrorCode  PCBJacobiSetTotalBlocks(PC pc,PetscInt blocks,const PetscInt lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscCheck(blocks > 0,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Must have positive blocks");
  /* PetscTryMethod(): silently a no-op if the PC is not of type PCBJACOBI */
  PetscTryMethod(pc,"PCBJacobiSetTotalBlocks_C",(PC,PetscInt,const PetscInt[]),(pc,blocks,lens));
  PetscFunctionReturn(0);
}
404 
405 /*@C
406    PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
407    Jacobi preconditioner.
408 
409    Not Collective
410 
411    Input Parameter:
412 .  pc - the preconditioner context
413 
414    Output parameters:
415 +  blocks - the number of blocks
416 -  lens - integer array containing the size of each block
417 
418    Level: intermediate
419 
420 .seealso: PCSetUseAmat(), PCBJacobiGetLocalBlocks()
421 @*/
PetscErrorCode  PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID,1);
  PetscValidIntPointer(blocks,2);
  /* PetscUseMethod() errors if the PC type does not implement this query */
  PetscUseMethod(pc,"PCBJacobiGetTotalBlocks_C",(PC,PetscInt*, const PetscInt *[]),(pc,blocks,lens));
  PetscFunctionReturn(0);
}
430 
431 /*@
432    PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
433    Jacobi preconditioner.
434 
435    Not Collective
436 
437    Input Parameters:
438 +  pc - the preconditioner context
439 .  blocks - the number of blocks
440 -  lens - [optional] integer array containing size of each block
441 
442    Options Database Key:
443 .  -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks
444 
445    Note:
446    Currently only a limited number of blocking configurations are supported.
447 
448    Level: intermediate
449 
450 .seealso: PCSetUseAmat(), PCBJacobiSetTotalBlocks()
451 @*/
452 PetscErrorCode  PCBJacobiSetLocalBlocks(PC pc,PetscInt blocks,const PetscInt lens[])
453 {
454   PetscFunctionBegin;
455   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
456   PetscCheck(blocks >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Must have nonegative blocks");
457   PetscTryMethod(pc,"PCBJacobiSetLocalBlocks_C",(PC,PetscInt,const PetscInt []),(pc,blocks,lens));
458   PetscFunctionReturn(0);
459 }
460 
461 /*@C
462    PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
463    Jacobi preconditioner.
464 
465    Not Collective
466 
467    Input Parameters:
468 +  pc - the preconditioner context
469 .  blocks - the number of blocks
470 -  lens - [optional] integer array containing size of each block
471 
472    Note:
473    Currently only a limited number of blocking configurations are supported.
474 
475    Level: intermediate
476 
477 .seealso: PCSetUseAmat(), PCBJacobiGetTotalBlocks()
478 @*/
PetscErrorCode  PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID,1);
  PetscValidIntPointer(blocks,2);
  /* PetscUseMethod() errors if the PC type does not implement this query */
  PetscUseMethod(pc,"PCBJacobiGetLocalBlocks_C",(PC,PetscInt*, const PetscInt *[]),(pc,blocks,lens));
  PetscFunctionReturn(0);
}
487 
488 /* -----------------------------------------------------------------------------------*/
489 
490 /*MC
491    PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with
492            its own KSP object.
493 
494    Options Database Keys:
495 +  -pc_use_amat - use Amat to apply block of operator in inner Krylov method
496 -  -pc_bjacobi_blocks <n> - use n total blocks
497 
498    Notes:
499      Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.
500 
501      To set options on the solvers for each block append -sub_ to all the KSP and PC
502         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
503 
504      To set the options on the solvers separately for each block call PCBJacobiGetSubKSP()
505          and set the options directly on the resulting KSP object (you can access its PC
506          with KSPGetPC())
507 
508      For GPU-based vectors (CUDA, ViennaCL) it is recommended to use exactly one block per MPI process for best
509          performance.  Different block partitioning may lead to additional data transfers
510          between host and GPU that lead to degraded performance.
511 
512      The options prefix for each block is sub_, for example -sub_pc_type lu.
513 
514      When multiple processes share a single block, each block encompasses exactly all the unknowns owned by its set of processes.
515 
516      See PCJACOBI for point Jacobi preconditioning, PCVPBJACOBI for variable size point block Jacobi and PCPBJACOBI for large blocks
517 
518    Level: beginner
519 
520 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
521            PCASM, PCSetUseAmat(), PCGetUseAmat(), PCBJacobiGetSubKSP(), PCBJacobiSetTotalBlocks(),
522            PCBJacobiSetLocalBlocks(), PCSetModifySubMatrices(), PCJACOBI, PCVPBJACOBI, PCPBJACOBI
523 M*/
524 
525 PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
526 {
527   PetscMPIInt    rank;
528   PC_BJacobi     *jac;
529 
530   PetscFunctionBegin;
531   PetscCall(PetscNewLog(pc,&jac));
532   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank));
533 
534   pc->ops->apply           = NULL;
535   pc->ops->matapply        = NULL;
536   pc->ops->applytranspose  = NULL;
537   pc->ops->setup           = PCSetUp_BJacobi;
538   pc->ops->destroy         = PCDestroy_BJacobi;
539   pc->ops->setfromoptions  = PCSetFromOptions_BJacobi;
540   pc->ops->view            = PCView_BJacobi;
541   pc->ops->applyrichardson = NULL;
542 
543   pc->data               = (void*)jac;
544   jac->n                 = -1;
545   jac->n_local           = -1;
546   jac->first_local       = rank;
547   jac->ksp               = NULL;
548   jac->g_lens            = NULL;
549   jac->l_lens            = NULL;
550   jac->psubcomm          = NULL;
551 
552   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetSubKSP_C",PCBJacobiGetSubKSP_BJacobi));
553   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiSetTotalBlocks_C",PCBJacobiSetTotalBlocks_BJacobi));
554   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetTotalBlocks_C",PCBJacobiGetTotalBlocks_BJacobi));
555   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiSetLocalBlocks_C",PCBJacobiSetLocalBlocks_BJacobi));
556   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetLocalBlocks_C",PCBJacobiGetLocalBlocks_BJacobi));
557   PetscFunctionReturn(0);
558 }
559 
560 /* --------------------------------------------------------------------------------------------*/
561 /*
562         These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI
563 */
564 static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
565 {
566   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
567   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
568 
569   PetscFunctionBegin;
570   PetscCall(KSPReset(jac->ksp[0]));
571   PetscCall(VecDestroy(&bjac->x));
572   PetscCall(VecDestroy(&bjac->y));
573   PetscFunctionReturn(0);
574 }
575 
576 static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
577 {
578   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
579   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
580 
581   PetscFunctionBegin;
582   PetscCall(PCReset_BJacobi_Singleblock(pc));
583   PetscCall(KSPDestroy(&jac->ksp[0]));
584   PetscCall(PetscFree(jac->ksp));
585   PetscCall(PetscFree(jac->l_lens));
586   PetscCall(PetscFree(jac->g_lens));
587   PetscCall(PetscFree(bjac));
588   PetscCall(PetscFree(pc->data));
589   PetscFunctionReturn(0);
590 }
591 
592 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
593 {
594   PC_BJacobi         *jac = (PC_BJacobi*)pc->data;
595   KSP                subksp = jac->ksp[0];
596   KSPConvergedReason reason;
597 
598   PetscFunctionBegin;
599   PetscCall(KSPSetUp(subksp));
600   PetscCall(KSPGetConvergedReason(subksp,&reason));
601   if (reason == KSP_DIVERGED_PC_FAILED) {
602     pc->failedreason = PC_SUBPC_ERROR;
603   }
604   PetscFunctionReturn(0);
605 }
606 
/* Applies the single-block preconditioner: one sequential sub-KSP solve on the local part of x into y */
static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc,Vec x,Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;

  PetscFunctionBegin;
  /* view the (possibly parallel) x and y through the sequential work vectors */
  PetscCall(VecGetLocalVectorRead(x, bjac->x));
  PetscCall(VecGetLocalVector(y, bjac->y));
  /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
     matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
     of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
  PetscCall(KSPSetReusePreconditioner(jac->ksp[0],pc->reusepreconditioner));
  PetscCall(KSPSolve(jac->ksp[0],bjac->x,bjac->y));
  PetscCall(KSPCheckSolve(jac->ksp[0],pc,bjac->y));
  PetscCall(VecRestoreLocalVectorRead(x, bjac->x));
  PetscCall(VecRestoreLocalVector(y, bjac->y));
  PetscFunctionReturn(0);
}
625 
626 static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc,Mat X,Mat Y)
627 {
628   PC_BJacobi     *jac  = (PC_BJacobi*)pc->data;
629   Mat            sX,sY;
630 
631   PetscFunctionBegin;
632   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
633      matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
634      of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
635   PetscCall(KSPSetReusePreconditioner(jac->ksp[0],pc->reusepreconditioner));
636   PetscCall(MatDenseGetLocalMatrix(X,&sX));
637   PetscCall(MatDenseGetLocalMatrix(Y,&sY));
638   PetscCall(KSPMatSolve(jac->ksp[0],sX,sY));
639   PetscFunctionReturn(0);
640 }
641 
/* Applies only the symmetric-left half of the inner PC to x, placing the result in y */
static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc,Vec x,Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
  PetscScalar            *y_array;
  const PetscScalar      *x_array;
  PC                     subpc;

  PetscFunctionBegin;
  /*
      The VecPlaceArray() is to avoid having to copy the
    y vector into the bjac->x vector. The reason for
    the bjac->x vector is that we need a sequential vector
    for the sequential solve.
  */
  PetscCall(VecGetArrayRead(x,&x_array));
  PetscCall(VecGetArray(y,&y_array));
  PetscCall(VecPlaceArray(bjac->x,x_array));
  PetscCall(VecPlaceArray(bjac->y,y_array));
  /* apply the symmetric left portion of the inner PC operator */
  /* note this by-passes the inner KSP and its options completely */
  PetscCall(KSPGetPC(jac->ksp[0],&subpc));
  PetscCall(PCApplySymmetricLeft(subpc,bjac->x,bjac->y));
  /* detach the borrowed arrays before handing them back to x and y */
  PetscCall(VecResetArray(bjac->x));
  PetscCall(VecResetArray(bjac->y));
  PetscCall(VecRestoreArrayRead(x,&x_array));
  PetscCall(VecRestoreArray(y,&y_array));
  PetscFunctionReturn(0);
}
671 
672 static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc,Vec x,Vec y)
673 {
674   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
675   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
676   PetscScalar            *y_array;
677   const PetscScalar      *x_array;
678   PC                     subpc;
679 
680   PetscFunctionBegin;
681   /*
682       The VecPlaceArray() is to avoid having to copy the
683     y vector into the bjac->x vector. The reason for
684     the bjac->x vector is that we need a sequential vector
685     for the sequential solve.
686   */
687   PetscCall(VecGetArrayRead(x,&x_array));
688   PetscCall(VecGetArray(y,&y_array));
689   PetscCall(VecPlaceArray(bjac->x,x_array));
690   PetscCall(VecPlaceArray(bjac->y,y_array));
691 
692   /* apply the symmetric right portion of the inner PC operator */
693   /* note this by-passes the inner KSP and its options completely */
694 
695   PetscCall(KSPGetPC(jac->ksp[0],&subpc));
696   PetscCall(PCApplySymmetricRight(subpc,bjac->x,bjac->y));
697 
698   PetscCall(VecRestoreArrayRead(x,&x_array));
699   PetscCall(VecRestoreArray(y,&y_array));
700   PetscFunctionReturn(0);
701 }
702 
/* Applies the transpose of the single-block preconditioner via KSPSolveTranspose() on the sub-KSP */
static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc,Vec x,Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
  PetscScalar            *y_array;
  const PetscScalar      *x_array;

  PetscFunctionBegin;
  /*
      The VecPlaceArray() is to avoid having to copy the
    y vector into the bjac->x vector. The reason for
    the bjac->x vector is that we need a sequential vector
    for the sequential solve.
  */
  PetscCall(VecGetArrayRead(x,&x_array));
  PetscCall(VecGetArray(y,&y_array));
  PetscCall(VecPlaceArray(bjac->x,x_array));
  PetscCall(VecPlaceArray(bjac->y,y_array));
  PetscCall(KSPSolveTranspose(jac->ksp[0],bjac->x,bjac->y));
  PetscCall(KSPCheckSolve(jac->ksp[0],pc,bjac->y));
  /* detach the borrowed arrays before handing them back to x and y */
  PetscCall(VecResetArray(bjac->x));
  PetscCall(VecResetArray(bjac->y));
  PetscCall(VecRestoreArrayRead(x,&x_array));
  PetscCall(VecRestoreArray(y,&y_array));
  PetscFunctionReturn(0);
}
729 
/*
   PCSetUp_BJacobi_Singleblock - Sets up block Jacobi for the case of exactly
   one block per MPI rank: creates (once) a sequential sub-KSP plus a pair of
   work vectors, installs the singleblock-specific function pointers on the
   PC, and hands the matrix to the sub-KSP on every setup.

   mat  - the operator (Amat); used for the block only when pc->useAmat
   pmat - the matrix the preconditioner block is built from
*/
static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc,Mat mat,Mat pmat)
{
  PC_BJacobi             *jac = (PC_BJacobi*)pc->data;
  PetscInt               m;
  KSP                    ksp;
  PC_BJacobi_Singleblock *bjac;
  PetscBool              wasSetup = PETSC_TRUE;  /* becomes false only when the sub-KSP is created here */
  VecType                vectype;
  const char             *prefix;

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    if (!jac->ksp) {
      wasSetup = PETSC_FALSE;

      PetscCall(KSPCreate(PETSC_COMM_SELF,&ksp));
      PetscCall(KSPSetErrorIfNotConverged(ksp,pc->erroriffailure));
      PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1));
      PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp));
      PetscCall(KSPSetType(ksp,KSPPREONLY));
      /* the sub-KSP is controlled with the outer prefix plus "sub_" */
      PetscCall(PCGetOptionsPrefix(pc,&prefix));
      PetscCall(KSPSetOptionsPrefix(ksp,prefix));
      PetscCall(KSPAppendOptionsPrefix(ksp,"sub_"));

      /* install the singleblock implementations of the PC operations */
      pc->ops->reset               = PCReset_BJacobi_Singleblock;
      pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
      pc->ops->apply               = PCApply_BJacobi_Singleblock;
      pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
      pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
      pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
      pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
      pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;

      PetscCall(PetscMalloc1(1,&jac->ksp));
      jac->ksp[0] = ksp;

      PetscCall(PetscNewLog(pc,&bjac));
      jac->data = (void*)bjac;
    } else {
      /* sub-KSP already exists (e.g. retrieved by the user); reuse it */
      ksp  = jac->ksp[0];
      bjac = (PC_BJacobi_Singleblock*)jac->data;
    }

    /*
      The reason we need to generate these vectors is to serve
      as the right-hand side and solution vector for the solve on the
      block. We do not need to allocate space for the vectors since
      that is provided via VecPlaceArray() just before the call to
      KSPSolve() on the block.
    */
    PetscCall(MatGetSize(pmat,&m,&m));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&bjac->x));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&bjac->y));
    /* match the work-vector type to the matrix type (e.g. device vectors) */
    PetscCall(MatGetVecType(pmat,&vectype));
    PetscCall(VecSetType(bjac->x,vectype));
    PetscCall(VecSetType(bjac->y,vectype));
    PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->x));
    PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->y));
  } else {
    ksp  = jac->ksp[0];
    bjac = (PC_BJacobi_Singleblock*)jac->data;
  }
  /* pass (possibly new) operators down to the sub-KSP on every setup */
  PetscCall(KSPGetOptionsPrefix(ksp,&prefix));
  if (pc->useAmat) {
    PetscCall(KSPSetOperators(ksp,mat,pmat));
    PetscCall(MatSetOptionsPrefix(mat,prefix));
  } else {
    PetscCall(KSPSetOperators(ksp,pmat,pmat));
  }
  PetscCall(MatSetOptionsPrefix(pmat,prefix));
  if (!wasSetup && pc->setfromoptionscalled) {
    /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
    PetscCall(KSPSetFromOptions(ksp));
  }
  PetscFunctionReturn(0);
}
806 
807 /* ---------------------------------------------------------------------------------------------*/
808 static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
809 {
810   PC_BJacobi            *jac  = (PC_BJacobi*)pc->data;
811   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
812   PetscInt              i;
813 
814   PetscFunctionBegin;
815   if (bjac && bjac->pmat) {
816     PetscCall(MatDestroyMatrices(jac->n_local,&bjac->pmat));
817     if (pc->useAmat) {
818       PetscCall(MatDestroyMatrices(jac->n_local,&bjac->mat));
819     }
820   }
821 
822   for (i=0; i<jac->n_local; i++) {
823     PetscCall(KSPReset(jac->ksp[i]));
824     if (bjac && bjac->x) {
825       PetscCall(VecDestroy(&bjac->x[i]));
826       PetscCall(VecDestroy(&bjac->y[i]));
827       PetscCall(ISDestroy(&bjac->is[i]));
828     }
829   }
830   PetscCall(PetscFree(jac->l_lens));
831   PetscCall(PetscFree(jac->g_lens));
832   PetscFunctionReturn(0);
833 }
834 
835 static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
836 {
837   PC_BJacobi            *jac  = (PC_BJacobi*)pc->data;
838   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
839   PetscInt              i;
840 
841   PetscFunctionBegin;
842   PetscCall(PCReset_BJacobi_Multiblock(pc));
843   if (bjac) {
844     PetscCall(PetscFree2(bjac->x,bjac->y));
845     PetscCall(PetscFree(bjac->starts));
846     PetscCall(PetscFree(bjac->is));
847   }
848   PetscCall(PetscFree(jac->data));
849   for (i=0; i<jac->n_local; i++) {
850     PetscCall(KSPDestroy(&jac->ksp[i]));
851   }
852   PetscCall(PetscFree(jac->ksp));
853   PetscCall(PetscFree(pc->data));
854   PetscFunctionReturn(0);
855 }
856 
857 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
858 {
859   PC_BJacobi         *jac = (PC_BJacobi*)pc->data;
860   PetscInt           i,n_local = jac->n_local;
861   KSPConvergedReason reason;
862 
863   PetscFunctionBegin;
864   for (i=0; i<n_local; i++) {
865     PetscCall(KSPSetUp(jac->ksp[i]));
866     PetscCall(KSPGetConvergedReason(jac->ksp[i],&reason));
867     if (reason == KSP_DIVERGED_PC_FAILED) {
868       pc->failedreason = PC_SUBPC_ERROR;
869     }
870   }
871   PetscFunctionReturn(0);
872 }
873 
874 /*
875       Preconditioner for block Jacobi
876 */
877 static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc,Vec x,Vec y)
878 {
879   PC_BJacobi            *jac = (PC_BJacobi*)pc->data;
880   PetscInt              i,n_local = jac->n_local;
881   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
882   PetscScalar           *yin;
883   const PetscScalar     *xin;
884 
885   PetscFunctionBegin;
886   PetscCall(VecGetArrayRead(x,&xin));
887   PetscCall(VecGetArray(y,&yin));
888   for (i=0; i<n_local; i++) {
889     /*
890        To avoid copying the subvector from x into a workspace we instead
891        make the workspace vector array point to the subpart of the array of
892        the global vector.
893     */
894     PetscCall(VecPlaceArray(bjac->x[i],xin+bjac->starts[i]));
895     PetscCall(VecPlaceArray(bjac->y[i],yin+bjac->starts[i]));
896 
897     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0));
898     PetscCall(KSPSolve(jac->ksp[i],bjac->x[i],bjac->y[i]));
899     PetscCall(KSPCheckSolve(jac->ksp[i],pc,bjac->y[i]));
900     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0));
901 
902     PetscCall(VecResetArray(bjac->x[i]));
903     PetscCall(VecResetArray(bjac->y[i]));
904   }
905   PetscCall(VecRestoreArrayRead(x,&xin));
906   PetscCall(VecRestoreArray(y,&yin));
907   PetscFunctionReturn(0);
908 }
909 
910 /*
911       Preconditioner for block Jacobi
912 */
913 static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc,Vec x,Vec y)
914 {
915   PC_BJacobi            *jac = (PC_BJacobi*)pc->data;
916   PetscInt              i,n_local = jac->n_local;
917   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
918   PetscScalar           *yin;
919   const PetscScalar     *xin;
920 
921   PetscFunctionBegin;
922   PetscCall(VecGetArrayRead(x,&xin));
923   PetscCall(VecGetArray(y,&yin));
924   for (i=0; i<n_local; i++) {
925     /*
926        To avoid copying the subvector from x into a workspace we instead
927        make the workspace vector array point to the subpart of the array of
928        the global vector.
929     */
930     PetscCall(VecPlaceArray(bjac->x[i],xin+bjac->starts[i]));
931     PetscCall(VecPlaceArray(bjac->y[i],yin+bjac->starts[i]));
932 
933     PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0));
934     PetscCall(KSPSolveTranspose(jac->ksp[i],bjac->x[i],bjac->y[i]));
935     PetscCall(KSPCheckSolve(jac->ksp[i],pc,bjac->y[i]));
936     PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0));
937 
938     PetscCall(VecResetArray(bjac->x[i]));
939     PetscCall(VecResetArray(bjac->y[i]));
940   }
941   PetscCall(VecRestoreArrayRead(x,&xin));
942   PetscCall(VecRestoreArray(y,&yin));
943   PetscFunctionReturn(0);
944 }
945 
946 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc,Mat mat,Mat pmat)
947 {
948   PC_BJacobi            *jac = (PC_BJacobi*)pc->data;
949   PetscInt              m,n_local,N,M,start,i;
950   const char            *prefix;
951   KSP                   ksp;
952   Vec                   x,y;
953   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
954   PC                    subpc;
955   IS                    is;
956   MatReuse              scall;
957   VecType               vectype;
958 
959   PetscFunctionBegin;
960   PetscCall(MatGetLocalSize(pc->pmat,&M,&N));
961 
962   n_local = jac->n_local;
963 
964   if (pc->useAmat) {
965     PetscBool same;
966     PetscCall(PetscObjectTypeCompare((PetscObject)mat,((PetscObject)pmat)->type_name,&same));
967     PetscCheck(same,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Matrices not of same type");
968   }
969 
970   if (!pc->setupcalled) {
971     scall = MAT_INITIAL_MATRIX;
972 
973     if (!jac->ksp) {
974       pc->ops->reset         = PCReset_BJacobi_Multiblock;
975       pc->ops->destroy       = PCDestroy_BJacobi_Multiblock;
976       pc->ops->apply         = PCApply_BJacobi_Multiblock;
977       pc->ops->matapply      = NULL;
978       pc->ops->applytranspose= PCApplyTranspose_BJacobi_Multiblock;
979       pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiblock;
980 
981       PetscCall(PetscNewLog(pc,&bjac));
982       PetscCall(PetscMalloc1(n_local,&jac->ksp));
983       PetscCall(PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(KSP))));
984       PetscCall(PetscMalloc2(n_local,&bjac->x,n_local,&bjac->y));
985       PetscCall(PetscMalloc1(n_local,&bjac->starts));
986       PetscCall(PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(PetscScalar))));
987 
988       jac->data = (void*)bjac;
989       PetscCall(PetscMalloc1(n_local,&bjac->is));
990       PetscCall(PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(IS))));
991 
992       for (i=0; i<n_local; i++) {
993         PetscCall(KSPCreate(PETSC_COMM_SELF,&ksp));
994         PetscCall(KSPSetErrorIfNotConverged(ksp,pc->erroriffailure));
995         PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1));
996         PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp));
997         PetscCall(KSPSetType(ksp,KSPPREONLY));
998         PetscCall(KSPGetPC(ksp,&subpc));
999         PetscCall(PCGetOptionsPrefix(pc,&prefix));
1000         PetscCall(KSPSetOptionsPrefix(ksp,prefix));
1001         PetscCall(KSPAppendOptionsPrefix(ksp,"sub_"));
1002 
1003         jac->ksp[i] = ksp;
1004       }
1005     } else {
1006       bjac = (PC_BJacobi_Multiblock*)jac->data;
1007     }
1008 
1009     start = 0;
1010     PetscCall(MatGetVecType(pmat,&vectype));
1011     for (i=0; i<n_local; i++) {
1012       m = jac->l_lens[i];
1013       /*
1014       The reason we need to generate these vectors is to serve
1015       as the right-hand side and solution vector for the solve on the
1016       block. We do not need to allocate space for the vectors since
1017       that is provided via VecPlaceArray() just before the call to
1018       KSPSolve() on the block.
1019 
1020       */
1021       PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&x));
1022       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&y));
1023       PetscCall(VecSetType(x,vectype));
1024       PetscCall(VecSetType(y,vectype));
1025       PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)x));
1026       PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)y));
1027 
1028       bjac->x[i]      = x;
1029       bjac->y[i]      = y;
1030       bjac->starts[i] = start;
1031 
1032       PetscCall(ISCreateStride(PETSC_COMM_SELF,m,start,1,&is));
1033       bjac->is[i] = is;
1034       PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)is));
1035 
1036       start += m;
1037     }
1038   } else {
1039     bjac = (PC_BJacobi_Multiblock*)jac->data;
1040     /*
1041        Destroy the blocks from the previous iteration
1042     */
1043     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1044       PetscCall(MatDestroyMatrices(n_local,&bjac->pmat));
1045       if (pc->useAmat) {
1046         PetscCall(MatDestroyMatrices(n_local,&bjac->mat));
1047       }
1048       scall = MAT_INITIAL_MATRIX;
1049     } else scall = MAT_REUSE_MATRIX;
1050   }
1051 
1052   PetscCall(MatCreateSubMatrices(pmat,n_local,bjac->is,bjac->is,scall,&bjac->pmat));
1053   if (pc->useAmat) {
1054     PetscCall(MatCreateSubMatrices(mat,n_local,bjac->is,bjac->is,scall,&bjac->mat));
1055   }
1056   /* Return control to the user so that the submatrices can be modified (e.g., to apply
1057      different boundary conditions for the submatrices than for the global problem) */
1058   PetscCall(PCModifySubMatrices(pc,n_local,bjac->is,bjac->is,bjac->pmat,pc->modifysubmatricesP));
1059 
1060   for (i=0; i<n_local; i++) {
1061     PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->pmat[i]));
1062     PetscCall(KSPGetOptionsPrefix(jac->ksp[i],&prefix));
1063     if (pc->useAmat) {
1064       PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->mat[i]));
1065       PetscCall(KSPSetOperators(jac->ksp[i],bjac->mat[i],bjac->pmat[i]));
1066       PetscCall(MatSetOptionsPrefix(bjac->mat[i],prefix));
1067     } else {
1068       PetscCall(KSPSetOperators(jac->ksp[i],bjac->pmat[i],bjac->pmat[i]));
1069     }
1070     PetscCall(MatSetOptionsPrefix(bjac->pmat[i],prefix));
1071     if (pc->setfromoptionscalled) {
1072       PetscCall(KSPSetFromOptions(jac->ksp[i]));
1073     }
1074   }
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 /* ---------------------------------------------------------------------------------------------*/
1079 /*
1080       These are for a single block with multiple processes
1081 */
1082 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc)
1083 {
1084   PC_BJacobi         *jac = (PC_BJacobi*)pc->data;
1085   KSP                subksp = jac->ksp[0];
1086   KSPConvergedReason reason;
1087 
1088   PetscFunctionBegin;
1089   PetscCall(KSPSetUp(subksp));
1090   PetscCall(KSPGetConvergedReason(subksp,&reason));
1091   if (reason == KSP_DIVERGED_PC_FAILED) {
1092     pc->failedreason = PC_SUBPC_ERROR;
1093   }
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
1098 {
1099   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1100   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1101 
1102   PetscFunctionBegin;
1103   PetscCall(VecDestroy(&mpjac->ysub));
1104   PetscCall(VecDestroy(&mpjac->xsub));
1105   PetscCall(MatDestroy(&mpjac->submats));
1106   if (jac->ksp) PetscCall(KSPReset(jac->ksp[0]));
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
1111 {
1112   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1113   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1114 
1115   PetscFunctionBegin;
1116   PetscCall(PCReset_BJacobi_Multiproc(pc));
1117   PetscCall(KSPDestroy(&jac->ksp[0]));
1118   PetscCall(PetscFree(jac->ksp));
1119   PetscCall(PetscSubcommDestroy(&mpjac->psubcomm));
1120 
1121   PetscCall(PetscFree(mpjac));
1122   PetscCall(PetscFree(pc->data));
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc,Vec x,Vec y)
1127 {
1128   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1129   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1130   PetscScalar          *yarray;
1131   const PetscScalar    *xarray;
1132   KSPConvergedReason   reason;
1133 
1134   PetscFunctionBegin;
1135   /* place x's and y's local arrays into xsub and ysub */
1136   PetscCall(VecGetArrayRead(x,&xarray));
1137   PetscCall(VecGetArray(y,&yarray));
1138   PetscCall(VecPlaceArray(mpjac->xsub,xarray));
1139   PetscCall(VecPlaceArray(mpjac->ysub,yarray));
1140 
1141   /* apply preconditioner on each matrix block */
1142   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[0],mpjac->xsub,mpjac->ysub,0));
1143   PetscCall(KSPSolve(jac->ksp[0],mpjac->xsub,mpjac->ysub));
1144   PetscCall(KSPCheckSolve(jac->ksp[0],pc,mpjac->ysub));
1145   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[0],mpjac->xsub,mpjac->ysub,0));
1146   PetscCall(KSPGetConvergedReason(jac->ksp[0],&reason));
1147   if (reason == KSP_DIVERGED_PC_FAILED) {
1148     pc->failedreason = PC_SUBPC_ERROR;
1149   }
1150 
1151   PetscCall(VecResetArray(mpjac->xsub));
1152   PetscCall(VecResetArray(mpjac->ysub));
1153   PetscCall(VecRestoreArrayRead(x,&xarray));
1154   PetscCall(VecRestoreArray(y,&yarray));
1155   PetscFunctionReturn(0);
1156 }
1157 
/*
   PCMatApply_BJacobi_Multiproc - Applies the single-multiprocess-block
   Jacobi preconditioner to all columns of a dense matrix X at once,
   writing the result into Y, by wrapping the raw arrays of X and Y in
   temporary dense matrices on the subsolver's communicator.
*/
static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc,Mat X,Mat Y)
{
  PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
  KSPConvergedReason   reason;
  Mat                  sX,sY;          /* zero-copy wrappers of X and Y on the sub-KSP's communicator */
  const PetscScalar    *x;
  PetscScalar          *y;
  PetscInt             m,N,lda,ldb;

  PetscFunctionBegin;
  /* apply preconditioner on each matrix block */
  PetscCall(MatGetLocalSize(X,&m,NULL));
  PetscCall(MatGetSize(X,NULL,&N));
  /* capture the leading dimensions so the wrappers address X and Y exactly
     as they are laid out in memory */
  PetscCall(MatDenseGetLDA(X,&lda));
  PetscCall(MatDenseGetLDA(Y,&ldb));
  PetscCall(MatDenseGetArrayRead(X,&x));
  PetscCall(MatDenseGetArrayWrite(Y,&y));
  /* cast away const on x: sX is only read by KSPMatSolve() below */
  PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]),m,PETSC_DECIDE,PETSC_DECIDE,N,(PetscScalar*)x,&sX));
  PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]),m,PETSC_DECIDE,PETSC_DECIDE,N,y,&sY));
  PetscCall(MatDenseSetLDA(sX,lda));
  PetscCall(MatDenseSetLDA(sY,ldb));
  PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[0],X,Y,0));
  PetscCall(KSPMatSolve(jac->ksp[0],sX,sY));
  PetscCall(KSPCheckSolve(jac->ksp[0],pc,NULL));
  PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[0],X,Y,0));
  /* destroy the wrappers before restoring the arrays they alias */
  PetscCall(MatDestroy(&sY));
  PetscCall(MatDestroy(&sX));
  PetscCall(MatDenseRestoreArrayWrite(Y,&y));
  PetscCall(MatDenseRestoreArrayRead(X,&x));
  PetscCall(KSPGetConvergedReason(jac->ksp[0],&reason));
  if (reason == KSP_DIVERGED_PC_FAILED) {
    pc->failedreason = PC_SUBPC_ERROR;
  }
  PetscFunctionReturn(0);
}
1193 
/*
   PCSetUp_BJacobi_Multiproc - Sets up block Jacobi for the case where the
   number of blocks is smaller than the number of MPI ranks, so a single
   block is shared by a subcommunicator of processes: creates the
   subcommunicators, extracts the multiprocess matrix block, and builds one
   parallel sub-KSP per subcommunicator.
*/
static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
{
  PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
  PetscInt             m,n;
  MPI_Comm             comm,subcomm=0;
  const char           *prefix;
  PetscBool            wasSetup = PETSC_TRUE;  /* becomes false on first setup */
  VecType              vectype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)pc,&comm));
  PetscCheck(jac->n_local <= 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only a single block in a subcommunicator is supported");
  jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
  if (!pc->setupcalled) {
    wasSetup  = PETSC_FALSE;
    PetscCall(PetscNewLog(pc,&mpjac));
    jac->data = (void*)mpjac;

    /* initialize datastructure mpjac */
    if (!jac->psubcomm) {
      /* Create default contiguous subcommunicators if user does not provide them */
      PetscCall(PetscSubcommCreate(comm,&jac->psubcomm));
      PetscCall(PetscSubcommSetNumber(jac->psubcomm,jac->n));
      PetscCall(PetscSubcommSetType(jac->psubcomm,PETSC_SUBCOMM_CONTIGUOUS));
      PetscCall(PetscLogObjectMemory((PetscObject)pc,sizeof(PetscSubcomm)));
    }
    mpjac->psubcomm = jac->psubcomm;
    subcomm         = PetscSubcommChild(mpjac->psubcomm);

    /* Get matrix blocks of pmat */
    PetscCall(MatGetMultiProcBlock(pc->pmat,subcomm,MAT_INITIAL_MATRIX,&mpjac->submats));

    /* create a new PC that processors in each subcomm have copy of */
    PetscCall(PetscMalloc1(1,&jac->ksp));
    PetscCall(KSPCreate(subcomm,&jac->ksp[0]));
    PetscCall(KSPSetErrorIfNotConverged(jac->ksp[0],pc->erroriffailure));
    PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0],(PetscObject)pc,1));
    PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)jac->ksp[0]));
    PetscCall(KSPSetOperators(jac->ksp[0],mpjac->submats,mpjac->submats));
    PetscCall(KSPGetPC(jac->ksp[0],&mpjac->pc));

    /* the sub-KSP is controlled with the outer prefix plus "sub_" */
    PetscCall(PCGetOptionsPrefix(pc,&prefix));
    PetscCall(KSPSetOptionsPrefix(jac->ksp[0],prefix));
    PetscCall(KSPAppendOptionsPrefix(jac->ksp[0],"sub_"));
    PetscCall(KSPGetOptionsPrefix(jac->ksp[0],&prefix));
    PetscCall(MatSetOptionsPrefix(mpjac->submats,prefix));

    /* create dummy vectors xsub and ysub; their arrays are supplied later
       by VecPlaceArray() in PCApply_BJacobi_Multiproc() */
    PetscCall(MatGetLocalSize(mpjac->submats,&m,&n));
    PetscCall(VecCreateMPIWithArray(subcomm,1,n,PETSC_DECIDE,NULL,&mpjac->xsub));
    PetscCall(VecCreateMPIWithArray(subcomm,1,m,PETSC_DECIDE,NULL,&mpjac->ysub));
    /* match the work-vector type to the matrix type (e.g. device vectors) */
    PetscCall(MatGetVecType(mpjac->submats,&vectype));
    PetscCall(VecSetType(mpjac->xsub,vectype));
    PetscCall(VecSetType(mpjac->ysub,vectype));
    PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)mpjac->xsub));
    PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)mpjac->ysub));

    /* install the multiproc implementations of the PC operations */
    pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc;
    pc->ops->reset         = PCReset_BJacobi_Multiproc;
    pc->ops->destroy       = PCDestroy_BJacobi_Multiproc;
    pc->ops->apply         = PCApply_BJacobi_Multiproc;
    pc->ops->matapply      = PCMatApply_BJacobi_Multiproc;
  } else { /* pc->setupcalled */
    subcomm = PetscSubcommChild(mpjac->psubcomm);
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      /* destroy old matrix blocks, then get new matrix blocks */
      if (mpjac->submats) PetscCall(MatDestroy(&mpjac->submats));
      PetscCall(MatGetMultiProcBlock(pc->pmat,subcomm,MAT_INITIAL_MATRIX,&mpjac->submats));
    } else {
      PetscCall(MatGetMultiProcBlock(pc->pmat,subcomm,MAT_REUSE_MATRIX,&mpjac->submats));
    }
    PetscCall(KSPSetOperators(jac->ksp[0],mpjac->submats,mpjac->submats));
  }

  if (!wasSetup && pc->setfromoptionscalled) {
    PetscCall(KSPSetFromOptions(jac->ksp[0]));
  }
  PetscFunctionReturn(0);
}
1274